Skip to content

v1.2.11

Compare
Choose a tag to compare
@tugrul512bit tugrul512bit released this 02 Jun 21:33
· 130 commits to master since this release

Added a single-GPU pipeline sub-feature.

It runs all stages in parallel on the GPU, concurrently with host code; device-to-host and host-to-device transfers also run in parallel with the kernels of all stages.

            // Take the first entry of the GPU list gathered from all platforms and log its info.
            var deviceForCompute = ClPlatforms.all().gpus()[0];
            deviceForCompute.logInfo();
            // Create the pipeline on that device from the text of test.cl.
            // The path is relative, so it is resolved against the process working directory.
            DevicePipeline gpuPipeline = new DevicePipeline(deviceForCompute,@""+File.ReadAllText("..//..//..//test.cl") );
            // Left disabled here. NOTE(review): presumably forces stages to run one after another
            // instead of concurrently (useful for debugging) - confirm against the library docs.
            //gpuPipeline.enableSerialMode();
            // Six stages, each created with a name string, maxImgSizeResult squared, and 256.
            // NOTE(review): the names presumably match kernel functions in test.cl, and the two numbers
            // look like global and local work sizes - TODO confirm.
            DevicePipelineStage stage1 = new DevicePipelineStage("resize", maxImgSizeResult * maxImgSizeResult, 256);
            DevicePipelineStage stage2 = new DevicePipelineStage("parameterSet", maxImgSizeResult * maxImgSizeResult, 256);
            DevicePipelineStage stage3 = new DevicePipelineStage("gaussianBlur", maxImgSizeResult * maxImgSizeResult, 256);
            DevicePipelineStage stage4 = new DevicePipelineStage("rotateImgRad", maxImgSizeResult * maxImgSizeResult, 256);
            DevicePipelineStage stage5 = new DevicePipelineStage("blendImg", maxImgSizeResult * maxImgSizeResult, 256);
            DevicePipelineStage stage6 = new DevicePipelineStage("postProcess", maxImgSizeResult * maxImgSizeResult, 256);

            // Host-side arrays used by the pipeline.
            // imageBytes, imageBlendBytes and resultImageBytes are defined outside this snippet.
            ClArray<byte> stage1Input = imageBytes;
            ClArray<byte> stage5Input = imageBlendBytes;
            // 1024-element int arrays, assigned directly from int[] to ClArray<int>.
            ClArray<int> parameters = new int[1024];
            // NOTE(review): "acculumulator" is a misspelling of "accumulator"; the name is kept as-is
            // because it is referenced where the INTERNAL pipeline array is created.
            ClArray<int> acculumulator = new int[1024];
            // One parameter array per stage (six in total), each with 1024 ints.
            ClArray<int> parametersPipe = new int[1024];
            ClArray<int> parametersPipe2 = new int[1024];
            ClArray<int> parametersPipe3 = new int[1024];
            ClArray<int> parametersPipe4 = new int[1024];
            ClArray<int> parametersPipe5 = new int[1024];
            ClArray<int> parametersPipe6 = new int[1024];
            ClArray<byte> resultImage = resultImageBytes;
            // Five byte buffers, each of maxImgSizeResult * maxImgSizeResult * 4 elements.
            // NOTE(review): the factor 4 presumably means 4 bytes per pixel - TODO confirm.
            ClArray<byte> pipeBuffer = new ClArray<byte>(maxImgSizeResult * maxImgSizeResult * 4);
            ClArray<byte> pipeBuffer2 = new ClArray<byte>(maxImgSizeResult * maxImgSizeResult * 4);
            ClArray<byte> pipeBuffer3 = new ClArray<byte>(maxImgSizeResult * maxImgSizeResult * 4);
            ClArray<byte> pipeBuffer4 = new ClArray<byte>(maxImgSizeResult * maxImgSizeResult * 4);
            ClArray<byte> pipeBuffer5 = new ClArray<byte>(maxImgSizeResult * maxImgSizeResult * 4);

            // Wrap each host array in a DevicePipelineArray tagged with a DevicePipelineArrayType.
            // NOTE(review): presumably INPUT = copied host-to-device, OUTPUT = copied device-to-host,
            // TRANSITION = handed from one stage to the next, INTERNAL = private to a single stage.
            // Confirm these meanings against the library documentation.

            // The two INPUT arrays: the source image and the blend image.
            DevicePipelineArray bufInput  = new DevicePipelineArray(DevicePipelineArrayType.INPUT, stage1Input);
            DevicePipelineArray bufBlendInput  = new DevicePipelineArray(DevicePipelineArrayType.INPUT, stage5Input);
            // The only INTERNAL array.
            var bufAccumulator = new DevicePipelineArray(DevicePipelineArrayType.INTERNAL , acculumulator);
            // TRANSITION wrappers around the five byte buffers.
            var bufPipe1 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION , pipeBuffer);
            var bufPipe2 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION , pipeBuffer2);
            var bufPipe3 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION , pipeBuffer3);
            var bufPipe4 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION , pipeBuffer4);
            var bufPipe5 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION , pipeBuffer5);
            // TRANSITION wrappers around the six parameter arrays.
            var bufPipeParameter = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION , parametersPipe);
            var bufPipeParameter2 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION , parametersPipe2);
            var bufPipeParameter3 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION , parametersPipe3);
            var bufPipeParameter4 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION , parametersPipe4);
            var bufPipeParameter5 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION , parametersPipe5);
            var bufPipeParameter6 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION , parametersPipe6);
            // The only OUTPUT array: the result image.
            var bufResult = new DevicePipelineArray(DevicePipelineArrayType.OUTPUT , resultImage);

            // Bind arrays to each stage. Consecutive stages share one image buffer and one parameter
            // buffer, so every stage after the first starts with the buffers the previous stage bound.
            // NOTE(review): bind order presumably maps to kernel argument order in test.cl; do not
            // reorder these calls without checking the kernel signatures.

            // Stage 1 ("resize"): source image, an INPUT wrapper created inline around `parameters`,
            // then the parameter and image buffers shared with stage 2.
            stage1.bindArray(bufInput);
            stage1.bindArray(new DevicePipelineArray(DevicePipelineArrayType.INPUT, parameters));
            stage1.bindArray(bufPipeParameter);
            stage1.bindArray(bufPipe1);

            // Stage 2 ("parameterSet"): buffers shared with stage 1, buffers shared with stage 3,
            // plus the INTERNAL accumulator, which no other stage binds.
            stage2.bindArray(bufPipe1);
            stage2.bindArray(bufPipeParameter);
            stage2.bindArray(bufPipeParameter2);
            stage2.bindArray(bufPipe2);
            stage2.bindArray(bufAccumulator);

            // Stage 3 ("gaussianBlur"): buffers shared with stage 2, then buffers shared with stage 4.
            stage3.bindArray(bufPipe2);
            stage3.bindArray(bufPipeParameter2);
            stage3.bindArray(bufPipeParameter3);
            stage3.bindArray(bufPipe3);

            // Stage 4 ("rotateImgRad"): buffers shared with stage 3, then buffers shared with stage 5.
            stage4.bindArray(bufPipe3);
            stage4.bindArray(bufPipeParameter3);
            stage4.bindArray(bufPipeParameter4);
            stage4.bindArray(bufPipe4);

            // Stage 5 ("blendImg"): buffers shared with stage 4, buffers shared with stage 6,
            // plus the second INPUT array (the blend image).
            stage5.bindArray(bufPipe4);
            stage5.bindArray(bufPipeParameter4);
            stage5.bindArray(bufPipeParameter5);
            stage5.bindArray(bufPipe5);
            stage5.bindArray(bufBlendInput);

            // Stage 6 ("postProcess"): buffers shared with stage 5, its own parameter buffer
            // (bound by no other stage in this snippet), and the OUTPUT result image.
            stage6.bindArray(bufPipe5);
            stage6.bindArray(bufPipeParameter5);
            stage6.bindArray(bufPipeParameter6);
            stage6.bindArray(bufResult);

            // Register the six stages with the pipeline, in order from stage1 to stage6.
            // NOTE(review): the insertion order presumably defines the order data flows through
            // the stages - confirm against the library documentation before reordering.
            gpuPipeline.addStage(stage1);
            gpuPipeline.addStage(stage2);
            gpuPipeline.addStage(stage3);
            gpuPipeline.addStage(stage4);
            gpuPipeline.addStage(stage5);
            gpuPipeline.addStage(stage6);