v1.2.11
Added single-GPU pipeline sub-feature.
Runs all stages in parallel on the GPU, concurrently with host code; device-to-host and host-to-device transfers also run in parallel with the kernels of all stages.
// Take the first entry of the GPU list gathered from all platforms.
// NOTE(review): indexing [0] assumes at least one GPU was found — confirm or guard in the caller.
var deviceForCompute = ClPlatforms.all().gpus()[0];
deviceForCompute.logInfo();

// Create the pipeline for that device from the full text of test.cl
// (presumably the OpenCL kernel source; the relative path is resolved
// against the current working directory).
DevicePipeline gpuPipeline = new DevicePipeline(deviceForCompute, File.ReadAllText("..//..//..//test.cl"));

// Left disabled on purpose. Presumably switches the pipeline to serial stage
// execution (e.g. for debugging) — confirm before enabling.
//gpuPipeline.enableSerialMode();
// One pipeline stage per processing step. The string is presumably the name of
// a kernel in the pipeline source; the two numbers are presumably the global
// and local work sizes (maxImgSizeResult squared, and 256) — TODO confirm
// against the DevicePipelineStage constructor.
var stage1 = new DevicePipelineStage("resize", maxImgSizeResult * maxImgSizeResult, 256);
var stage2 = new DevicePipelineStage("parameterSet", maxImgSizeResult * maxImgSizeResult, 256);
var stage3 = new DevicePipelineStage("gaussianBlur", maxImgSizeResult * maxImgSizeResult, 256);
var stage4 = new DevicePipelineStage("rotateImgRad", maxImgSizeResult * maxImgSizeResult, 256);
var stage5 = new DevicePipelineStage("blendImg", maxImgSizeResult * maxImgSizeResult, 256);
var stage6 = new DevicePipelineStage("postProcess", maxImgSizeResult * maxImgSizeResult, 256);
// Host-side image data wrapped as ClArray<byte>: the two inputs and the result.
// The explicit ClArray<byte> type is kept so the assignment converts the
// right-hand side if it is a plain array.
ClArray<byte> stage1Input = imageBytes;
ClArray<byte> stage5Input = imageBlendBytes;
ClArray<byte> resultImage = resultImageBytes;

// 1024-element int arrays: the initial parameters and an accumulator.
// NOTE(review): "acculumulator" is a misspelling of "accumulator"; the name is
// kept because it may be referenced outside this snippet.
ClArray<int> parameters = new int[1024];
ClArray<int> acculumulator = new int[1024];

// Six further 1024-element int arrays, one per parameter hand-off.
ClArray<int> parametersPipe = new int[1024],
             parametersPipe2 = new int[1024],
             parametersPipe3 = new int[1024],
             parametersPipe4 = new int[1024],
             parametersPipe5 = new int[1024],
             parametersPipe6 = new int[1024];

// Five byte buffers of maxImgSizeResult * maxImgSizeResult * 4 elements each
// (presumably 4 bytes per pixel — confirm against the kernels).
var pipeBuffer = new ClArray<byte>(maxImgSizeResult * maxImgSizeResult * 4);
var pipeBuffer2 = new ClArray<byte>(maxImgSizeResult * maxImgSizeResult * 4);
var pipeBuffer3 = new ClArray<byte>(maxImgSizeResult * maxImgSizeResult * 4);
var pipeBuffer4 = new ClArray<byte>(maxImgSizeResult * maxImgSizeResult * 4);
var pipeBuffer5 = new ClArray<byte>(maxImgSizeResult * maxImgSizeResult * 4);
// Wrap each host array in a DevicePipelineArray tagged with its role.

// INPUT: the two source images.
var bufInput = new DevicePipelineArray(DevicePipelineArrayType.INPUT, stage1Input);
var bufBlendInput = new DevicePipelineArray(DevicePipelineArrayType.INPUT, stage5Input);

// INTERNAL: the accumulator array.
var bufAccumulator = new DevicePipelineArray(DevicePipelineArrayType.INTERNAL, acculumulator);

// TRANSITION: image buffers passed between consecutive stages.
var bufPipe1 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION, pipeBuffer);
var bufPipe2 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION, pipeBuffer2);
var bufPipe3 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION, pipeBuffer3);
var bufPipe4 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION, pipeBuffer4);
var bufPipe5 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION, pipeBuffer5);

// TRANSITION: parameter arrays passed between consecutive stages.
var bufPipeParameter = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION, parametersPipe);
var bufPipeParameter2 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION, parametersPipe2);
var bufPipeParameter3 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION, parametersPipe3);
var bufPipeParameter4 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION, parametersPipe4);
var bufPipeParameter5 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION, parametersPipe5);
var bufPipeParameter6 = new DevicePipelineArray(DevicePipelineArrayType.TRANSITION, parametersPipe6);

// OUTPUT: the final image.
var bufResult = new DevicePipelineArray(DevicePipelineArrayType.OUTPUT, resultImage);
// Bind the arrays each stage works on. Consecutive stages share a pipe buffer
// and a parameter buffer: the pair a stage binds last is the pair the next
// stage binds first.
// NOTE(review): the bind order presumably has to match the parameter order of
// the corresponding kernel — confirm against test.cl before reordering.

// Stage 1 ("resize"): source image and a fresh INPUT wrapper around
// `parameters`, then the first parameter/pipe buffer pair.
stage1.bindArray(bufInput);
stage1.bindArray(new DevicePipelineArray(DevicePipelineArrayType.INPUT, parameters));
stage1.bindArray(bufPipeParameter);
stage1.bindArray(bufPipe1);

// Stage 2 ("parameterSet"): also binds the accumulator array.
stage2.bindArray(bufPipe1);
stage2.bindArray(bufPipeParameter);
stage2.bindArray(bufPipeParameter2);
stage2.bindArray(bufPipe2);
stage2.bindArray(bufAccumulator);

// Stage 3 ("gaussianBlur").
stage3.bindArray(bufPipe2);
stage3.bindArray(bufPipeParameter2);
stage3.bindArray(bufPipeParameter3);
stage3.bindArray(bufPipe3);

// Stage 4 ("rotateImgRad").
stage4.bindArray(bufPipe3);
stage4.bindArray(bufPipeParameter3);
stage4.bindArray(bufPipeParameter4);
stage4.bindArray(bufPipe4);

// Stage 5 ("blendImg"): also binds the second input image.
stage5.bindArray(bufPipe4);
stage5.bindArray(bufPipeParameter4);
stage5.bindArray(bufPipeParameter5);
stage5.bindArray(bufPipe5);
stage5.bindArray(bufBlendInput);

// Stage 6 ("postProcess"): binds the result array. bufPipeParameter6 is bound
// only here within this snippet.
stage6.bindArray(bufPipe5);
stage6.bindArray(bufPipeParameter5);
stage6.bindArray(bufPipeParameter6);
stage6.bindArray(bufResult);
// Register the stages with the pipeline in order, stage1 first and stage6 last.
foreach (var stage in new[] { stage1, stage2, stage3, stage4, stage5, stage6 })
{
    gpuPipeline.addStage(stage);
}