@@ -134,10 +134,15 @@ class CComputeBlit : public core::IReferenceCounted
134134 // the absolute minimum needed to store a single pixel of a worst case format (precise, all 4 channels)
135135 constexpr auto singlePixelStorage = 4 *sizeof (hlsl::float32_t );
136136 constexpr auto ratio = singlePixelStorage/sizeof (uint16_t );
137- const auto paddedAlphaBinCount = core::min (core::roundUp (baseBucketCount,workgroupSize),workgroupSize*ratio);
137+ // atomicAdd gets performed on MSB or LSB of a single DWORD
138+ const auto paddedAlphaBinCount = core::min (core::roundUp<uint16_t >(baseBucketCount,workgroupSize*2 ),workgroupSize*ratio);
138139 return paddedAlphaBinCount*layersToBlit;
139140 }
140-
141+
142+ static inline uint32_t getNormalizationByteSize (const uint16_t workgroupSize, const asset::E_FORMAT intermediateAlpha, const uint32_t layersToBlit)
143+ {
144+ return getAlphaBinCount (workgroupSize,intermediateAlpha,layersToBlit)*sizeof (uint16_t )+sizeof (uint32_t )+sizeof (uint32_t );
145+ }
141146#if 0
142147
143148 //! Returns the number of output texels produced by one workgroup, deciding factor is `m_availableSharedMemory`.
@@ -337,19 +342,14 @@ class CComputeBlit : public core::IReferenceCounted
337342 {
338343 dispatch_info_t dispatchInfo;
339344 buildAlphaTestDispatchInfo(dispatchInfo, inImageExtent, inImageType, layersToBlit);
340-
341- cmdbuf->bindDescriptorSets(asset::EPBP_COMPUTE, alphaTestPipeline->getLayout(), 0u, 1u, &alphaTestDS);
342- cmdbuf->bindComputePipeline(alphaTestPipeline);
345+ // bind omitted
343346 dispatchHelper(cmdbuf, alphaTestPipeline->getLayout(), pushConstants, dispatchInfo);
344347 }
345348
346349 {
347350 dispatch_info_t dispatchInfo;
348351 buildBlitDispatchInfo<BlitUtilities>(dispatchInfo, inImageExtent, outImageExtent, inImageFormat, inImageType, kernels, workgroupSize, layersToBlit);
349-
350- video::IGPUDescriptorSet* ds_raw[] = { blitDS, blitWeightsDS };
351- cmdbuf->bindDescriptorSets(asset::EPBP_COMPUTE, blitPipeline->getLayout(), 0, 2, ds_raw);
352- cmdbuf->bindComputePipeline(blitPipeline);
352+ // bind omitted
353353 dispatchHelper(cmdbuf, blitPipeline->getLayout(), pushConstants, dispatchInfo);
354354 }
355355
@@ -359,39 +359,6 @@ class CComputeBlit : public core::IReferenceCounted
359359 dispatch_info_t dispatchInfo;
360360 buildNormalizationDispatchInfo(dispatchInfo, outImageExtent, inImageType, layersToBlit);
361361
362- assert(coverageAdjustmentScratchBuffer);
363- IGPUCommandBuffer::SPipelineBarrierDependencyInfo depInfo;
364- // Memory dependency to ensure the alpha test pass has finished writing to alphaTestCounterBuffer
365- video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t alphaTestBarrier = {};
366- alphaTestBarrier.barrier.dep.srcStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
367- alphaTestBarrier.barrier.dep.srcAccessMask = asset::ACCESS_FLAGS::SHADER_WRITE_BITS;
368- alphaTestBarrier.barrier.dep.dstStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
369- alphaTestBarrier.barrier.dep.dstAccessMask = asset::ACCESS_FLAGS::SHADER_READ_BITS;
370- alphaTestBarrier.range.buffer = coverageAdjustmentScratchBuffer;
371- alphaTestBarrier.range.size = coverageAdjustmentScratchBuffer->getSize();
372- alphaTestBarrier.range.offset = 0;
373-
374- // Memory dependency to ensure that the previous compute pass has finished writing to the output image,
375- // also transitions the layout of said image: GENERAL -> SHADER_READ_ONLY_OPTIMAL
376- video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t readyForNorm = {};
377- readyForNorm.barrier.dep.srcStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
378- readyForNorm.barrier.dep.srcAccessMask = asset::ACCESS_FLAGS::SHADER_WRITE_BITS;
379- readyForNorm.barrier.dep.dstStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
380- readyForNorm.barrier.dep.dstAccessMask = asset::ACCESS_FLAGS::SHADER_READ_BITS;
381- readyForNorm.oldLayout = video::IGPUImage::LAYOUT::GENERAL;
382- readyForNorm.newLayout = video::IGPUImage::LAYOUT::READ_ONLY_OPTIMAL;
383- readyForNorm.image = normalizationInImage.get();
384- readyForNorm.subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT;
385- readyForNorm.subresourceRange.levelCount = 1u;
386- readyForNorm.subresourceRange.layerCount = normalizationInImage->getCreationParameters().arrayLayers;
387-
388- depInfo.bufBarriers = { &alphaTestBarrier, &alphaTestBarrier + 1 };
389- depInfo.imgBarriers = { &readyForNorm, &readyForNorm + 1 };
390-
391- cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS::EDF_NONE, depInfo);
392-
393- cmdbuf->bindDescriptorSets(asset::EPBP_COMPUTE, normalizationPipeline->getLayout(), 0u, 1u, &normalizationDS);
394- cmdbuf->bindComputePipeline(normalizationPipeline);
395362 dispatchHelper(cmdbuf, normalizationPipeline->getLayout(), pushConstants, dispatchInfo);
396363 }
397364 }
0 commit comments