Skip to content

Commit

Permalink
📝 Improve readability.
Browse files Browse the repository at this point in the history
  • Loading branch information
JonasGilg committed Jan 29, 2025
1 parent 13bf830 commit 3244c13
Showing 1 changed file with 28 additions and 63 deletions.
91 changes: 28 additions & 63 deletions src/cs-graphics/LuminanceMipMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,8 @@ static const char* sComputeAverage = R"(
layout (rg32f, binding = 1) uniform image1D uOutLuminance;
// Shared arrays for this work group. For each element, sTotal contains the total luminance and
// sMax contains the max luminance.
shared vec2 sData[LOCAL_SIZE];
shared float sTotal[LOCAL_SIZE];
shared float sMax[LOCAL_SIZE];
// Returns the luminance for the pixel in a 2D vector.
vec2 sampleHDRBuffer(ivec2 pos) {
Expand Down Expand Up @@ -71,16 +70,15 @@ static const char* sComputeAverage = R"(
vec2 right = j < maxSize ? sampleHDRBuffer(ivec2(j % bufferSize.x, j / bufferSize.x)) : vec2(0);
// The two values are combined and written to this thread's shared memory address.
sData[tid] = vec2(
left.x + right.x,
max(left.y, right.y)
);
sTotal[tid] = left.x + right.x;
sMax[tid] = max(left.y, right.y);
// Wait for all threads in the work group to finish.
memoryBarrierShared();
barrier();
#ifdef GL_KHR_shader_subgroup_basic
// Get the warp size using an extension.
const uint subGroupSize = gl_SubgroupSize;
#else
// Default warp size for NVIDIA. AMD might have 32 or 64.
Expand All @@ -89,17 +87,13 @@ static const char* sComputeAverage = R"(
// 2. Step
// Do the actual parallel reduction.
// Each thread combines its own value with a value of 2 * its current position.
// Each loop the amount of working threads are halfed.
// Each thread combines its own value with the value located s elements after its own position.
// In each iteration, the number of working threads is halved.
// We stop when only one warp is left.
for (uint s = gl_WorkGroupSize.x / 2; s > subGroupSize; s >>= 1) {
if (tid < s) {
vec2 left = sData[tid];
vec2 right = sData[tid + s];
sData[tid] = vec2(
left.x + right.x,
max(left.y, right.y)
);
sTotal[tid] += sTotal[tid + s];
sMax[tid] = max(sMax[tid], sMax[tid + s]);
}
memoryBarrierShared();
Expand All @@ -109,78 +103,49 @@ static const char* sComputeAverage = R"(
#if defined(GL_KHR_shader_subgroup_arithmetic) && defined(GL_KHR_shader_subgroup_basic)
// We make use of special warp arithmetic to reduce the last warp.
if (tid < subGroupSize) {
vec2 value = sData[tid];
float sum = subgroupAdd(value.x);
float max = subgroupMax(value.y);
float sum = subgroupAdd(sTotal[tid]);
float max = subgroupMax(sMax[tid]);
if (subgroupElect()) {
sData[tid] = vec2(sum, max);
sTotal[tid] = sum;
sMax[tid] = max;
}
}
#else
// Unroll the last warp for maximum performance gains.
if (tid < 32) {
vec2 left = sData[tid];
vec2 right = sData[tid + 32];
sData[tid] = vec2(
left.x + right.x,
max(left.y, right.y)
);
sTotal[tid] += sTotal[tid + 32];
sMax[tid] = max(sMax[tid], sMax[tid + 32]);
memoryBarrierShared();
barrier();
left = sData[tid];
right = sData[tid + 16];
sData[tid] = vec2(
left.x + right.x,
max(left.y, right.y)
);
sTotal[tid] += sTotal[tid + 16];
sMax[tid] = max(sMax[tid], sMax[tid + 16]);
memoryBarrierShared();
barrier();
left = sData[tid];
right = sData[tid + 8];
sData[tid] = vec2(
left.x + right.x,
max(left.y, right.y)
);
sTotal[tid] += sTotal[tid + 8];
sMax[tid] = max(sMax[tid], sMax[tid + 8]);
memoryBarrierShared();
barrier();
left = sData[tid];
right = sData[tid + 4];
sData[tid] = vec2(
left.x + right.x,
max(left.y, right.y)
);
sTotal[tid] += sTotal[tid + 4];
sMax[tid] = max(sMax[tid], sMax[tid + 4]);
memoryBarrierShared();
barrier();
left = sData[tid];
right = sData[tid + 2];
sData[tid] = vec2(
left.x + right.x,
max(left.y, right.y)
);
sTotal[tid] += sTotal[tid + 2];
sMax[tid] = max(sMax[tid], sMax[tid + 2]);
memoryBarrierShared();
barrier();
left = sData[tid];
right = sData[tid + 1];
sData[tid] = vec2(
left.x + right.x,
max(left.y, right.y)
);
sTotal[tid] += sTotal[tid + 1];
sMax[tid] = max(sMax[tid], sMax[tid + 1]);
}
#endif
// The first thread in each work group writes the final value to the output.
if (tid == 0) {
imageStore(uOutLuminance, int(gl_WorkGroupID.x), vec4(sData[0].x, sData[0].y, 0.0, 0.0));
imageStore(uOutLuminance, int(gl_WorkGroupID.x), vec4(sTotal[0], sMax[0], 0.0, 0.0));
}
}
)";
Expand All @@ -198,8 +163,8 @@ LuminanceMipMap::LuminanceMipMap(uint32_t hdrBufferSamples, int hdrBufferWidth,

mLuminanceBuffer = std::make_unique<VistaTexture>(GL_TEXTURE_1D);
mLuminanceBuffer->Bind();
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);

glTexStorage1D(GL_TEXTURE_1D, 1, GL_RG32F, mWorkGroups);
Expand Down

0 comments on commit 3244c13

Please sign in to comment.