From 1fe988eabf4b56490fbd0bd8c6a0359036931f37 Mon Sep 17 00:00:00 2001 From: Hajime Hoshi Date: Sat, 26 Oct 2024 00:20:13 +0900 Subject: [PATCH] internal/graphicscommand: improve performance of prependPreservedUniforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` % benchstat old.txt new.txt goos: darwin goarch: arm64 pkg: github.com/hajimehoshi/ebiten/v2/internal/graphicscommand cpu: Apple M3 Pro │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ PrependPreservedUniforms-12 23.07n ± 2% 19.96n ± 0% -13.48% (p=0.000 n=10) ``` Updates #3144 --- internal/graphicscommand/commandqueue.go | 117 ++++++++++++++--------- 1 file changed, 73 insertions(+), 44 deletions(-) diff --git a/internal/graphicscommand/commandqueue.go b/internal/graphicscommand/commandqueue.go index 519b25b12a7f..8048dff65059 100644 --- a/internal/graphicscommand/commandqueue.go +++ b/internal/graphicscommand/commandqueue.go @@ -354,23 +354,45 @@ func (q *commandQueue) prependPreservedUniforms(uniforms []uint32, shader *Shade func prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, srcs [graphics.ShaderSrcImageCount]*Image, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle) []uint32 { // Set the destination texture size. + // Hard-code indices for BCE optimization. + _ = uniforms[graphics.PreservedUniformUint32Count-1] + dw, dh := dst.InternalSize() uniforms[0] = math.Float32bits(float32(dw)) uniforms[1] = math.Float32bits(float32(dh)) - uniformIndex := 2 - - for i := 0; i < graphics.ShaderSrcImageCount; i++ { - var floatW, floatH uint32 - if srcs[i] != nil { - w, h := srcs[i].InternalSize() - floatW = math.Float32bits(float32(w)) - floatH = math.Float32bits(float32(h)) - } - uniforms[uniformIndex+i*2] = floatW - uniforms[uniformIndex+1+i*2] = floatH + if srcs[0] != nil { + w, h := srcs[0].InternalSize() + uniforms[2] = math.Float32bits(float32(w)) + uniforms[3] = math.Float32bits(float32(h)) + } else { + uniforms[2] = 0 + uniforms[3] = 0 + } + if srcs[1] != nil { + w, h := srcs[1].InternalSize() + uniforms[4] = math.Float32bits(float32(w)) + uniforms[5] = math.Float32bits(float32(h)) + } else { + uniforms[4] = 0 + uniforms[5] = 0 + } + if srcs[2] != nil { + w, h := srcs[2].InternalSize() + uniforms[6] = math.Float32bits(float32(w)) + uniforms[7] = math.Float32bits(float32(h)) + } else { + uniforms[6] = 0 + uniforms[7] = 0 + } + if srcs[3] != nil { + w, h := srcs[3].InternalSize() + uniforms[8] = math.Float32bits(float32(w)) + uniforms[9] = math.Float32bits(float32(h)) + } else { + uniforms[8] = 0 + uniforms[9] = 0 } - uniformIndex += graphics.ShaderSrcImageCount * 2 dr := imageRectangleToRectangleF32(dstRegion) if shader.unit() == shaderir.Texels { @@ -381,14 +403,12 @@ func prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, src } // Set the destination region origin. - uniforms[uniformIndex] = math.Float32bits(dr.x) - uniforms[uniformIndex+1] = math.Float32bits(dr.y) - uniformIndex += 2 + uniforms[10] = math.Float32bits(dr.x) + uniforms[11] = math.Float32bits(dr.y) // Set the destination region size. - uniforms[uniformIndex] = math.Float32bits(dr.width) - uniforms[uniformIndex+1] = math.Float32bits(dr.height) - uniformIndex += 2 + uniforms[12] = math.Float32bits(dr.width) + uniforms[13] = math.Float32bits(dr.height) var srs [graphics.ShaderSrcImageCount]rectangleF32 for i, r := range srcRegions { @@ -408,36 +428,45 @@ func prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, src } // Set the source region origins. - for i := 0; i < graphics.ShaderSrcImageCount; i++ { - uniforms[uniformIndex+i*2] = math.Float32bits(srs[i].x) - uniforms[uniformIndex+1+i*2] = math.Float32bits(srs[i].y) - } - uniformIndex += graphics.ShaderSrcImageCount * 2 + uniforms[14] = math.Float32bits(srs[0].x) + uniforms[15] = math.Float32bits(srs[0].y) + uniforms[16] = math.Float32bits(srs[1].x) + uniforms[17] = math.Float32bits(srs[1].y) + uniforms[18] = math.Float32bits(srs[2].x) + uniforms[19] = math.Float32bits(srs[2].y) + uniforms[20] = math.Float32bits(srs[3].x) + uniforms[21] = math.Float32bits(srs[3].y) // Set the source region sizes. - for i := 0; i < graphics.ShaderSrcImageCount; i++ { - uniforms[uniformIndex+i*2] = math.Float32bits(srs[i].width) - uniforms[uniformIndex+1+i*2] = math.Float32bits(srs[i].height) - } - uniformIndex += graphics.ShaderSrcImageCount * 2 + uniforms[22] = math.Float32bits(srs[0].width) + uniforms[23] = math.Float32bits(srs[0].height) + uniforms[24] = math.Float32bits(srs[1].width) + uniforms[25] = math.Float32bits(srs[1].height) + uniforms[26] = math.Float32bits(srs[2].width) + uniforms[27] = math.Float32bits(srs[2].height) + uniforms[28] = math.Float32bits(srs[3].width) + uniforms[29] = math.Float32bits(srs[3].height) // Set the projection matrix. - uniforms[uniformIndex] = math.Float32bits(2 / float32(dw)) - uniforms[uniformIndex+1] = 0 - uniforms[uniformIndex+2] = 0 - uniforms[uniformIndex+3] = 0 - uniforms[uniformIndex+4] = 0 - uniforms[uniformIndex+5] = math.Float32bits(2 / float32(dh)) - uniforms[uniformIndex+6] = 0 - uniforms[uniformIndex+7] = 0 - uniforms[uniformIndex+8] = 0 - uniforms[uniformIndex+9] = 0 - uniforms[uniformIndex+10] = math.Float32bits(1) - uniforms[uniformIndex+11] = 0 - uniforms[uniformIndex+12] = math.Float32bits(-1) - uniforms[uniformIndex+13] = math.Float32bits(-1) - uniforms[uniformIndex+14] = 0 - uniforms[uniformIndex+15] = math.Float32bits(1) + uniforms[30] = math.Float32bits(2 / float32(dw)) + uniforms[31] = 0 + uniforms[32] = 0 + uniforms[33] = 0 + uniforms[34] = 0 + uniforms[35] = math.Float32bits(2 / float32(dh)) + uniforms[36] = 0 + uniforms[37] = 0 + uniforms[38] = 0 + uniforms[39] = 0 + uniforms[40] = math.Float32bits(1) + uniforms[41] = 0 + uniforms[42] = math.Float32bits(-1) + uniforms[43] = math.Float32bits(-1) + uniforms[44] = 0 + uniforms[45] = math.Float32bits(1) + + // Confirm the concrete value of graphics.PreservedUniformUint32Count. + var _ [0]struct{} = [graphics.PreservedUniformUint32Count - 46]struct{}{} return uniforms }