From c8411578ee55222467e6faa9e8d87a479caa91d0 Mon Sep 17 00:00:00 2001 From: alexguirre Date: Sat, 8 Feb 2025 23:34:11 +0100 Subject: [PATCH 1/3] fix(core/rdr3): AMV hardcoded limit causing blinking There is a global bitset used to store the enabled/disabled state of every AMV. This bitset has a hardcoded size of 6016 entries, once there are more AMVs than the bitset size, it starts reading/writing out-of-bounds. This causes the AMVs to start blinking and can potentially crash. This patch replaces that bitset with a dynamic bitset resized to fit the capacity of all the AMV pools. --- code/client/shared/Hooking.h | 6 + .../components/gta-core-rdr3/src/PatchAMV.cpp | 131 ++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 code/components/gta-core-rdr3/src/PatchAMV.cpp diff --git a/code/client/shared/Hooking.h b/code/client/shared/Hooking.h index dd086e1e9a..4f167df2a0 100644 --- a/code/client/shared/Hooking.h +++ b/code/client/shared/Hooking.h @@ -719,6 +719,12 @@ inline void jump_rcx(AT address, T func) jump_reg<1>(address, func); } +template +inline void jump_rdx(AT address, T func) +{ + jump_reg<2>(address, func); +} + template inline std::enable_if_t<(Register < 8 && Register >= 0)> call_reg(AT address, T func) { diff --git a/code/components/gta-core-rdr3/src/PatchAMV.cpp b/code/components/gta-core-rdr3/src/PatchAMV.cpp new file mode 100644 index 0000000000..c061acb89d --- /dev/null +++ b/code/components/gta-core-rdr3/src/PatchAMV.cpp @@ -0,0 +1,131 @@ +#include +#include +#include +#include + +// +// There is a global bitset used to store the enabled/disabled state of every AMV. This bitset has a hardcoded size +// of 6016 entries, once there are more AMVs than the bitset size, it starts reading/writing out-of-bounds. This causes +// the AMVs to start blinking and can potentially crash. +// +// Here we replace that bitset with a dynamic bitset resized to fit the capacity of all the AMV pools. +// + +static std::vector enabledAMVs[2]; // double-buffered, one for render thread and another one for update thread + +static void AMVInitEnabledBitset() +{ + const size_t totalAMVs = rage::GetPoolBase("CAmbientMaskVolume")->GetSize() + + rage::GetPoolBase("CAmbientMaskVolumeDoor")->GetSize() + + rage::GetPoolBase("CAmbientMaskVolumeEntity")->GetSize(); + enabledAMVs[0].clear(); + enabledAMVs[1].clear(); + enabledAMVs[0].resize(totalAMVs, false); + enabledAMVs[1].resize(totalAMVs, false); +} + +static void AMVSetEnabled(uint16_t amvIndex, bool enable, uint32_t bufferIndex) +{ + enabledAMVs[bufferIndex][amvIndex] = enable; +} + +static bool AMVIsEnabled(uint16_t amvIndex, uint32_t bufferIndex) +{ + return enabledAMVs[bufferIndex][amvIndex]; +} + +static void AMVPresentEnabledBitset(uint32_t srcBufferIndex) +{ + // copy update buffer to render buffer + const uint32_t dstBufferIndex = 1 - srcBufferIndex; + enabledAMVs[dstBufferIndex] = enabledAMVs[srcBufferIndex]; +} + +static HookFunction hookFunction([]() +{ + // AMV init + { + auto location = hook::get_pattern("48 8D 0D ? ? ? ? E8 ? ? ? ? 8A 05 ? ? ? ? 84 C0 75 ? 49 8B CC"); + hook::nop(location, 12); + hook::call_rcx(location, AMVInitEnabledBitset); + } + + // AMV set enabled + { + static struct : jitasm::Frontend + { + void InternalMain() override + { + // eax = buffer index + // cx = AMV index + // r10b = enable + + sub(rsp, 0x28); + + // cx already correct + mov(dl, r10b); + mov(r8d, eax); + mov(rax, reinterpret_cast(AMVSetEnabled)); + call(rax); + + add(rsp, 0x28); + + ret(); + } + } amvSetEnabledStub; + + auto location = hook::get_pattern("4C 69 C0 ? ? ? ? 0F B7 C9"); + hook::jump_rdx(location, amvSetEnabledStub.GetCode()); + } + + // AMV is enabled + { + static struct : jitasm::Frontend + { + void InternalMain() override + { + // eax = buffer index + // cx = AMV index + + sub(rsp, 0x28); + + // cx already correct + mov(edx, eax); + mov(rax, reinterpret_cast(AMVIsEnabled)); + call(rax); + + add(rsp, 0x28); + + ret(); + } + } amvIsEnabledStub; + + auto location = hook::get_pattern("0F B7 D1 48 69 C8"); + hook::jump_rdx(location, amvIsEnabledStub.GetCode()); + } + + // AMV present buffer + { + static struct : jitasm::Frontend + { + void InternalMain() override + { + // r10d = source buffer index + + sub(rsp, 0x20); + + mov(ecx, r10d); + mov(rax, reinterpret_cast(AMVPresentEnabledBitset)); + call(rax); + + add(rsp, 0x20); + + ret(); + } + } amvPresentBufferStub; + + auto location = hook::get_pattern("41 8B C2 45 2B C1 48 69 D0"); + hook::nop(location, 0xAD); + hook::call(location, amvPresentBufferStub.GetCode()); + } +}); From b5e1f7101ae3165ec4cceb0667309af011e2e3df Mon Sep 17 00:00:00 2001 From: alexguirre Date: Mon, 10 Feb 2025 11:03:51 +0100 Subject: [PATCH 2/3] tweak(core/rdr3): simplify AMV patch --- code/client/shared/Hooking.h | 6 - .../components/gta-core-rdr3/src/PatchAMV.cpp | 134 ++++-------------- 2 files changed, 31 insertions(+), 109 deletions(-) diff --git a/code/client/shared/Hooking.h b/code/client/shared/Hooking.h index 4f167df2a0..dd086e1e9a 100644 --- a/code/client/shared/Hooking.h +++ b/code/client/shared/Hooking.h @@ -719,12 +719,6 @@ inline void jump_rcx(AT address, T func) jump_reg<1>(address, func); } -template -inline void jump_rdx(AT address, T func) -{ - jump_reg<2>(address, func); -} - template inline std::enable_if_t<(Register < 8 && Register >= 0)> call_reg(AT address, T func) { diff --git a/code/components/gta-core-rdr3/src/PatchAMV.cpp b/code/components/gta-core-rdr3/src/PatchAMV.cpp index c061acb89d..94d6819e24 100644 --- a/code/components/gta-core-rdr3/src/PatchAMV.cpp +++ b/code/components/gta-core-rdr3/src/PatchAMV.cpp @@ -8,124 +8,52 @@ // of 6016 entries, once there are more AMVs than the bitset size, it starts reading/writing out-of-bounds. This causes // the AMVs to start blinking and can potentially crash. // -// Here we replace that bitset with a dynamic bitset resized to fit the capacity of all the AMV pools. +// Here we increase the size of that bitset to fit the capacity of all the AMV pools. // -static std::vector enabledAMVs[2]; // double-buffered, one for render thread and another one for update thread - -static void AMVInitEnabledBitset() -{ - const size_t totalAMVs = rage::GetPoolBase("CAmbientMaskVolume")->GetSize() + - rage::GetPoolBase("CAmbientMaskVolumeDoor")->GetSize() + - rage::GetPoolBase("CAmbientMaskVolumeEntity")->GetSize(); - enabledAMVs[0].clear(); - enabledAMVs[1].clear(); - enabledAMVs[0].resize(totalAMVs, false); - enabledAMVs[1].resize(totalAMVs, false); -} - -static void AMVSetEnabled(uint16_t amvIndex, bool enable, uint32_t bufferIndex) +static HookFunction hookFunction([]() { - enabledAMVs[bufferIndex][amvIndex] = enable; -} + // keep in sync with gameconfig + constexpr size_t CAmbientMaskVolume_PoolSize = 6686; + constexpr size_t CAmbientMaskVolumeDoor_PoolSize = 1850; + constexpr size_t CAmbientMaskVolumeEntity_PoolSize = 150; -static bool AMVIsEnabled(uint16_t amvIndex, uint32_t bufferIndex) -{ - return enabledAMVs[bufferIndex][amvIndex]; -} + constexpr size_t TotalAMVs = CAmbientMaskVolume_PoolSize + CAmbientMaskVolumeDoor_PoolSize + CAmbientMaskVolumeEntity_PoolSize; + constexpr size_t BitsetNumBlocks = TotalAMVs / 32; + constexpr size_t BitsetNumBytes = BitsetNumBlocks * sizeof(uint32_t); -static void AMVPresentEnabledBitset(uint32_t srcBufferIndex) -{ - // copy update buffer to render buffer - const uint32_t dstBufferIndex = 1 - srcBufferIndex; - enabledAMVs[dstBufferIndex] = enabledAMVs[srcBufferIndex]; -} + // x2 because the bitset is double-buffered, one for render thread and another one for update thread + uint32_t* amvEnabledBitsetReplacement = reinterpret_cast(hook::AllocateStubMemory(BitsetNumBytes * 2)); + memset(amvEnabledBitsetReplacement, 0, BitsetNumBytes * 2); -static HookFunction hookFunction([]() -{ - // AMV init + // AMVInit { - auto location = hook::get_pattern("48 8D 0D ? ? ? ? E8 ? ? ? ? 8A 05 ? ? ? ? 84 C0 75 ? 49 8B CC"); - hook::nop(location, 12); - hook::call_rcx(location, AMVInitEnabledBitset); + auto location = hook::get_pattern("41 B8 ? ? ? ? 44 89 25"); + hook::put(location + 2, BitsetNumBytes * 2); + hook::put(location + 0x10, (intptr_t)amvEnabledBitsetReplacement - (intptr_t)location - 0x10 - 4); } - // AMV set enabled + // AMVIsEnabled { - static struct : jitasm::Frontend - { - void InternalMain() override - { - // eax = buffer index - // cx = AMV index - // r10b = enable - - sub(rsp, 0x28); - - // cx already correct - mov(dl, r10b); - mov(r8d, eax); - mov(rax, reinterpret_cast(AMVSetEnabled)); - call(rax); - - add(rsp, 0x28); - - ret(); - } - } amvSetEnabledStub; - - auto location = hook::get_pattern("4C 69 C0 ? ? ? ? 0F B7 C9"); - hook::jump_rdx(location, amvSetEnabledStub.GetCode()); + + auto location = hook::get_pattern("48 69 C8 ? ? ? ? 44 8B C2"); + hook::put(location + 3, BitsetNumBlocks); + hook::put(location + 0xD, (intptr_t)amvEnabledBitsetReplacement - (intptr_t)location - 0xD - 4); } - // AMV is enabled + // AMVSetEnabled { - static struct : jitasm::Frontend - { - void InternalMain() override - { - // eax = buffer index - // cx = AMV index - - sub(rsp, 0x28); - - // cx already correct - mov(edx, eax); - mov(rax, reinterpret_cast(AMVIsEnabled)); - call(rax); - - add(rsp, 0x28); - - ret(); - } - } amvIsEnabledStub; - - auto location = hook::get_pattern("0F B7 D1 48 69 C8"); - hook::jump_rdx(location, amvIsEnabledStub.GetCode()); + auto location = hook::get_pattern("4C 69 C0 ? ? ? ? 0F B7 C9"); + hook::put(location + 3, BitsetNumBytes); + hook::put(location + 0xD, (intptr_t)amvEnabledBitsetReplacement - (intptr_t)location - 0xD - 4); } - // AMV present buffer + // AMVPresentBuffer { - static struct : jitasm::Frontend - { - void InternalMain() override - { - // r10d = source buffer index - - sub(rsp, 0x20); - - mov(ecx, r10d); - mov(rax, reinterpret_cast(AMVPresentEnabledBitset)); - call(rax); - - add(rsp, 0x20); - - ret(); - } - } amvPresentBufferStub; - - auto location = hook::get_pattern("41 8B C2 45 2B C1 48 69 D0"); - hook::nop(location, 0xAD); - hook::call(location, amvPresentBufferStub.GetCode()); + auto location = hook::get_pattern("48 69 D0 ? ? ? ? 41 8B C0"); + hook::put(location + 3, BitsetNumBytes); + hook::put(location + 0x14, BitsetNumBytes); + hook::put(location + 0x19, BitsetNumBlocks / 32); // number of iterations, each one processes 32 blocks + hook::put(location + 0xD, (intptr_t)amvEnabledBitsetReplacement - (intptr_t)location - 0xD - 4); } }); From 7676a3bea6cb6b5652c03c7ad74b656cb5956c5f Mon Sep 17 00:00:00 2001 From: alexguirre Date: Tue, 11 Feb 2025 22:35:43 +0100 Subject: [PATCH 3/3] fix(core/rdr3): possible out-of-bounds read/write in AMV patch --- code/components/gta-core-rdr3/src/PatchAMV.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/code/components/gta-core-rdr3/src/PatchAMV.cpp b/code/components/gta-core-rdr3/src/PatchAMV.cpp index 94d6819e24..d443fa96ee 100644 --- a/code/components/gta-core-rdr3/src/PatchAMV.cpp +++ b/code/components/gta-core-rdr3/src/PatchAMV.cpp @@ -19,7 +19,14 @@ static HookFunction hookFunction([]() constexpr size_t CAmbientMaskVolumeEntity_PoolSize = 150; constexpr size_t TotalAMVs = CAmbientMaskVolume_PoolSize + CAmbientMaskVolumeDoor_PoolSize + CAmbientMaskVolumeEntity_PoolSize; - constexpr size_t BitsetNumBlocks = TotalAMVs / 32; + + // Due to how AMVPresentBuffer is compiled, the number of blocks needs to match the form (N*32+28). AMVPresentBuffer copies the bitset + // from update thread to the render thread bitset. The copy loop is vectorized to copy 32 blocks per iteration, then has an epilogue to + // copy the remaining 28 blocks, which works out for the original 188 blocks. When resizing the bitset we need to pad the number of blocks + // so it doesn't read/write out-of-bounds. + constexpr size_t BitsetNumBlocksNoPadding = (TotalAMVs + 31) / 32; // minimum number of 32-bit blocks needed by the AMVs + constexpr size_t BitsetPresentCopyNumIterations = (BitsetNumBlocksNoPadding - 28 + 31) / 32; // smallest N such that (N*32+28) >= BitsetNumBlocksNoPadding + constexpr size_t BitsetNumBlocks = BitsetPresentCopyNumIterations * 32 + 28; constexpr size_t BitsetNumBytes = BitsetNumBlocks * sizeof(uint32_t); // x2 because the bitset is double-buffered, one for render thread and another one for update thread @@ -53,7 +60,7 @@ static HookFunction hookFunction([]() auto location = hook::get_pattern("48 69 D0 ? ? ? ? 41 8B C0"); hook::put(location + 3, BitsetNumBytes); hook::put(location + 0x14, BitsetNumBytes); - hook::put(location + 0x19, BitsetNumBlocks / 32); // number of iterations, each one processes 32 blocks + hook::put(location + 0x19, BitsetPresentCopyNumIterations); hook::put(location + 0xD, (intptr_t)amvEnabledBitsetReplacement - (intptr_t)location - 0xD - 4); } });