From f9a3adeb158a17b9a6bd4a5fbaab18d8357c9e7d Mon Sep 17 00:00:00 2001 From: usuyus Date: Wed, 24 Sep 2025 10:34:50 +0100 Subject: [PATCH 1/4] alloc entries in frametables + disable musttail in RS4GC --- .../CodeGen/AsmPrinter/OxCamlGCPrinter.cpp | 29 +++++++++++++++++-- .../Scalar/RewriteStatepointsForGC.cpp | 5 ++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp index cc4ae59d04fcb..f95a5238768c9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp @@ -140,6 +140,20 @@ static unsigned mapLLVMDwarfRegToOxCamlIndex(unsigned DwarfRegNum) { } } +static uint64_t stackOffsetOfID(uint64_t ID) { + return ID & ((1ull << 16) - 1) & ~(1ull); +} + +static uint64_t allocSizeOfID(uint64_t ID) { + return ID >> 16; +} + +static bool IDHasAlloc(uint64_t ID) { + return ID & 1ull; +} + +static const int allocMask = 2; + bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter &AP) { MCStreamer &OS = *AP.OutStreamer; unsigned PtrSize = M.getDataLayout().getPointerSize(); // Can only be 8 for now @@ -173,10 +187,16 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter // frame_data uint64_t FrameSize = CSI.CSFunctionInfo.StaticStackSize; - if (CSI.ID != StatepointDirectives::DefaultStatepointID) - FrameSize += CSI.ID; // Stack offset from OxCaml FrameSize += PtrSize; // Return address + if (CSI.ID != StatepointDirectives::DefaultStatepointID) { + // Alloc bit + if (IDHasAlloc(CSI.ID)) FrameSize |= allocMask; + + // Stack offset from OxCaml + FrameSize += stackOffsetOfID(CSI.ID); + } + if (FrameSize >= 1 << 16) report_fatal_error("Long frames not supported for OxCaml GC: FrameSize = " + Twine(FrameSize)); @@ -238,6 +258,11 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter OS.emitInt16(EncodedReg); } + if (IDHasAlloc(CSI.ID)) { + OS.emitInt8(1); + OS.emitInt8(allocSizeOfID(CSI.ID)); + } + OS.emitValueToAlignment(Align(PtrSize)); } diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index f83ae3d7ae021..a74a4032ce2ae 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -3087,6 +3087,11 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, "Don't expect any other calls here!"); return false; } + + // `musttail` calls wrapped in statepoints fail to verify due to + // the intrinsic using variadic arguments. + if (Call->isMustTailCall()) return false; + return true; } return false; From 11d8151c3b3a3430564eb771e6309a67d82ee2b0 Mon Sep 17 00:00:00 2001 From: usuyus Date: Thu, 25 Sep 2025 14:18:20 +0100 Subject: [PATCH 2/4] handle bigger allocs --- .../CodeGen/AsmPrinter/OxCamlGCPrinter.cpp | 83 ++++++++++++++++--- 1 file changed, 72 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp index f95a5238768c9..618baeb744620 100644 --- a/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp @@ -67,7 +67,7 @@ static std::string camlGlobalSymName(const Module &M, const char *Id) { } } - report_fatal_error("Module name not provided for OxCaml GC!"); + report_fatal_error("[OxCamlGCPrinter] module name not provided"); } static void emitCamlGlobal(const Module &M, MCStreamer &OS, const char *Id) { @@ -135,11 +135,14 @@ static unsigned mapLLVMDwarfRegToOxCamlIndex(unsigned DwarfRegNum) { } else if (XMMBeginDwarf <= DwarfRegNum && DwarfRegNum <= XMMEndDwarf) { return DwarfRegNum - XMMBeginDwarf + XMMBeginOxCaml; } else { - report_fatal_error("Unrecognised DWARF register for use in OxCaml frametable: " + report_fatal_error("[OxCamlGCPrinter] unrecognised DWARF register: " + Twine(DwarfRegNum)); } } +// note that although `StackMaps` keeps `ID` as a 64-bit integer, anything +// above 32 bits gets truncated, so we can't use them. + static uint64_t stackOffsetOfID(uint64_t ID) { return ID & ((1ull << 16) - 1) & ~(1ull); } @@ -152,7 +155,14 @@ static bool IDHasAlloc(uint64_t ID) { return ID & 1ull; } -static const int allocMask = 2; +// Every 8-bit entry emitted in the frametable is offset by 2 (since that is the +// min allocation size). So, every slot can represent allocations of size [2, 257] +static uint8_t encodeAllocSize(uint64_t AllocSize) { + return AllocSize - 2; +} + +static const int AllocMask = 2; +static const int FrameSizeReservedMask = 3; // Debug + Alloc bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter &AP) { MCStreamer &OS = *AP.OutStreamer; @@ -189,16 +199,27 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter uint64_t FrameSize = CSI.CSFunctionInfo.StaticStackSize; FrameSize += PtrSize; // Return address + // The LLVM IR emitted from OxCaml will always set the statepoint ID for + // calls to be wrapped in a statepoint. Also, dote that DefaultStatepointID + // (= 0xABCDEF00 as of now) does not clash with the encoding we use since + // anything that sets the upper 16 bits will also set the bottom bit. if (CSI.ID != StatepointDirectives::DefaultStatepointID) { - // Alloc bit - if (IDHasAlloc(CSI.ID)) FrameSize |= allocMask; - // Stack offset from OxCaml FrameSize += stackOffsetOfID(CSI.ID); + + if (FrameSize & FrameSizeReservedMask) { + report_fatal_error("[OxCamlGCPrinter] frame size has bottom bits set: " + + Twine(FrameSize)); + } + + // Alloc bit + if (IDHasAlloc(CSI.ID)) { + FrameSize |= AllocMask; + } } if (FrameSize >= 1 << 16) - report_fatal_error("Long frames not supported for OxCaml GC: FrameSize = " + report_fatal_error("[OxCamlGCPrinter] frame size requires long frames:" + Twine(FrameSize)); OS.emitInt16(FrameSize); @@ -215,7 +236,7 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter if (LiveCount >= 1 << 16) { // Very rude! - report_fatal_error("Long frames not supported for OxCaml GC: LiveCount = " + report_fatal_error("[OxCamlGCPrinter] live count requires long frames:" + Twine(LiveCount)); } OS.emitInt16(LiveCount); @@ -243,7 +264,7 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter if (Offset < -(1 << 15) || Offset >= (1 << 15)) { // Very rude! - report_fatal_error("Stack offset too large for OxCaml frametable: " + report_fatal_error("[OxCamlGCPrinter] stack offset too large: " + Twine(Offset)); } OS.emitInt16(static_cast(Offset)); @@ -259,8 +280,48 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter } if (IDHasAlloc(CSI.ID)) { - OS.emitInt8(1); - OS.emitInt8(allocSizeOfID(CSI.ID)); + int AllocSize = allocSizeOfID(CSI.ID); + + if (AllocSize < 2) { + report_fatal_error("[OxCamlGCPrinter] alloc size must at least be two!"); + } + + // Allocations can theoretically go up to 255 * 257 = 65535 words, + // but in practice comballoc never gives us allocations that exceed 255, + // so this handling isn't necessarily needed, but it's here just in case. + + int MaxAllocSize = 257; + + if (AllocSize % MaxAllocSize == 0) { + size_t NumAlloc = AllocSize / MaxAllocSize; + + OS.emitInt8(NumAlloc); + for (size_t i = 0; i < NumAlloc; ++i) { + OS.emitInt8(encodeAllocSize(MaxAllocSize)); + } + } else if (AllocSize % MaxAllocSize == 1) { + // This is special since we cannot have allocations of size 1... + + // Guaranteed to be nonnegative + size_t NumMaxAlloc = AllocSize / MaxAllocSize - 1; + + OS.emitInt8(NumMaxAlloc + 2); + for (size_t i = 0; i < NumMaxAlloc; ++i) { + OS.emitInt8(encodeAllocSize(MaxAllocSize)); + } + + OS.emitInt8(encodeAllocSize(MaxAllocSize - 1)); + OS.emitInt8(encodeAllocSize(2)); + } else { + size_t NumMaxAlloc = AllocSize / MaxAllocSize; + + OS.emitInt8(NumMaxAlloc + 1); + for (size_t i = 0; i < NumMaxAlloc; ++i) { + OS.emitInt8(encodeAllocSize(MaxAllocSize)); + } + + OS.emitInt8(encodeAllocSize(AllocSize % MaxAllocSize)); + } } OS.emitValueToAlignment(Align(PtrSize)); From c347f3743c61a6415d13c2923358e9ae73bdbf53 Mon Sep 17 00:00:00 2001 From: usuyus Date: Thu, 25 Sep 2025 14:21:27 +0100 Subject: [PATCH 3/4] typos --- llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp index 618baeb744620..629938afb847f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp @@ -200,7 +200,7 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter FrameSize += PtrSize; // Return address // The LLVM IR emitted from OxCaml will always set the statepoint ID for - // calls to be wrapped in a statepoint. Also, dote that DefaultStatepointID + // calls to be wrapped in a statepoint. Also, note that DefaultStatepointID // (= 0xABCDEF00 as of now) does not clash with the encoding we use since // anything that sets the upper 16 bits will also set the bottom bit. if (CSI.ID != StatepointDirectives::DefaultStatepointID) { @@ -219,7 +219,7 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter } if (FrameSize >= 1 << 16) - report_fatal_error("[OxCamlGCPrinter] frame size requires long frames:" + report_fatal_error("[OxCamlGCPrinter] frame size requires long frames: " + Twine(FrameSize)); OS.emitInt16(FrameSize); @@ -236,7 +236,7 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter if (LiveCount >= 1 << 16) { // Very rude! - report_fatal_error("[OxCamlGCPrinter] live count requires long frames:" + report_fatal_error("[OxCamlGCPrinter] live count requires long frames: " + Twine(LiveCount)); } OS.emitInt16(LiveCount); From 81cb60dd5ff1c57f3527773a957661e0a2a86ecf Mon Sep 17 00:00:00 2001 From: usuyus Date: Thu, 25 Sep 2025 18:16:57 +0100 Subject: [PATCH 4/4] c stack args cc + use stack offset only when we have dynamic objects --- llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp | 8 ++++++-- llvm/lib/Target/X86/X86CallingConv.td | 10 ++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp index 629938afb847f..0da27ae303d6d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp @@ -204,8 +204,12 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter // (= 0xABCDEF00 as of now) does not clash with the encoding we use since // anything that sets the upper 16 bits will also set the bottom bit. if (CSI.ID != StatepointDirectives::DefaultStatepointID) { - // Stack offset from OxCaml - FrameSize += stackOffsetOfID(CSI.ID); + // Stack offset from OxCaml (in case LLVM says we have dynamic objects) + // This will get set to UINT64_MAX in `StackMaps.recordStackMapOpers` if + // that is the case. + if (CSI.CSFunctionInfo.FrameSize != UINT64_MAX) { + FrameSize += stackOffsetOfID(CSI.ID); + } if (FrameSize & FrameSizeReservedMask) { report_fatal_error("[OxCamlGCPrinter] frame size has bottom bits set: " diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index c941a2c1ee687..b26d740fefd65 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -766,11 +766,9 @@ def CC_X86_64_OxCaml_C_Call : CallingConv<[ def CC_X86_64_OxCaml_C_Call_StackArgs : CallingConv<[ // Calling conventions followed by [caml_c_call_stack_args] to additionally handle - // transfer of stack arguments. Note that this function normally takes a pair of - // pointers on the stack, but since LLVM makes it hard to directly meddle with the - // stack, this in reality calls yet anothr wrapper which calculates this range given - // the number of stack arguments in bytes in R12. - CCIfType<[i64], CCAssignToReg<[R14, R15, RAX, R12]>>, + // transfer of stack arguments. As before, RAX is the function ptr, and [R13, R12] + // delimit arguments on the stack + CCIfType<[i64], CCAssignToReg<[R14, R15, RAX, R13, R12]>>, // Follow C convention normally otherwise CCDelegateTo @@ -1322,7 +1320,7 @@ def CSR_64_OxCaml_WithoutFP : CalleeSavedRegs<(add)>; // R14 and R15 (and also R12 in the latter) are used as return registers, // so they aren't callee saved. def CSR_64_OxCaml_C_Call : CalleeSavedRegs<(sub CSR_64, R14, R15)>; -def CSR_64_OxCaml_C_Call_StackArgs : CalleeSavedRegs<(sub CSR_64, R14, R15, R12)>; +def CSR_64_OxCaml_C_Call_StackArgs : CalleeSavedRegs<(sub CSR_64, R14, R15, R13, R12)>; // See [Proc.destroyed_at_alloc_or_poll] for more details: // https://github.com/oxcaml/oxcaml/blob/main/backend/amd64/proc.ml#L457