Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 97 additions & 7 deletions llvm/lib/CodeGen/AsmPrinter/OxCamlGCPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ static std::string camlGlobalSymName(const Module &M, const char *Id) {
}
}

report_fatal_error("Module name not provided for OxCaml GC!");
report_fatal_error("[OxCamlGCPrinter] module name not provided");
}

static void emitCamlGlobal(const Module &M, MCStreamer &OS, const char *Id) {
Expand Down Expand Up @@ -135,11 +135,35 @@ static unsigned mapLLVMDwarfRegToOxCamlIndex(unsigned DwarfRegNum) {
} else if (XMMBeginDwarf <= DwarfRegNum && DwarfRegNum <= XMMEndDwarf) {
return DwarfRegNum - XMMBeginDwarf + XMMBeginOxCaml;
} else {
report_fatal_error("Unrecognised DWARF register for use in OxCaml frametable: "
report_fatal_error("[OxCamlGCPrinter] unrecognised DWARF register: "
+ Twine(DwarfRegNum));
}
}

// Note that although `StackMaps` keeps `ID` as a 64-bit integer, anything
// above 32 bits gets truncated, so we can't use those upper bits.

// Returns the stack-offset field of a statepoint ID: the low 16 bits of `ID`
// with bit 0 cleared (bit 0 is the flag tested by `IDHasAlloc`).
static uint64_t stackOffsetOfID(uint64_t ID) {
  const uint64_t Low16Mask = 0xFFFFull;
  const uint64_t AllocFlagBit = 1ull;
  return (ID & Low16Mask) & ~AllocFlagBit;
}

// Returns the allocation-size field of a statepoint ID, i.e. every bit of
// `ID` above the low 16.
static uint64_t allocSizeOfID(uint64_t ID) {
  constexpr unsigned AllocSizeShift = 16;
  const uint64_t AllocSize = ID >> AllocSizeShift;
  return AllocSize;
}

// Returns true when bit 0 of the statepoint ID is set, which marks the call
// site as carrying an allocation size in its upper bits.
static bool IDHasAlloc(uint64_t ID) {
  const uint64_t AllocFlagBit = 1ull;
  return (ID & AllocFlagBit) != 0;
}

// Frametable allocation entries are a single byte biased by 2, the minimum
// allocation size, so one entry covers sizes in the range [2, 257].
// The result is `AllocSize - 2` narrowed to 8 bits; values outside that range
// wrap modulo 256, so callers are expected to pass sizes within [2, 257].
static uint8_t encodeAllocSize(uint64_t AllocSize) {
  constexpr uint64_t MinAllocSize = 2;
  const uint64_t Biased = AllocSize - MinAllocSize;
  return static_cast<uint8_t>(Biased);
}

// Flag OR'ed into the emitted frame size when the call site's ID has its
// alloc bit set.
static constexpr int AllocMask = 1 << 1;
// Low bits of the frame size that must be clear before any flag is OR'ed in;
// a frame size with either bit already set is reported as a fatal error.
static constexpr int FrameSizeReservedMask = 0x3; // Debug + Alloc

bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter &AP) {
MCStreamer &OS = *AP.OutStreamer;
unsigned PtrSize = M.getDataLayout().getPointerSize(); // Can only be 8 for now
Expand Down Expand Up @@ -173,12 +197,33 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter

// frame_data
uint64_t FrameSize = CSI.CSFunctionInfo.StaticStackSize;
if (CSI.ID != StatepointDirectives::DefaultStatepointID)
FrameSize += CSI.ID; // Stack offset from OxCaml
FrameSize += PtrSize; // Return address

// The LLVM IR emitted from OxCaml will always set the statepoint ID for
// calls to be wrapped in a statepoint. Also, note that DefaultStatepointID
// (= 0xABCDEF00 as of now) does not clash with the encoding we use since
// anything that sets the upper 16 bits will also set the bottom bit.
if (CSI.ID != StatepointDirectives::DefaultStatepointID) {
// Stack offset from OxCaml (in case LLVM says we have dynamic objects)
// This will get set to UINT64_MAX in `StackMaps.recordStackMapOpers` if
// that is the case.
if (CSI.CSFunctionInfo.FrameSize != UINT64_MAX) {
FrameSize += stackOffsetOfID(CSI.ID);
}

if (FrameSize & FrameSizeReservedMask) {
report_fatal_error("[OxCamlGCPrinter] frame size has bottom bits set: "
+ Twine(FrameSize));
}

// Alloc bit
if (IDHasAlloc(CSI.ID)) {
FrameSize |= AllocMask;
}
}

if (FrameSize >= 1 << 16)
report_fatal_error("Long frames not supported for OxCaml GC: FrameSize = "
report_fatal_error("[OxCamlGCPrinter] frame size requires long frames: "
+ Twine(FrameSize));
OS.emitInt16(FrameSize);

Expand All @@ -195,7 +240,7 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter

if (LiveCount >= 1 << 16) {
// Very rude!
report_fatal_error("Long frames not supported for OxCaml GC: LiveCount = "
report_fatal_error("[OxCamlGCPrinter] live count requires long frames: "
+ Twine(LiveCount));
}
OS.emitInt16(LiveCount);
Expand Down Expand Up @@ -223,7 +268,7 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter

if (Offset < -(1 << 15) || Offset >= (1 << 15)) {
// Very rude!
report_fatal_error("Stack offset too large for OxCaml frametable: "
report_fatal_error("[OxCamlGCPrinter] stack offset too large: "
+ Twine(Offset));
}
OS.emitInt16(static_cast<uint16_t>(Offset));
Expand All @@ -238,6 +283,51 @@ bool OxCamlGCMetadataPrinter::emitStackMaps(Module &M, StackMaps &SM, AsmPrinter
OS.emitInt16(EncodedReg);
}

if (IDHasAlloc(CSI.ID)) {
int AllocSize = allocSizeOfID(CSI.ID);

if (AllocSize < 2) {
report_fatal_error("[OxCamlGCPrinter] alloc size must at least be two!");
}

// Allocations can theoretically go up to 255 * 257 = 65535 words,
// but in practice comballoc never gives us allocations that exceed 255,
// so this handling isn't necessarily needed, but it's here just in case.

int MaxAllocSize = 257;

if (AllocSize % MaxAllocSize == 0) {
size_t NumAlloc = AllocSize / MaxAllocSize;

OS.emitInt8(NumAlloc);
for (size_t i = 0; i < NumAlloc; ++i) {
OS.emitInt8(encodeAllocSize(MaxAllocSize));
}
} else if (AllocSize % MaxAllocSize == 1) {
// This is special since we cannot have allocations of size 1...

// Guaranteed to be nonnegative
size_t NumMaxAlloc = AllocSize / MaxAllocSize - 1;

OS.emitInt8(NumMaxAlloc + 2);
for (size_t i = 0; i < NumMaxAlloc; ++i) {
OS.emitInt8(encodeAllocSize(MaxAllocSize));
}

OS.emitInt8(encodeAllocSize(MaxAllocSize - 1));
OS.emitInt8(encodeAllocSize(2));
} else {
size_t NumMaxAlloc = AllocSize / MaxAllocSize;

OS.emitInt8(NumMaxAlloc + 1);
for (size_t i = 0; i < NumMaxAlloc; ++i) {
OS.emitInt8(encodeAllocSize(MaxAllocSize));
}

OS.emitInt8(encodeAllocSize(AllocSize % MaxAllocSize));
}
}

OS.emitValueToAlignment(Align(PtrSize));
}

Expand Down
10 changes: 4 additions & 6 deletions llvm/lib/Target/X86/X86CallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -766,11 +766,9 @@ def CC_X86_64_OxCaml_C_Call : CallingConv<[

def CC_X86_64_OxCaml_C_Call_StackArgs : CallingConv<[
// Calling conventions followed by [caml_c_call_stack_args] to additionally handle
// transfer of stack arguments. Note that this function normally takes a pair of
// pointers on the stack, but since LLVM makes it hard to directly meddle with the
// stack, this in reality calls yet another wrapper which calculates this range given
// the number of stack arguments in bytes in R12.
CCIfType<[i64], CCAssignToReg<[R14, R15, RAX, R12]>>,
// transfer of stack arguments. As before, RAX is the function ptr, and [R13, R12]
// delimit arguments on the stack
CCIfType<[i64], CCAssignToReg<[R14, R15, RAX, R13, R12]>>,

// Follow C convention normally otherwise
CCDelegateTo<CC_X86_64_C>
Expand Down Expand Up @@ -1322,7 +1320,7 @@ def CSR_64_OxCaml_WithoutFP : CalleeSavedRegs<(add)>;
// R14 and R15 (and also R12 in the latter) are used as return registers,
// so they aren't callee saved.
def CSR_64_OxCaml_C_Call : CalleeSavedRegs<(sub CSR_64, R14, R15)>;
def CSR_64_OxCaml_C_Call_StackArgs : CalleeSavedRegs<(sub CSR_64, R14, R15, R12)>;
def CSR_64_OxCaml_C_Call_StackArgs : CalleeSavedRegs<(sub CSR_64, R14, R15, R13, R12)>;

// See [Proc.destroyed_at_alloc_or_poll] for more details:
// https://github.com/oxcaml/oxcaml/blob/main/backend/amd64/proc.ml#L457
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3087,6 +3087,11 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
"Don't expect any other calls here!");
return false;
}

// `musttail` calls wrapped in statepoints fail to verify due to
// the intrinsic using variadic arguments.
if (Call->isMustTailCall()) return false;

return true;
}
return false;
Expand Down