Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions tests/ir_lowering/call_operands.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
; call f(1i32, 2i32, 3i32) [safepoint: 1i64, ()]
; ret 0i32
; }
;
; func llvm.experimental.stackmap(%arg0: i64, %arg1: i32, ...);
; ...

; Check a call instruction lowers and prints correctly.
;
Expand Down
5 changes: 3 additions & 2 deletions tests/ir_lowering/empty.ll
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
; Dump:
; stdout:
; # IR format version: 0
; # Num funcs: 1
; # Num funcs: 2
; # Num consts: 0
; # Num global decls: 0
; # Num types: 2
; # Num types: 4
;
; func main() {
; bb0:
; ret
; }
; ...

; The simplest test you could write. Checks an empty module lowers correctly.

Expand Down
1 change: 1 addition & 0 deletions tests/ir_lowering/gepoperand.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
; %0_1: i32 = load %0_0
; ret
; }
; ...

; Check that GEP operands are rewritten to GEP instructions which in turn are
; lowered to a ptr_add and a load.
Expand Down
1 change: 1 addition & 0 deletions tests/ir_lowering/mem_access.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
; unimplemented << store i16 0, ptr %0, align 1>>
; ret
; }
; ...

; Check that loads and stores lower OK.

Expand Down
1 change: 1 addition & 0 deletions tests/ir_lowering/null_ptr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
; bb0:
; ret 0x0
; }
; ...

; Check null pointer constants lower OK.

Expand Down
1 change: 1 addition & 0 deletions tests/ir_lowering/struct.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
; %0_1: {0: i32, 64: i64} = insert_val %0_0, 100i32
; ret
; }
; ...

; Check that a structure type lowers correctly.

Expand Down
45 changes: 22 additions & 23 deletions tests/ir_lowering/unsupported_variants.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,49 +2,48 @@
; stdout:
; ...
; func main(...
; bb0:
; bb{{_}}:
; ...
; %{{_}}: ?ty<<8 x ptr>> = unimplemented << %{{4}} = getelementptr i32, <8 x ptr> %{{1}}, i32 1>>
; %{{_}}: ?ty<<8 x ptr>> = unimplemented << %{{4}} = getelementptr i32, <8 x ptr> %{{1}}, i32 1, ...
; %{{_}}: ptr = unimplemented << %{{_}} = getelementptr [8 x i8], ptr %{{_}}, i512 %{{_}}>>
; br bb1
; bb1:
; %{{_}}: ptr = unimplemented << %{{6}} = alloca inalloca i32, align 4>>
; br bb{{_}}
; bb{{_}}:
; %{{_}}: ptr = unimplemented << %{{6}} = alloca inalloca i32, align 4, ...
; %{{_}}: ptr = unimplemented << %{{7}} = alloca i32, align 4, addrspace(4)>>
; %{{_}}: ptr = unimplemented << %{{8}} = alloca i32, i32 %2, align 4>>
; br bb2
; bb2:
; %{{_}}: float = unimplemented << %{{13}} = fadd nnan float %{{3}}, %{{3}}>>
; br bb{{_}}
; bb{{_}}:
; %{{_}}: float = unimplemented << %{{13}} = fadd nnan float %{{3}}, %{{3}}, ...
; %{{_}}: ?ty<<4 x i32>> = unimplemented << %{{15}} = add <4 x i32> %{{44}}, %{{44}}>>
; br bb3
; bb3:
; %{{_}}: i32 = unimplemented << %{{17}} = call i32 @f(i32 swiftself 5) <note: swiftself param attr>>>
; br bb{{_}}
; bb{{_}}:
; %{{_}}: i32 = unimplemented << %{{17}} = call i32 @f(i32 swiftself 5), !yk-swt-bb-purpose !4 <note: swiftself param attr>>>
; %{{_}}: i32 = unimplemented << %{{18}} = call inreg i32 @f(i32 5) <note: inreg ret attr>>>
; %{{_}}: i32 = unimplemented << %{{19}} = call i32 @f(i32 5) #{{0}} <note: alignstack(8) fn attr>>>
; %{{_}}: float = unimplemented << %{{20}} = call nnan float @g() <note: fastmath>>>
; %{{_}}: i32 = unimplemented << %{{21}} = call ghccc i32 @f(i32 5) <note: cconv>>>
; %{{_}}: i32 = unimplemented << %{{22}} = call i32 @f(i32 5) [ "kcfi"(i32 1234) ] <note: bundles>>>
; %{{_}}: ptr = unimplemented << %{{23}} = call addrspace(6) ptr @p() <note: addrspace>>>
; br bb4
; bb4:
; %{{_}}: ?ty<<8 x i8>> = unimplemented << %{{26}} = ptrtoint <8 x ptr> %{{ptrs}} to <8 x i8>>>
; br bb{{_}}
; bb{{_}}:
; %{{_}}: ?ty<<8 x i8>> = unimplemented << %{{26}} = ptrtoint <8 x ptr> %{{ptrs}} to <8 x i8>, ...
; %{{_}}: ?ty<<4 x i64>> = unimplemented << %{{_}} = sext <4 x i32> %{{_}} to <4 x i64>>>
; %{{_}}: ?ty<<4 x i64>> = unimplemented << %{{_}} = zext <4 x i32> %{{_}} to <4 x i64>>>
; %{{_}}: ?ty<<4 x i8>> = unimplemented << %{{_}} = trunc <4 x i32> %{{_}} to <4 x i8>>>
; br bb5
; bb5:
; %{{_}}: ?ty<<4 x i1>> = unimplemented << %{{27}} = icmp ne <4 x i32> %{{444}}, zeroinitializer>>
; br bb6
; bb6:
; br bb{{_}}
; bb{{_}}:
; %{{_}}: ?ty<<4 x i1>> = unimplemented << %{{27}} = icmp ne <4 x i32> %{{444}}, zeroinitializer, ...
; br bb{{_}}
; bb{{_}}:
; %{{_}}: i32 = load %0_0
; %{{_}}: i32 = unimplemented << %{{_}} = load i32, ptr addrspace(10) %{{_}}, align 4 <note: addrspace>>>
; %{{_}}: i32 = load %0_0
; br ...
; ...
; bb10:
; %{{_}}: float = unimplemented << %{{_}} = phi nnan float...
; br bb11
; bb11:
; unimplemented << store atomic i32 0, ptr %0 release, align 4>>
; br bb{{_}}
; bb{{_}}:
; unimplemented << store atomic i32 0, ptr %0 release, align 4, ...
; unimplemented << store i32 0, ptr addrspace(10) %5, align 4 <note: addrspace>>>
; unimplemented << store i32 0, ptr %0, align 2>>
; ret
Expand Down
14 changes: 11 additions & 3 deletions tests/langtest_ir_lowering.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,26 @@ fn main() {
// We don't use yk-config here, as we are testing one very specific functionality that
// requires only one special flag.
let mut compiler = Command::new(ykllvm_bin("clang"));
let md = env::var("CARGO_MANIFEST_DIR").unwrap();
let profile = full_cargo_profile();
let ykcapi_path = [&md, "..", "target", &profile, "deps"]
.iter()
.collect::<PathBuf>();
let ykcapi_linkdir = format!("-L{}", ykcapi_path.to_str().unwrap());
compiler.args([
"-flto",
"-fuse-ld=lld",
"-O0",
"-o",
exe.to_str().unwrap(),
"-Wl,-mllvm=--yk-embed-ir",
// The serialiser now assumes that we are doing software tracing.
"-Wl,--mllvm=--yk-basicblock-tracer",
// Link libykcapi so that the tests inherit the necessary software tracing symbols.
&ykcapi_linkdir,
"-lykcapi",
p.to_str().unwrap(),
]);

let md = env::var("CARGO_MANIFEST_DIR").unwrap();
let profile = full_cargo_profile();
let dumper_path = [&md, "..", "target", &profile, "dump_ir"]
.iter()
.collect::<PathBuf>();
Expand Down
26 changes: 22 additions & 4 deletions ykcapi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,29 @@ pub extern "C" fn __ykrt_control_point(
// FIXME: We could get rid of this entire function if we pass the frame's base pointer into the
// control point from the interpreter.
std::arch::naked_asm!(
// Pass the interpreter frame's base pointer via the 4th argument register.
"sub rsp, 8", // Alignment
"mov rcx, rbp", // Pass interpreter frame's base pointer via 4th argument register.
"sub rsp, 8", // Alignment
// Push the callee-save registers to the stack. This is required so that traces can read
// live variables from them.
"push rbp",
"push rbx",
"push r12",
"push r13",
"push r14",
"push r15",
// Pass interpreter frame's base pointer via 4th argument register.
"mov rcx, rbp",
// Do the call
"call __ykrt_control_point_real",
"add rsp, 8",
// Restore callee-save registers.
"pop r15",
"pop r14",
"pop r13",
"pop r12",
"pop rbx",
"pop rbp",
"add rsp, 8", // Alignment.
// NOTE! If the control point determined that a trace needs to be executed, then the return
// address has been overwritten and this `ret` will jump to the trace's entry point!
"ret",
);
}
Expand Down
72 changes: 59 additions & 13 deletions ykrt/src/compile/jitc_yk/trace_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ use crate::{
mt::{MT, TraceId},
trace::{AOTTraceIterator, TraceAction},
};
use smallvec::SmallVec;
use std::{collections::HashMap, ffi::CString, sync::Arc};
use ykaddr::addr::symbol_to_ptr;

/// Caller-saved registers in DWARF notation.
static CALLER_CLOBBER_REG: [u16; 9] = [0, 1, 2, 4, 5, 8, 9, 10, 11];
/// SysV x86_64 callee-saved registers in DWARF notation.
#[cfg(target_arch = "x86_64")]
static CALLEE_SAVE_REGS: [u16; 6] = [6, 3, 12, 13, 14, 15];

/// Given an execution trace and AOT IR, creates a JIT IR trace.
pub(crate) struct TraceBuilder {
Expand Down Expand Up @@ -135,6 +137,12 @@ impl TraceBuilder {
&mut self,
blk: &'static aot_ir::BBlock,
) -> Result<(), CompilationError> {
// This code assumes that the trace starts from a call to the control point. In other
// words, that it's a root trace.
assert!(!matches!(
self.jit_mod.tracekind(),
TraceKind::Sidetrace(..)
));
// Find the control point call to retrieve the live variables from its safepoint.
let safepoint = match self.jit_mod.tracekind() {
TraceKind::HeaderOnly | TraceKind::HeaderAndBody | TraceKind::DifferentFrames => {
Expand Down Expand Up @@ -172,29 +180,67 @@ impl TraceBuilder {
todo!("Deal with multi register locations");
}

// Rewrite registers to their spill locations. We need to do this as we no longer
// push/pop registers around the control point to reduce its overhead. We know that
// for every live variable in a caller-saved register there must exist a spill offset
// in that location's extras.
// Rewrite live values in caller-save registers to their saved locations.
//
// Due to the unconventional control flow involved in executing a root trace, we cannot
// guarantee that caller-save registers have their values preserved between entering
// the control point and executing a trace.
//
// When the interpreter calls the control point, and a trace is going to be executed,
// this is the sequence of events:
//
// - interpreter does caller-save before calling `__ykrt_control_point`.
// - `__ykrt_control_point` does the callee-save before running all of the Rust
// internals of the control point.
// - eventually the control point internals determine that a trace needs to be
// executed and calls `__yk_exec_trace`.
// - `__yk_exec_trace` *overwrites the return address* of the control point with the
// trace entry point, before returning.
// - Rust frames return normally until we reach the frame for `__ykrt_control_point`:
// the frame that had its return address overwritten.
// - `__ykrt_control_point` returns to *the trace*.
//
// After this, the stack looks as though the interpreter directly called the trace, but
// this isn't the case at all, and the caller-save registers remain in their "probably
// clobbered" state. Yet sometimes traces expect to be able to read from a caller-save
// register, as though the control point had never run.
//
// For every live variable in a caller-save register we *must* find the caller-saved
// copy of that value created during the call sequence to the control point. For each
// such live variable there must be a copy either spilled to the stack, or in a
// *callee-save* register. Here we tell the trace to use one of those alternative
// locations instead.
let loc = match &var[0] {
yksmp::Location::Register(reg, size, v) => {
yksmp::Location::Register(_, size, v) => {
let mut newloc = None;
for offset in v {
if *offset < 0 {
newloc = Some(yksmp::Location::Indirect(6, i32::from(*offset), *size));
break;
for extra in v {
if *extra > 0 && CALLEE_SAVE_REGS.contains(&u16::try_from(*extra).unwrap())
{
newloc = Some(yksmp::Location::Register(
u16::try_from(*extra).unwrap(),
*size,
SmallVec::new(),
));
break; // Stop now, a register save is the best-case scenario.
} else if *extra < 0 && newloc.is_none() {
newloc = Some(yksmp::Location::Indirect(6, i32::from(*extra), *size));
// No `break`. Keep trying, in case we find the value in a register,
// which is preferable.
}
}
if let Some(loc) = newloc {
loc
} else if CALLER_CLOBBER_REG.contains(reg) {
panic!("No spill offset for caller-saved register.")
} else {
var[0].clone()
}
}
_ => var[0].clone(),
};
if let yksmp::Location::Register(r, ..) = loc {
// If we plan to read a live value from a register, it must be from a callee-save
// register now. See large comment above for an explanation.
assert!(CALLEE_SAVE_REGS.contains(&r));
}

let param_inst = jit_ir::ParamInst::new(ParamIdx::try_from(idx)?, input_tyidx).into();
self.jit_mod.push(param_inst)?;
Expand Down
1 change: 1 addition & 0 deletions ykrt/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#![feature(assert_matches)]
#![feature(int_roundings)]
#![feature(trim_prefix_suffix)]
#![feature(thread_local)]
#![allow(clippy::too_many_arguments)]
#![allow(clippy::type_complexity)]
#![allow(clippy::upper_case_acronyms)]
Expand Down
23 changes: 14 additions & 9 deletions ykrt/src/mt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ use std::{
},
};

use atomic_enum::atomic_enum;
use parking_lot::Mutex;
#[cfg(not(all(feature = "yk_testing", not(test))))]
use parking_lot_core::SpinWait;
Expand Down Expand Up @@ -74,11 +73,17 @@ thread_local! {
/// This thread's [MTThread]. Do not access this directly: use [MTThread::with_borrow] or
/// [MTThread::with_borrow_mut].
static THREAD_MTTHREAD: RefCell<MTThread> = RefCell::new(MTThread::new());
/// Is this thread tracing something? Do not access this directly: use [MTThread::is_tracing]
/// and friends.
static THREAD_IS_TRACING: AtomicIsTracing = const { AtomicIsTracing::new(IsTracing::None) };
}

// The current thread's tracing state.
//
// Note: This thread local is shared with code generated by our BasicBlockTracer ykllvm pass, hence
// we use "native" TLS instead of `thread_local!`.
#[allow(non_upper_case_globals)]
#[unsafe(no_mangle)]
#[thread_local]
static mut __yk_thread_tracing_state: IsTracing = IsTracing::None;

/// A meta-tracer. This is always passed around stored in an [Arc].
///
/// When you are finished with this meta-tracer, it is best to explicitly call [MT::shutdown] to
Expand Down Expand Up @@ -1319,17 +1324,18 @@ impl MTThread {

/// Is this thread currently tracing something?
pub(crate) fn is_tracing() -> bool {
THREAD_IS_TRACING.with(|x| x.load(Ordering::Relaxed) != IsTracing::None)
unsafe { __yk_thread_tracing_state != IsTracing::None }
}

/// What kind of tracing (if any!) is this thread undertaking?
fn tracing_kind() -> IsTracing {
THREAD_IS_TRACING.with(|x| x.load(Ordering::Relaxed))
let raw = &raw mut __yk_thread_tracing_state;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will try again (in a follow-up PR) to use an atomic here, thus avoiding the need for &raw, but as you can see, this PR is already big enough.

unsafe { std::ptr::read(raw) }
}

/// Mark this thread as currently tracing something.
/// Set this thread's tracing state.
fn set_tracing(kind: IsTracing) {
THREAD_IS_TRACING.with(|x| x.store(kind, Ordering::Relaxed));
unsafe { __yk_thread_tracing_state = kind }
}

/// Call `f` with a `&` reference to this thread's [MTThread] instance.
Expand Down Expand Up @@ -1483,7 +1489,6 @@ impl MTThread {
}
}

#[atomic_enum]
#[derive(PartialEq)]
enum IsTracing {
None,
Expand Down
Loading