Skip to content

Commit cc45953

Browse files
k0kubun, jacob-shops
authored and committed
ZJIT: Use a shared trampoline across all ISEQs (ruby#15042)
1 parent 4699c25 commit cc45953

File tree

10 files changed

+133
-71
lines changed

10 files changed

+133
-71
lines changed

vm.c

Lines changed: 44 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -503,7 +503,7 @@ rb_yjit_threshold_hit(const rb_iseq_t *iseq, uint64_t entry_calls)
503503
#define rb_yjit_threshold_hit(iseq, entry_calls) false
504504
#endif
505505

506-
#if USE_YJIT || USE_ZJIT
506+
#if USE_YJIT
507507
// Generate JIT code that supports the following kinds of ISEQ entries:
508508
// * The first ISEQ on vm_exec (e.g. <main>, or Ruby methods/blocks
509509
// called by a C method). The current frame has VM_FRAME_FLAG_FINISH.
@@ -513,13 +513,32 @@ rb_yjit_threshold_hit(const rb_iseq_t *iseq, uint64_t entry_calls)
513513
// The current frame doesn't have VM_FRAME_FLAG_FINISH. The current
514514
// vm_exec does NOT stop whether JIT code returns Qundef or not.
515515
static inline rb_jit_func_t
516-
jit_compile(rb_execution_context_t *ec)
516+
yjit_compile(rb_execution_context_t *ec)
517517
{
518518
const rb_iseq_t *iseq = ec->cfp->iseq;
519519
struct rb_iseq_constant_body *body = ISEQ_BODY(iseq);
520520

521+
// Increment the ISEQ's call counter and trigger JIT compilation if not compiled
522+
if (body->jit_entry == NULL) {
523+
body->jit_entry_calls++;
524+
if (rb_yjit_threshold_hit(iseq, body->jit_entry_calls)) {
525+
rb_yjit_compile_iseq(iseq, ec, false);
526+
}
527+
}
528+
return body->jit_entry;
529+
}
530+
#else
531+
# define yjit_compile(ec) ((rb_jit_func_t)0)
532+
#endif
533+
521534
#if USE_ZJIT
522-
if (body->jit_entry == NULL && rb_zjit_enabled_p) {
535+
static inline rb_jit_func_t
536+
zjit_compile(rb_execution_context_t *ec)
537+
{
538+
const rb_iseq_t *iseq = ec->cfp->iseq;
539+
struct rb_iseq_constant_body *body = ISEQ_BODY(iseq);
540+
541+
if (body->jit_entry == NULL) {
523542
body->jit_entry_calls++;
524543

525544
// At profile-threshold, rewrite some of the YARV instructions
@@ -533,38 +552,38 @@ jit_compile(rb_execution_context_t *ec)
533552
rb_zjit_compile_iseq(iseq, false);
534553
}
535554
}
536-
#endif
537-
538-
#if USE_YJIT
539-
// Increment the ISEQ's call counter and trigger JIT compilation if not compiled
540-
if (body->jit_entry == NULL && rb_yjit_enabled_p) {
541-
body->jit_entry_calls++;
542-
if (rb_yjit_threshold_hit(iseq, body->jit_entry_calls)) {
543-
rb_yjit_compile_iseq(iseq, ec, false);
544-
}
545-
}
546-
#endif
547555
return body->jit_entry;
548556
}
557+
#else
558+
# define zjit_compile(ec) ((rb_jit_func_t)0)
559+
#endif
549560

550-
// Execute JIT code compiled by jit_compile()
561+
// Execute JIT code compiled by yjit_compile() or zjit_compile()
551562
static inline VALUE
552563
jit_exec(rb_execution_context_t *ec)
553564
{
554-
rb_jit_func_t func = jit_compile(ec);
555-
if (func) {
556-
// Call the JIT code
557-
return func(ec, ec->cfp);
558-
}
559-
else {
565+
#if USE_YJIT
566+
if (rb_yjit_enabled_p) {
567+
rb_jit_func_t func = yjit_compile(ec);
568+
if (func) {
569+
return func(ec, ec->cfp);
570+
}
560571
return Qundef;
561572
}
562-
}
563-
#else
564-
# define jit_compile(ec) ((rb_jit_func_t)0)
565-
# define jit_exec(ec) Qundef
566573
#endif
567574

575+
#if USE_ZJIT
576+
void *zjit_entry = rb_zjit_entry;
577+
if (zjit_entry) {
578+
rb_jit_func_t func = zjit_compile(ec);
579+
if (func) {
580+
return ((rb_zjit_func_t)zjit_entry)(ec, ec->cfp, func);
581+
}
582+
}
583+
#endif
584+
return Qundef;
585+
}
586+
568587
#if USE_YJIT
569588
// Generate JIT code that supports the following kind of ISEQ entry:
570589
// * The first ISEQ pushed by vm_exec_handle_exception. The frame would

vm_core.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -398,6 +398,7 @@ enum rb_builtin_attr {
398398
};
399399

400400
typedef VALUE (*rb_jit_func_t)(struct rb_execution_context_struct *, struct rb_control_frame_struct *);
401+
typedef VALUE (*rb_zjit_func_t)(struct rb_execution_context_struct *, struct rb_control_frame_struct *, rb_jit_func_t);
401402

402403
struct rb_iseq_constant_body {
403404
enum rb_iseq_type type;

vm_exec.h

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -175,11 +175,22 @@ default: \
175175

176176
// Run the JIT from the interpreter
177177
#define JIT_EXEC(ec, val) do { \
178-
rb_jit_func_t func; \
179178
/* don't run tailcalls since that breaks FINISH */ \
180-
if (UNDEF_P(val) && GET_CFP() != ec->cfp && (func = jit_compile(ec))) { \
181-
val = func(ec, ec->cfp); \
182-
if (ec->tag->state) THROW_EXCEPTION(val); \
179+
if (UNDEF_P(val) && GET_CFP() != ec->cfp) { \
180+
rb_zjit_func_t zjit_entry; \
181+
if (rb_yjit_enabled_p) { \
182+
rb_jit_func_t func = yjit_compile(ec); \
183+
if (func) { \
184+
val = func(ec, ec->cfp); \
185+
if (ec->tag->state) THROW_EXCEPTION(val); \
186+
} \
187+
} \
188+
else if ((zjit_entry = rb_zjit_entry)) { \
189+
rb_jit_func_t func = zjit_compile(ec); \
190+
if (func) { \
191+
val = zjit_entry(ec, ec->cfp, func); \
192+
} \
193+
} \
183194
} \
184195
} while (0)
185196

zjit.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
#endif
1111

1212
#if USE_ZJIT
13-
extern bool rb_zjit_enabled_p;
13+
extern void *rb_zjit_entry;
1414
extern uint64_t rb_zjit_call_threshold;
1515
extern uint64_t rb_zjit_profile_threshold;
1616
void rb_zjit_compile_iseq(const rb_iseq_t *iseq, bool jit_exception);
@@ -29,7 +29,7 @@ void rb_zjit_before_ractor_spawn(void);
2929
void rb_zjit_tracing_invalidate_all(void);
3030
void rb_zjit_invalidate_no_singleton_class(VALUE klass);
3131
#else
32-
#define rb_zjit_enabled_p false
32+
#define rb_zjit_entry 0
3333
static inline void rb_zjit_compile_iseq(const rb_iseq_t *iseq, bool jit_exception) {}
3434
static inline void rb_zjit_profile_insn(uint32_t insn, rb_execution_context_t *ec) {}
3535
static inline void rb_zjit_profile_enable(const rb_iseq_t *iseq) {}
@@ -42,4 +42,6 @@ static inline void rb_zjit_tracing_invalidate_all(void) {}
4242
static inline void rb_zjit_invalidate_no_singleton_class(VALUE klass) {}
4343
#endif // #if USE_ZJIT
4444

45+
#define rb_zjit_enabled_p (rb_zjit_entry != 0)
46+
4547
#endif // #ifndef ZJIT_H

zjit/src/backend/arm64/mod.rs

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1428,17 +1428,25 @@ impl Assembler {
14281428
}
14291429
},
14301430
Insn::CCall { fptr, .. } => {
1431-
// The offset to the call target in bytes
1432-
let src_addr = cb.get_write_ptr().raw_ptr(cb) as i64;
1433-
let dst_addr = *fptr as i64;
1434-
1435-
// Use BL if the offset is short enough to encode as an immediate.
1436-
// Otherwise, use BLR with a register.
1437-
if b_offset_fits_bits((dst_addr - src_addr) / 4) {
1438-
bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
1439-
} else {
1440-
emit_load_value(cb, Self::EMIT_OPND, dst_addr as u64);
1441-
blr(cb, Self::EMIT_OPND);
1431+
match fptr {
1432+
Opnd::UImm(fptr) => {
1433+
// The offset to the call target in bytes
1434+
let src_addr = cb.get_write_ptr().raw_ptr(cb) as i64;
1435+
let dst_addr = *fptr as i64;
1436+
1437+
// Use BL if the offset is short enough to encode as an immediate.
1438+
// Otherwise, use BLR with a register.
1439+
if b_offset_fits_bits((dst_addr - src_addr) / 4) {
1440+
bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
1441+
} else {
1442+
emit_load_value(cb, Self::EMIT_OPND, dst_addr as u64);
1443+
blr(cb, Self::EMIT_OPND);
1444+
}
1445+
}
1446+
Opnd::Reg(_) => {
1447+
blr(cb, fptr.into());
1448+
}
1449+
_ => unreachable!("unsupported ccall fptr: {fptr:?}")
14421450
}
14431451
},
14441452
Insn::CRet { .. } => {

zjit/src/backend/lir.rs

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -386,7 +386,9 @@ pub enum Insn {
386386
// C function call with N arguments (variadic)
387387
CCall {
388388
opnds: Vec<Opnd>,
389-
fptr: *const u8,
389+
/// The function pointer to be called. This should be Opnd::const_ptr
390+
/// (Opnd::UImm) in most cases. gen_entry_trampoline() uses Opnd::Reg.
391+
fptr: Opnd,
390392
/// Optional PosMarker to remember the start address of the C call.
391393
/// It's embedded here to insert the PosMarker after push instructions
392394
/// that are split from this CCall on alloc_regs().
@@ -1989,11 +1991,20 @@ impl Assembler {
19891991
pub fn ccall(&mut self, fptr: *const u8, opnds: Vec<Opnd>) -> Opnd {
19901992
let canary_opnd = self.set_stack_canary();
19911993
let out = self.new_vreg(Opnd::match_num_bits(&opnds));
1994+
let fptr = Opnd::const_ptr(fptr);
19921995
self.push_insn(Insn::CCall { fptr, opnds, start_marker: None, end_marker: None, out });
19931996
self.clear_stack_canary(canary_opnd);
19941997
out
19951998
}
19961999

2000+
/// Call a C function stored in a register
2001+
pub fn ccall_reg(&mut self, fptr: Opnd, num_bits: u8) -> Opnd {
2002+
assert!(matches!(fptr, Opnd::Reg(_)), "ccall_reg must be called with Opnd::Reg: {fptr:?}");
2003+
let out = self.new_vreg(num_bits);
2004+
self.push_insn(Insn::CCall { fptr, opnds: vec![], start_marker: None, end_marker: None, out });
2005+
out
2006+
}
2007+
19972008
/// Call a C function with PosMarkers. This is used for recording the start and end
19982009
/// addresses of the C call and rewriting it with a different function address later.
19992010
pub fn ccall_with_pos_markers(
@@ -2005,7 +2016,7 @@ impl Assembler {
20052016
) -> Opnd {
20062017
let out = self.new_vreg(Opnd::match_num_bits(&opnds));
20072018
self.push_insn(Insn::CCall {
2008-
fptr,
2019+
fptr: Opnd::const_ptr(fptr),
20092020
opnds,
20102021
start_marker: Some(Rc::new(start_marker)),
20112022
end_marker: Some(Rc::new(end_marker)),

zjit/src/backend/x86_64/mod.rs

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -863,7 +863,15 @@ impl Assembler {
863863

864864
// C function call
865865
Insn::CCall { fptr, .. } => {
866-
call_ptr(cb, RAX, *fptr);
866+
match fptr {
867+
Opnd::UImm(fptr) => {
868+
call_ptr(cb, RAX, *fptr as *const u8);
869+
}
870+
Opnd::Reg(_) => {
871+
call(cb, fptr.into());
872+
}
873+
_ => unreachable!("unsupported ccall fptr: {fptr:?}")
874+
}
867875
},
868876

869877
Insn::CRet(opnd) => {

zjit/src/codegen.rs

Lines changed: 12 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -106,8 +106,7 @@ pub extern "C" fn rb_zjit_iseq_gen_entry_point(iseq: IseqPtr, jit_exception: boo
106106
}
107107

108108
// Always mark the code region executable if asm.compile() has been used.
109-
// We need to do this even if code_ptr is None because, whether gen_entry()
110-
// fails or not, gen_iseq() may have already used asm.compile().
109+
// We need to do this even if code_ptr is None because gen_iseq() may have already used asm.compile().
111110
cb.mark_all_executable();
112111

113112
code_ptr.map_or(std::ptr::null(), |ptr| ptr.raw_ptr(cb))
@@ -131,10 +130,7 @@ fn gen_iseq_entry_point(cb: &mut CodeBlock, iseq: IseqPtr, jit_exception: bool)
131130
debug!("{err:?}: gen_iseq failed: {}", iseq_get_location(iseq, 0));
132131
})?;
133132

134-
// Compile an entry point to the JIT code
135-
gen_entry(cb, iseq, start_ptr).inspect_err(|err| {
136-
debug!("{err:?}: gen_entry failed: {}", iseq_get_location(iseq, 0));
137-
})
133+
Ok(start_ptr)
138134
}
139135

140136
/// Stub a branch for a JIT-to-JIT call
@@ -170,14 +166,16 @@ fn register_with_perf(iseq_name: String, start_ptr: usize, code_size: usize) {
170166
};
171167
}
172168

173-
/// Compile a JIT entry
174-
fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function_ptr: CodePtr) -> Result<CodePtr, CompileError> {
169+
/// Compile a shared JIT entry trampoline
170+
pub fn gen_entry_trampoline(cb: &mut CodeBlock) -> Result<CodePtr, CompileError> {
175171
// Set up registers for CFP, EC, SP, and basic block arguments
176172
let mut asm = Assembler::new();
177-
gen_entry_prologue(&mut asm, iseq);
173+
gen_entry_prologue(&mut asm);
178174

179-
// Jump to the first block using a call instruction
180-
asm.ccall(function_ptr.raw_ptr(cb), vec![]);
175+
// Jump to the first block using a call instruction. This trampoline is used
176+
// as rb_zjit_func_t in jit_exec(), which takes (EC, CFP, rb_jit_func_t).
177+
// So C_ARG_OPNDS[2] is rb_jit_func_t, which is (EC, CFP) -> VALUE.
178+
asm.ccall_reg(C_ARG_OPNDS[2], VALUE_BITS);
181179

182180
// Restore registers for CFP, EC, and SP after use
183181
asm_comment!(asm, "return to the interpreter");
@@ -190,8 +188,7 @@ fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function_ptr: CodePtr) -> Result
190188
let start_ptr = code_ptr.raw_addr(cb);
191189
let end_ptr = cb.get_write_ptr().raw_addr(cb);
192190
let code_size = end_ptr - start_ptr;
193-
let iseq_name = iseq_get_location(iseq, 0);
194-
register_with_perf(format!("entry for {iseq_name}"), start_ptr, code_size);
191+
register_with_perf("ZJIT entry trampoline".into(), start_ptr, code_size);
195192
}
196193
Ok(code_ptr)
197194
}
@@ -990,8 +987,8 @@ fn gen_load_field(asm: &mut Assembler, recv: Opnd, id: ID, offset: i32) -> Opnd
990987
}
991988

992989
/// Compile an interpreter entry block to be inserted into an ISEQ
993-
fn gen_entry_prologue(asm: &mut Assembler, iseq: IseqPtr) {
994-
asm_comment!(asm, "ZJIT entry point: {}", iseq_get_location(iseq, 0));
990+
fn gen_entry_prologue(asm: &mut Assembler) {
991+
asm_comment!(asm, "ZJIT entry trampoline");
995992
// Save the registers we'll use for CFP, EP, SP
996993
asm.frame_setup(lir::JIT_PRESERVED_REGS);
997994

zjit/src/cruby.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1071,7 +1071,7 @@ pub use manual_defs::*;
10711071
pub mod test_utils {
10721072
use std::{ptr::null, sync::Once};
10731073

1074-
use crate::{options::{rb_zjit_call_threshold, rb_zjit_prepare_options, set_call_threshold, DEFAULT_CALL_THRESHOLD}, state::{rb_zjit_enabled_p, ZJITState}};
1074+
use crate::{options::{rb_zjit_call_threshold, rb_zjit_prepare_options, set_call_threshold, DEFAULT_CALL_THRESHOLD}, state::{rb_zjit_entry, ZJITState}};
10751075

10761076
use super::*;
10771077

@@ -1114,10 +1114,10 @@ pub mod test_utils {
11141114
}
11151115

11161116
// Set up globals for convenience
1117-
ZJITState::init();
1117+
let zjit_entry = ZJITState::init();
11181118

11191119
// Enable zjit_* instructions
1120-
unsafe { rb_zjit_enabled_p = true; }
1120+
unsafe { rb_zjit_entry = zjit_entry; }
11211121
}
11221122

11231123
/// Make sure the Ruby VM is set up and run a given callback with rb_protect()

0 commit comments

Comments
 (0)