Skip to content

Commit 5a909ee

Browse files
k0kubunjacob-shops
authored and committed
ZJIT: Implement register spill (ruby#14936)
1 parent 31668c9 commit 5a909ee

File tree

5 files changed

+431
-151
lines changed

5 files changed

+431
-151
lines changed

zjit/src/asm/x86_64/mod.rs

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -779,13 +779,6 @@ pub fn imul(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
779779
write_rm(cb, false, true, opnd0, opnd1, None, &[0x0F, 0xAF]);
780780
}
781781

782-
// Flip the operands to handle this case. This instruction has weird encoding restrictions.
783-
(X86Opnd::Mem(_), X86Opnd::Reg(_)) => {
784-
//REX.W + 0F AF /rIMUL r64, r/m64
785-
// Quadword register := Quadword register * r/m64.
786-
write_rm(cb, false, true, opnd1, opnd0, None, &[0x0F, 0xAF]);
787-
}
788-
789782
_ => unreachable!()
790783
}
791784
}

zjit/src/asm/x86_64/tests.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -228,22 +228,28 @@ fn test_cqo() {
228228
fn test_imul() {
229229
let cb1 = compile(|cb| imul(cb, RAX, RBX));
230230
let cb2 = compile(|cb| imul(cb, RDX, mem_opnd(64, RAX, 0)));
231-
// Operands flipped for encoding since multiplication is commutative
232-
let cb3 = compile(|cb| imul(cb, mem_opnd(64, RAX, 0), RDX));
233231

234-
assert_disasm_snapshot!(disasms!(cb1, cb2, cb3), @r"
232+
assert_disasm_snapshot!(disasms!(cb1, cb2), @r"
235233
0x0: imul rax, rbx
236234
0x0: imul rdx, qword ptr [rax]
237-
0x0: imul rdx, qword ptr [rax]
238235
");
239236

240-
assert_snapshot!(hexdumps!(cb1, cb2, cb3), @r"
237+
assert_snapshot!(hexdumps!(cb1, cb2), @r"
241238
480fafc3
242239
480faf10
243-
480faf10
244240
");
245241
}
246242

243+
#[test]
244+
#[should_panic]
245+
fn test_imul_mem_reg() {
246+
// imul doesn't have (Mem, Reg) encoding. Since multiplication is commutative, imul() could
247+
// swap operands. However, x86_scratch_split may need to move the result to the output operand,
248+
// which can be complicated if the assembler may sometimes change the result operand.
249+
// So x86_scratch_split should be responsible for that swap, not the assembler.
250+
compile(|cb| imul(cb, mem_opnd(64, RAX, 0), RDX));
251+
}
252+
247253
#[test]
248254
fn test_jge_label() {
249255
let cb = compile(|cb| {

zjit/src/backend/arm64/mod.rs

Lines changed: 154 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ impl From<Opnd> for A64Opnd {
7979
Opnd::Mem(Mem { base: MemBase::VReg(_), .. }) => {
8080
panic!("attempted to lower an Opnd::Mem with a MemBase::VReg base")
8181
},
82+
Opnd::Mem(Mem { base: MemBase::Stack { .. }, .. }) => {
83+
panic!("attempted to lower an Opnd::Mem with a MemBase::Stack base")
84+
},
8285
Opnd::VReg { .. } => panic!("attempted to lower an Opnd::VReg"),
8386
Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"),
8487
Opnd::None => panic!(
@@ -203,6 +206,7 @@ pub const ALLOC_REGS: &[Reg] = &[
203206
/// [`Assembler::arm64_scratch_split`] or [`Assembler::new_with_scratch_reg`].
204207
const SCRATCH0_OPND: Opnd = Opnd::Reg(X15_REG);
205208
const SCRATCH1_OPND: Opnd = Opnd::Reg(X17_REG);
209+
const SCRATCH2_OPND: Opnd = Opnd::Reg(X14_REG);
206210

207211
impl Assembler {
208212
/// Special register for intermediate processing in arm64_emit. It should be used only by arm64_emit.
@@ -690,22 +694,129 @@ impl Assembler {
690694
/// need to be split with registers after `alloc_regs`, e.g. for `compile_exits`, so this
691695
/// splits them and uses scratch registers for it.
692696
fn arm64_scratch_split(self) -> Assembler {
693-
let mut asm = Assembler::new_with_asm(&self);
697+
/// If opnd is Opnd::Mem with a too large disp, make the disp smaller using lea.
698+
fn split_large_disp(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd) -> Opnd {
699+
match opnd {
700+
Opnd::Mem(Mem { num_bits, disp, .. }) if !mem_disp_fits_bits(disp) => {
701+
asm.lea_into(scratch_opnd, opnd);
702+
Opnd::mem(num_bits, scratch_opnd, 0)
703+
}
704+
_ => opnd,
705+
}
706+
}
707+
708+
/// If opnd is Opnd::Mem with MemBase::Stack, lower it to Opnd::Mem with MemBase::Reg, and split a large disp.
709+
fn split_stack_membase(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd, stack_state: &StackState) -> Opnd {
710+
let opnd = split_only_stack_membase(asm, opnd, scratch_opnd, stack_state);
711+
split_large_disp(asm, opnd, scratch_opnd)
712+
}
713+
714+
/// split_stack_membase but without split_large_disp. This should be used only by lea.
715+
fn split_only_stack_membase(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd, stack_state: &StackState) -> Opnd {
716+
if let Opnd::Mem(Mem { base: stack_membase @ MemBase::Stack { .. }, disp: opnd_disp, num_bits: opnd_num_bits }) = opnd {
717+
let base = Opnd::Mem(stack_state.stack_membase_to_mem(stack_membase));
718+
let base = split_large_disp(asm, base, scratch_opnd);
719+
asm.load_into(scratch_opnd, base);
720+
Opnd::Mem(Mem { base: MemBase::Reg(scratch_opnd.unwrap_reg().reg_no), disp: opnd_disp, num_bits: opnd_num_bits })
721+
} else {
722+
opnd
723+
}
724+
}
725+
726+
/// If opnd is Opnd::Mem, lower it to scratch_opnd. You should use this when `opnd` is read by the instruction, not written.
727+
fn split_memory_read(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd) -> Opnd {
728+
if let Opnd::Mem(_) = opnd {
729+
let opnd = split_large_disp(asm, opnd, scratch_opnd);
730+
let scratch_opnd = opnd.num_bits().map(|num_bits| scratch_opnd.with_num_bits(num_bits)).unwrap_or(scratch_opnd);
731+
asm.load_into(scratch_opnd, opnd);
732+
scratch_opnd
733+
} else {
734+
opnd
735+
}
736+
}
737+
738+
/// If opnd is Opnd::Mem, replace *opnd with scratch_opnd. Return Some(Opnd::Mem) holding the original memory operand if the result needs to be written back from scratch_opnd.
739+
fn split_memory_write(opnd: &mut Opnd, scratch_opnd: Opnd) -> Option<Opnd> {
740+
if let Opnd::Mem(_) = opnd {
741+
let mem_opnd = opnd.clone();
742+
*opnd = opnd.num_bits().map(|num_bits| scratch_opnd.with_num_bits(num_bits)).unwrap_or(scratch_opnd);
743+
Some(mem_opnd)
744+
} else {
745+
None
746+
}
747+
}
748+
749+
// Prepare StackState to lower MemBase::Stack
750+
let stack_state = StackState::new(self.stack_base_idx);
751+
752+
let mut asm_local = Assembler::new_with_asm(&self);
753+
let asm = &mut asm_local;
694754
asm.accept_scratch_reg = true;
695755
let mut iterator = self.insns.into_iter().enumerate().peekable();
696756

697757
while let Some((_, mut insn)) = iterator.next() {
698758
match &mut insn {
699-
&mut Insn::Mul { out, .. } => {
759+
Insn::Add { left, right, out } |
760+
Insn::Sub { left, right, out } |
761+
Insn::And { left, right, out } |
762+
Insn::Or { left, right, out } |
763+
Insn::Xor { left, right, out } |
764+
Insn::CSelZ { truthy: left, falsy: right, out } |
765+
Insn::CSelNZ { truthy: left, falsy: right, out } |
766+
Insn::CSelE { truthy: left, falsy: right, out } |
767+
Insn::CSelNE { truthy: left, falsy: right, out } |
768+
Insn::CSelL { truthy: left, falsy: right, out } |
769+
Insn::CSelLE { truthy: left, falsy: right, out } |
770+
Insn::CSelG { truthy: left, falsy: right, out } |
771+
Insn::CSelGE { truthy: left, falsy: right, out } => {
772+
*left = split_memory_read(asm, *left, SCRATCH0_OPND);
773+
*right = split_memory_read(asm, *right, SCRATCH1_OPND);
774+
let mem_out = split_memory_write(out, SCRATCH0_OPND);
775+
776+
asm.push_insn(insn);
777+
778+
if let Some(mem_out) = mem_out {
779+
let mem_out = split_large_disp(asm, mem_out, SCRATCH1_OPND);
780+
asm.store(mem_out, SCRATCH0_OPND);
781+
}
782+
}
783+
Insn::Mul { left, right, out } => {
784+
*left = split_memory_read(asm, *left, SCRATCH0_OPND);
785+
*right = split_memory_read(asm, *right, SCRATCH1_OPND);
786+
let mem_out = split_memory_write(out, SCRATCH0_OPND);
787+
let reg_out = out.clone();
788+
700789
asm.push_insn(insn);
701790

791+
if let Some(mem_out) = mem_out {
792+
let mem_out = split_large_disp(asm, mem_out, SCRATCH1_OPND);
793+
asm.store(mem_out, SCRATCH0_OPND);
794+
};
795+
702796
// If the next instruction is JoMul
703797
if matches!(iterator.peek(), Some((_, Insn::JoMul(_)))) {
704798
// Produce a register that is all zeros or all ones
705799
// Based on the sign bit of the 64-bit mul result
706-
asm.push_insn(Insn::RShift { out: SCRATCH0_OPND, opnd: out, shift: Opnd::UImm(63) });
800+
asm.push_insn(Insn::RShift { out: SCRATCH0_OPND, opnd: reg_out, shift: Opnd::UImm(63) });
801+
}
802+
}
803+
Insn::RShift { opnd, out, .. } => {
804+
*opnd = split_memory_read(asm, *opnd, SCRATCH0_OPND);
805+
let mem_out = split_memory_write(out, SCRATCH0_OPND);
806+
807+
asm.push_insn(insn);
808+
809+
if let Some(mem_out) = mem_out {
810+
let mem_out = split_large_disp(asm, mem_out, SCRATCH1_OPND);
811+
asm.store(mem_out, SCRATCH0_OPND);
707812
}
708813
}
814+
Insn::Cmp { left, right } |
815+
Insn::Test { left, right } => {
816+
*left = split_memory_read(asm, *left, SCRATCH0_OPND);
817+
*right = split_memory_read(asm, *right, SCRATCH1_OPND);
818+
asm.push_insn(insn);
819+
}
709820
// For compile_exits, support splitting simple C arguments here
710821
Insn::CCall { opnds, .. } if !opnds.is_empty() => {
711822
for (i, opnd) in opnds.iter().enumerate() {
@@ -714,16 +825,32 @@ impl Assembler {
714825
*opnds = vec![];
715826
asm.push_insn(insn);
716827
}
717-
&mut Insn::Lea { opnd, out } => {
718-
match (opnd, out) {
719-
// Split here for compile_exits
720-
(Opnd::Mem(_), Opnd::Mem(_)) => {
721-
asm.lea_into(SCRATCH0_OPND, opnd);
722-
asm.store(out, SCRATCH0_OPND);
723-
}
724-
_ => {
725-
asm.push_insn(insn);
828+
Insn::Lea { opnd, out } => {
829+
*opnd = split_only_stack_membase(asm, *opnd, SCRATCH0_OPND, &stack_state);
830+
let mem_out = split_memory_write(out, SCRATCH0_OPND);
831+
832+
asm.push_insn(insn);
833+
834+
if let Some(mem_out) = mem_out {
835+
let mem_out = split_large_disp(asm, mem_out, SCRATCH1_OPND);
836+
asm.store(mem_out, SCRATCH0_OPND);
837+
}
838+
}
839+
Insn::Load { opnd, out } |
840+
Insn::LoadInto { opnd, dest: out } => {
841+
*opnd = split_stack_membase(asm, *opnd, SCRATCH0_OPND, &stack_state);
842+
*out = split_stack_membase(asm, *out, SCRATCH1_OPND, &stack_state);
843+
844+
if let Opnd::Mem(_) = out {
845+
// If NATIVE_STACK_PTR is used as a source for Store, it's handled as xzr, storing zero.
846+
// To save the content of NATIVE_STACK_PTR, we need to load it into another register first.
847+
if *opnd == NATIVE_STACK_PTR {
848+
asm.load_into(SCRATCH0_OPND, NATIVE_STACK_PTR);
849+
*opnd = SCRATCH0_OPND;
726850
}
851+
asm.store(*out, *opnd);
852+
} else {
853+
asm.push_insn(insn);
727854
}
728855
}
729856
&mut Insn::IncrCounter { mem, value } => {
@@ -741,31 +868,24 @@ impl Assembler {
741868
asm.cmp(SCRATCH1_OPND, 0.into());
742869
asm.jne(label);
743870
}
744-
&mut Insn::Store { dest, src } => {
745-
let Opnd::Mem(Mem { num_bits: dest_num_bits, disp: dest_disp, .. }) = dest else {
746-
panic!("Insn::Store destination must be Opnd::Mem: {dest:?}, {src:?}");
747-
};
748-
749-
// Split dest using a scratch register if necessary.
750-
let dest = if mem_disp_fits_bits(dest_disp) {
751-
dest
752-
} else {
753-
asm.lea_into(SCRATCH0_OPND, dest);
754-
Opnd::mem(dest_num_bits, SCRATCH0_OPND, 0)
755-
};
756-
757-
asm.store(dest, src);
871+
Insn::Store { dest, .. } => {
872+
*dest = split_stack_membase(asm, *dest, SCRATCH0_OPND, &stack_state);
873+
asm.push_insn(insn);
758874
}
759-
&mut Insn::Mov { dest, src } => {
875+
Insn::Mov { dest, src } => {
876+
*src = split_stack_membase(asm, *src, SCRATCH0_OPND, &stack_state);
877+
*dest = split_large_disp(asm, *dest, SCRATCH1_OPND);
760878
match dest {
761-
Opnd::Reg(_) => asm.load_into(dest, src),
762-
Opnd::Mem(_) => asm.store(dest, src),
879+
Opnd::Reg(_) => asm.load_into(*dest, *src),
880+
Opnd::Mem(_) => asm.store(*dest, *src),
763881
_ => asm.push_insn(insn),
764882
}
765883
}
766884
// Resolve ParallelMov that couldn't be handled without a scratch register.
767885
Insn::ParallelMov { moves } => {
768886
for (dst, src) in Self::resolve_parallel_moves(moves, Some(SCRATCH0_OPND)).unwrap() {
887+
let src = split_stack_membase(asm, src, SCRATCH1_OPND, &stack_state);
888+
let dst = split_large_disp(asm, dst, SCRATCH2_OPND);
769889
match dst {
770890
Opnd::Reg(_) => asm.load_into(dst, src),
771891
Opnd::Mem(_) => asm.store(dst, src),
@@ -779,7 +899,7 @@ impl Assembler {
779899
}
780900
}
781901

782-
asm
902+
asm_local
783903
}
784904

785905
/// Emit platform-specific machine code
@@ -1157,10 +1277,11 @@ impl Assembler {
11571277
load_effective_address(cb, Self::EMIT_OPND, src_base_reg_no, src_disp);
11581278
A64Opnd::new_mem(dest.rm_num_bits(), Self::EMIT_OPND, 0)
11591279
};
1280+
let dst = A64Opnd::Reg(Self::EMIT_REG.with_num_bits(src_num_bits));
11601281
match src_num_bits {
1161-
64 | 32 => ldur(cb, Self::EMIT_OPND, src_mem),
1162-
16 => ldurh(cb, Self::EMIT_OPND, src_mem),
1163-
8 => ldurb(cb, Self::EMIT_OPND, src_mem),
1282+
64 | 32 => ldur(cb, dst, src_mem),
1283+
16 => ldurh(cb, dst, src_mem),
1284+
8 => ldurb(cb, dst, src_mem),
11641285
num_bits => panic!("unexpected num_bits: {num_bits}")
11651286
};
11661287
Self::EMIT_REG

0 commit comments

Comments
 (0)