Skip to content

Commit

Permalink
aarch64: Add support for the extr instruction
Browse files Browse the repository at this point in the history
This is pattern-matched from `bor` patterns of a specific shape. I found
this when doing some benchmarking of Wasmtime on aarch64 and I saw LLVM
generating this pattern but Wasmtime didn't. I didn't perform any
benchmarking between wasmtime/native though, so I'm just relying on this
reducing the number of instructions to probably be a wee bit faster.
  • Loading branch information
alexcrichton committed Feb 13, 2025
1 parent 305c3f9 commit 9f4a9e1
Show file tree
Hide file tree
Showing 7 changed files with 326 additions and 1 deletion.
6 changes: 6 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3302,6 +3302,12 @@
(decl a64_rotr_imm (Type Reg ImmShift) Reg)
(rule (a64_rotr_imm ty x y) (alu_rr_imm_shift (ALUOp.RotR) ty x y))

;; Helpers for generating `extr` instructions.
;;
;; `a64_extr` emits its instruction through `alu_rrr_shift` using
;; `ALUOp.RotR`, with `x` and `y` as the two register sources. The `ImmShift`
;; amount is first converted by `a64_extr_imm` into the `ShiftOpAndAmt`
;; operand that `alu_rrr_shift` expects.
(decl a64_extr (Type Reg Reg ImmShift) Reg)
(rule (a64_extr ty x y shift) (alu_rrr_shift (ALUOp.RotR) ty x y (a64_extr_imm ty shift)))
;; Implemented in Rust: builds the `ShiftOpAndAmt` used by `a64_extr` from the
;; operation type and the shift amount.
(decl a64_extr_imm (Type ImmShift) ShiftOpAndAmt)
(extern constructor a64_extr_imm a64_extr_imm)

;; Helpers for generating `rbit` instructions.
(spec (rbit ty a)
(provide
Expand Down
1 change: 1 addition & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,7 @@ impl MachInstEmit for Inst {
ALUOp::OrrNot => 0b001_01010001,
ALUOp::EorNot => 0b010_01010001,
ALUOp::AndNot => 0b000_01010001,
ALUOp::RotR => 0b000_10011100,
_ => unimplemented!("{:?}", alu_op),
};
let top11 = top11 | size.sf_bit() << 10;
Expand Down
23 changes: 23 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1426,6 +1426,29 @@
(rule 3 (lower (has_type $I128 (bor x (bnot y)))) (i128_alu_bitop (ALUOp.OrrNot) $I64 x y))
(rule 4 (lower (has_type $I128 (bor (bnot y) x))) (i128_alu_bitop (ALUOp.OrrNot) $I64 x y))

;; Specialized lowerings to generate the `extr` instruction.
;;
;; The `extr` instruction creates `a:b` and then extracts either 32 or 64-bits
;; starting from an immediate index. This is pattern-matched here as a `bor` of
;; the high/low halves of two values shifted around.
;;
;; The immediate used for the `extr` instruction itself is the N for the
;; shift-right. Two patterns are used here to detect either ordering of the
;; `bor`.
;;
;; In both rules `x` is the operand shifted left by the constant `xs` and `y`
;; is the operand shifted right by the constant `ys`; they are handed to
;; `a64_extr` in that order regardless of the order they appear in the `bor`.
;; The `has_type ty` pattern does not constrain `ty`, so the `a64_extr_shift`
;; check is the only guard deciding which types and shift pairs reach
;; `a64_extr`.
(rule 5 (lower (has_type ty (bor (ishl x (u8_from_iconst xs))
(ushr y (u8_from_iconst ys)))))
(if-let shift (a64_extr_shift ty xs ys))
(a64_extr ty x y shift))
(rule 5 (lower (has_type ty (bor (ushr y (u8_from_iconst ys))
(ishl x (u8_from_iconst xs)))))
(if-let shift (a64_extr_shift ty xs ys))
(a64_extr ty x y shift))

;; Helper in Rust to test whether the pair of `u8` shift amounts add up to the
;; width of the specified type. The constructor is partial: when the test
;; passes it returns the second amount (the right-shift, `ys`) as an
;; `ImmShift`; otherwise it yields nothing and the `if-let` in the rules above
;; fails to match.
(decl pure partial a64_extr_shift (Type u8 u8) ImmShift)
(extern constructor a64_extr_shift a64_extr_shift)

;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule bxor_fits_in_64 -1 (lower (has_type (fits_in_64 ty) (bxor x y)))
Expand Down
29 changes: 29 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -762,4 +762,33 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
}
Some(bit as u8)
}

/// Used as a helper for generation of `extr` instructions.
///
/// Tests that `xs + ys == ty.bits()` and then casts `ys` to the `ImmShift`
/// return value.
fn a64_extr_shift(&mut self, ty: Type, xs: u8, ys: u8) -> Option<ImmShift> {
if u32::from(xs.checked_add(ys)?) != ty.bits() {
return None;
}
ImmShift::maybe_from_u64(ys.into())
}

/// Builds the `ShiftOpAndAmt` operand that `AluRRRShift` needs when it is
/// used to emit an `extr` instruction.
///
/// Only `I32` and `I64` are supported; any other type is a lowering bug and
/// panics.
fn a64_extr_imm(&mut self, ty: Type, shift: ImmShift) -> ShiftOpAndAmt {
    // `AluRRRShift` carries its immediate as a `ShiftOpAndAmt`, so the type
    // and shift amount are repackaged into one here. The chosen `ShiftOp` is
    // picked purely for the bits it encodes to, not for what the shift
    // operation means.
    let (encoding, shift_op) = match ty {
        types::I64 => (0b01, ShiftOp::LSR),
        types::I32 => (0b00, ShiftOp::LSL),
        _ => unreachable!(),
    };

    // Guard against the `ShiftOp` encodings ever changing underneath us.
    assert_eq!(shift_op.bits(), encoding);

    let amount = ShiftOpShiftImm::maybe_from_shift(shift.value().into()).unwrap();
    ShiftOpAndAmt::new(shift_op, amount)
}
}
111 changes: 111 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/extr.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
test compile precise-output
target aarch64

function %a64_extr_i32_12(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = ushr_imm v0, 12
v3 = ishl_imm v1, 20
v4 = bor v2, v3
return v4
}

; VCode:
; block0:
; ror w0, w1, w0, LSL 12
; ret
;
; Disassembled:
; block0: ; offset 0x0
; extr w0, w1, w0, #0xc
; ret

function %a64_extr_i32_12_swap(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = ishl_imm v0, 20
v3 = ushr_imm v1, 12
v4 = bor v2, v3
return v4
}

; VCode:
; block0:
; ror w0, w0, w1, LSL 12
; ret
;
; Disassembled:
; block0: ; offset 0x0
; extr w0, w0, w1, #0xc
; ret

function %a64_extr_i32_28(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = ushr_imm v0, 4
v3 = ishl_imm v1, 28
v4 = bor v2, v3
return v4
}

; VCode:
; block0:
; ror w0, w1, w0, LSL 4
; ret
;
; Disassembled:
; block0: ; offset 0x0
; extr w0, w1, w0, #4
; ret

function %a64_extr_i32_28_swap(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = ishl_imm v0, 4
v3 = ushr_imm v1, 28
v4 = bor v2, v3
return v4
}

; VCode:
; block0:
; ror w0, w0, w1, LSL 28
; ret
;
; Disassembled:
; block0: ; offset 0x0
; extr w0, w0, w1, #0x1c
; ret

function %a64_extr_i64_12(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ushr_imm v0, 12
v3 = ishl_imm v1, 52
v4 = bor v2, v3
return v4
}

; VCode:
; block0:
; ror x0, x1, x0, LSR 12
; ret
;
; Disassembled:
; block0: ; offset 0x0
; extr x0, x1, x0, #0xc
; ret

function %a64_extr_i64_12_swap(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ishl_imm v0, 52
v3 = ushr_imm v1, 12
v4 = bor v2, v3
return v4
}

; VCode:
; block0:
; ror x0, x0, x1, LSR 12
; ret
;
; Disassembled:
; block0: ; offset 0x0
; extr x0, x0, x1, #0xc
; ret

61 changes: 60 additions & 1 deletion cranelift/filetests/filetests/runtests/bitops.clif
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
test interpret
test run
set opt_level=none
target aarch64
Expand Down Expand Up @@ -31,7 +32,7 @@ block0:
v4 = band v3, v2
return v4
}
; run
; run: %bnot_band() == 1

;; We have a optimization rule in the midend that turns this into a bmask
;; It's easier to have a runtest to ensure that it is correct than to inspect the output.
Expand All @@ -52,3 +53,61 @@ block0(v0: i16):
; run: %bitops_bmask(1) == -1
; run: %bitops_bmask(0xFFFF) == -1
; run: %bitops_bmask(0x8000) == -1

function %a64_extr_i32_12(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = ushr_imm v0, 12
v3 = ishl_imm v1, 20
v4 = bor v2, v3
return v4
}
; run: %a64_extr_i32_12(0x1234_5678, 0x1234_5678) == 0x678_1234_5
; run: %a64_extr_i32_12(0x1234_5678, 0x9abc_def0) == 0xef0_1234_5

function %a64_extr_i32_12_swap(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = ishl_imm v0, 20
v3 = ushr_imm v1, 12
v4 = bor v2, v3
return v4
}
; run: %a64_extr_i32_12_swap(0x1234_5678, 0x1234_5678) == 0x678_1234_5
; run: %a64_extr_i32_12_swap(0x1234_5678, 0x9abc_def0) == 0x678_9abc_d

function %a64_extr_i32_28(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = ushr_imm v0, 4
v3 = ishl_imm v1, 28
v4 = bor v2, v3
return v4
}
; run: %a64_extr_i32_28(0x1234_5678, 0x1234_5678) == 0x8_1234_567
; run: %a64_extr_i32_28(0x1234_5678, 0x9abc_def0) == 0x0_1234_567

function %a64_extr_i32_28_swap(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = ishl_imm v0, 4
v3 = ushr_imm v1, 28
v4 = bor v2, v3
return v4
}
; run: %a64_extr_i32_28_swap(0x1234_5678, 0x1234_5678) == 0x234_5678_1
; run: %a64_extr_i32_28_swap(0x1234_5678, 0x9abc_def0) == 0x234_5678_9

function %a64_extr_i64_12(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ushr_imm v0, 12
v3 = ishl_imm v1, 52
v4 = bor v2, v3
return v4
}
; run: %a64_extr_i64_12(0x0102_0304_0506_0708, 0x090a_0b0c_0d0e_0f00) == 0xf00_0102_0304_0506_0

function %a64_extr_i64_12_swap(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = ishl_imm v0, 52
v3 = ushr_imm v1, 12
v4 = bor v2, v3
return v4
}
; run: %a64_extr_i64_12_swap(0x0102_0304_0506_0708, 0x090a_0b0c_0d0e_0f00) == 0x708_090a_0b0c_0d0e_0
96 changes: 96 additions & 0 deletions tests/disas/aarch64-extr.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
;;! target = "aarch64"
;;! test = "compile"

(module
(func $i32_21 (param i32 i32) (result i32)
local.get 0
i32.const 11
i32.shl
local.get 1
i32.const 21
i32.shr_u
i32.or)
(func $i32_21_swapped (param i32 i32) (result i32)
local.get 1
i32.const 21
i32.shr_u
local.get 0
i32.const 11
i32.shl
i32.or)
(func $i32_11 (param i32 i32) (result i32)
local.get 0
i32.const 21
i32.shl
local.get 1
i32.const 11
i32.shr_u
i32.or)

(func $i64_21 (param i64 i64) (result i64)
local.get 0
i64.const 43
i64.shl
local.get 1
i64.const 21
i64.shr_u
i64.or)
(func $i64_21_swapped (param i64 i64) (result i64)
local.get 1
i64.const 21
i64.shr_u
local.get 0
i64.const 43
i64.shl
i64.or)
(func $i64_11 (param i64 i64) (result i64)
local.get 0
i64.const 53
i64.shl
local.get 1
i64.const 11
i64.shr_u
i64.or)
)

;; wasm[0]::function[0]::i32_21:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; extr w2, w5, w4, #0x15
;; ldp x29, x30, [sp], #0x10
;; ret
;;
;; wasm[0]::function[1]::i32_21_swapped:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; extr w2, w5, w4, #0x15
;; ldp x29, x30, [sp], #0x10
;; ret
;;
;; wasm[0]::function[2]::i32_11:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; extr w2, w5, w4, #0xb
;; ldp x29, x30, [sp], #0x10
;; ret
;;
;; wasm[0]::function[3]::i64_21:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; extr x2, x5, x4, #0x15
;; ldp x29, x30, [sp], #0x10
;; ret
;;
;; wasm[0]::function[4]::i64_21_swapped:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; extr x2, x5, x4, #0x15
;; ldp x29, x30, [sp], #0x10
;; ret
;;
;; wasm[0]::function[5]::i64_11:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; extr x2, x5, x4, #0xb
;; ldp x29, x30, [sp], #0x10
;; ret

0 comments on commit 9f4a9e1

Please sign in to comment.