Skip to content

Commit

Permalink
asm: add types for sign-extended immediates
Browse files Browse the repository at this point in the history
As pointed out in bytecodealliance#10200, it could be confusing for users for
`cranelift-assembler-x64` to pass unsigned integers to certain assembler
instructions and have them unexpectedly sign-extended. Well, it can't be
too surprising since these instructions have a `_sx*` suffix, but this
change implements @alexcrichton's additional suggestion to create
separate types for the immediates that may be sign-extended.

These new types (`Simm8`, `Simm16`, `Simm32`) are quite similar to their
vanilla counterparts (`Imm8`, `Imm16`, `Imm32`) but have additional
sign-extension logic when pretty-printed. This means the vanilla
versions can be simplified and the pre-existing `Simm32` is renamed to
the more appropriate `AmodeOffset`.
  • Loading branch information
abrown committed Feb 10, 2025
1 parent d5c54e3 commit 25315a8
Show file tree
Hide file tree
Showing 18 changed files with 274 additions and 137 deletions.
10 changes: 8 additions & 2 deletions cranelift/assembler-x64/meta/src/dsl/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,14 @@ pub enum Extension {
SignExtendQuad,
SignExtendLong,
SignExtendWord,
ZeroExtend,
}

impl Extension {
/// Check if the extension is sign-extended.
#[must_use]
pub fn is_sign_extended(&self) -> bool {
matches!(self, Self::SignExtendQuad | Self::SignExtendLong | Self::SignExtendWord)
}
}

impl Default for Extension {
Expand All @@ -365,7 +372,6 @@ impl core::fmt::Display for Extension {
Extension::SignExtendQuad => write!(f, "sxq"),
Extension::SignExtendLong => write!(f, "sxl"),
Extension::SignExtendWord => write!(f, "sxw"),
Extension::ZeroExtend => write!(f, "zx"),
}
}
}
3 changes: 3 additions & 0 deletions cranelift/assembler-x64/meta/src/generate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,11 @@ pub fn isle_macro(f: &mut Formatter, insts: &[dsl::Inst]) {
/// above.
pub fn isle_definitions(f: &mut Formatter, insts: &[dsl::Inst]) {
f.line("(type AssemblerImm8 extern (enum))", None);
f.line("(type AssemblerSimm8 extern (enum))", None);
f.line("(type AssemblerImm16 extern (enum))", None);
f.line("(type AssemblerSimm16 extern (enum))", None);
f.line("(type AssemblerImm32 extern (enum))", None);
f.line("(type AssemblerSimm32 extern (enum))", None);
f.line("(type AssemblerReadGpr extern (enum))", None);
f.line("(type AssemblerReadWriteGpr extern (enum))", None);
f.line("(type AssemblerReadGprMem extern (enum))", None);
Expand Down
9 changes: 8 additions & 1 deletion cranelift/assembler-x64/meta/src/generate/inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,14 @@ impl dsl::Inst {
.iter()
.filter_map(|o| match o.location.kind() {
FixedReg(_) => None,
Imm(loc) => Some(format!("AssemblerImm{}", loc.bits())),
Imm(loc) => {
let bits = loc.bits();
if o.extension.is_sign_extended() {
Some(format!("AssemblerSimm{bits}"))
} else {
Some(format!("AssemblerImm{bits}"))
}
}
Reg(_) => Some(format!("Assembler{}Gpr", o.mutability.generate_type())),
RegMem(_) => Some(format!("Assembler{}GprMem", o.mutability.generate_type())),
})
Expand Down
27 changes: 22 additions & 5 deletions cranelift/assembler-x64/meta/src/generate/operand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@ impl dsl::Operand {
use dsl::OperandKind::*;
match self.location.kind() {
FixedReg(_) => None,
Imm(loc) => Some(format!("Imm{}", loc.bits())),
Imm(loc) => {
let bits = loc.bits();
if self.extension.is_sign_extended() {
Some(format!("Simm{bits}"))
} else {
Some(format!("Imm{bits}"))
}
}
Reg(_) => Some(format!("Gpr<R::{}Gpr>", self.mutability.generate_type())),
RegMem(_) => Some(format!("GprMem<R::{}Gpr, R::ReadGpr>", self.mutability.generate_type())),
}
Expand All @@ -22,7 +29,14 @@ impl dsl::Operand {
};
match self.location.kind() {
FixedReg(_) => None,
Imm(loc) => Some(format!("Imm{}", loc.bits())),
Imm(loc) => {
let bits = loc.bits();
if self.extension.is_sign_extended() {
Some(format!("Simm{bits}"))
} else {
Some(format!("Imm{bits}"))
}
}
Reg(_) => Some(format!("Gpr<{pick_ty}>")),
RegMem(_) => Some(format!("GprMem<{pick_ty}, {read_ty}>")),
}
Expand Down Expand Up @@ -58,8 +72,12 @@ impl dsl::Location {
eax => "\"%eax\"".into(),
rax => "\"%rax\"".into(),
imm8 | imm16 | imm32 => {
let variant = extension.generate_variant();
format!("self.{self}.to_string({variant})")
if extension.is_sign_extended() {
let variant = extension.generate_variant();
format!("self.{self}.to_string({variant})")
} else {
format!("self.{self}.to_string()")
}
}
r8 | r16 | r32 | r64 | rm8 | rm16 | rm32 | rm64 => match self.generate_size() {
Some(size) => format!("self.{self}.to_string({size})"),
Expand Down Expand Up @@ -120,7 +138,6 @@ impl dsl::Extension {
SignExtendWord => "Extension::SignExtendWord",
SignExtendLong => "Extension::SignExtendLong",
SignExtendQuad => "Extension::SignExtendQuad",
ZeroExtend => "Extension::ZeroExtend",
}
}
}
6 changes: 3 additions & 3 deletions cranelift/assembler-x64/src/fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
//! throughout this crate to avoid depending on the `arbitrary` crate
//! unconditionally (use the `fuzz` feature instead).
use crate::{AsReg, Gpr, Inst, NonRspGpr, Registers, Simm32, Simm32PlusKnownOffset};
use crate::{AmodeOffset, AsReg, Gpr, Inst, NonRspGpr, Registers, AmodeOffsetPlusKnownOffset};
use arbitrary::{Arbitrary, Result, Unstructured};
use capstone::{arch::x86, arch::BuildsCapstone, arch::BuildsCapstoneSyntax, Capstone};

Expand Down Expand Up @@ -175,11 +175,11 @@ impl AsReg for FuzzReg {
}
}

impl Arbitrary<'_> for Simm32PlusKnownOffset {
impl Arbitrary<'_> for AmodeOffsetPlusKnownOffset {
fn arbitrary(u: &mut Unstructured<'_>) -> Result<Self> {
// For now, we don't generate offsets (TODO).
Ok(Self {
simm32: Simm32::arbitrary(u)?,
simm32: AmodeOffset::arbitrary(u)?,
offset: None,
})
}
Expand Down
157 changes: 78 additions & 79 deletions cranelift/assembler-x64/src/imm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
#![allow(clippy::module_name_repetitions)]

use crate::api::{CodeSink, KnownOffset, KnownOffsetTable};
use crate::api::CodeSink;
use std::fmt;

/// This helper function prints the unsigned hexadecimal representation of the
/// immediate value: e.g., this prints `$0xfe` to represent both the signed `-2`
Expand Down Expand Up @@ -41,16 +42,42 @@ impl Imm8 {
pub fn encode(&self, sink: &mut impl CodeSink) {
sink.put1(self.0);
}
}

impl fmt::Display for Imm8 {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "$0x{:x}", self.0)
}
}

/// A _signed_ 8-bit immediate operand (suitable for sign extension).
#[derive(Clone, Copy, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub struct Simm8(i8);

impl Simm8 {
#[must_use]
pub fn new(value: i8) -> Self {
Self(value)
}

#[must_use]
pub fn value(&self) -> i8 {
self.0
}

pub fn encode(&self, sink: &mut impl CodeSink) {
sink.put1(self.0 as u8);
}

#[must_use]
pub fn to_string(&self, extend: Extension) -> String {
use Extension::{None, SignExtendLong, SignExtendQuad, SignExtendWord, ZeroExtend};
use Extension::{None, SignExtendLong, SignExtendQuad, SignExtendWord};
match extend {
None => hexify!(self.0),
SignExtendWord => hexify_sign_extend!(self.0, i8 => i16),
SignExtendLong => hexify_sign_extend!(self.0, i8 => i32),
SignExtendQuad => hexify_sign_extend!(self.0, i8 => i64),
ZeroExtend => hexify!(u64::from(self.0)),
}
}
}
Expand All @@ -74,16 +101,42 @@ impl Imm16 {
pub fn encode(&self, sink: &mut impl CodeSink) {
sink.put2(self.0);
}
}

impl fmt::Display for Imm16 {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "$0x{:x}", self.0)
}
}

/// A _signed_ 16-bit immediate operand (suitable for sign extension).
#[derive(Clone, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub struct Simm16(i16);

impl Simm16 {
#[must_use]
pub fn new(value: i16) -> Self {
Self(value)
}

#[must_use]
pub fn value(&self) -> i16 {
self.0
}

pub fn encode(&self, sink: &mut impl CodeSink) {
sink.put2(self.0 as u16);
}

#[must_use]
pub fn to_string(&self, extend: Extension) -> String {
use Extension::{None, SignExtendLong, SignExtendQuad, SignExtendWord, ZeroExtend};
use Extension::{None, SignExtendLong, SignExtendQuad, SignExtendWord};
match extend {
None => hexify!(self.0),
SignExtendWord => unreachable!("the 16-bit value is already 16 bits"),
SignExtendLong => hexify_sign_extend!(self.0, i16 => i32),
SignExtendQuad => hexify_sign_extend!(self.0, i16 => i64),
ZeroExtend => hexify!(u64::from(self.0)),
}
}
}
Expand Down Expand Up @@ -111,23 +164,20 @@ impl Imm32 {
pub fn encode(&self, sink: &mut impl CodeSink) {
sink.put4(self.0);
}
}

#[must_use]
pub fn to_string(&self, extend: Extension) -> String {
use Extension::{None, SignExtendLong, SignExtendQuad, SignExtendWord, ZeroExtend};
match extend {
None => hexify!(self.0),
SignExtendWord => unreachable!("cannot sign extend a 32-bit value to 16 bits"),
SignExtendLong => unreachable!("the 32-bit value is already 32 bits"),
SignExtendQuad => hexify_sign_extend!(self.0, i32 => i64),
ZeroExtend => hexify!(u64::from(self.0)),
}
impl fmt::Display for Imm32 {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "$0x{:x}", self.0)
}
}

/// A 32-bit immediate like [`Imm32`], but with slightly different
/// pretty-printing.
#[derive(Clone, Copy, Debug)]
/// A _signed_ 32-bit immediate operand (suitable for sign extension).
///
/// Note that, "in 64-bit mode, the typical size of immediate operands remains
/// 32 bits. When the operand size is 64 bits, the processor sign-extends all
/// immediates to 64 bits prior to their use" (Intel SDM Vol. 2, 2.2.1.5).
#[derive(Clone, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
pub struct Simm32(i32);

Expand All @@ -138,73 +188,23 @@ impl Simm32 {
}

#[must_use]
pub fn value(self) -> i32 {
pub fn value(&self) -> i32 {
self.0
}
}

impl From<i32> for Simm32 {
fn from(value: i32) -> Self {
Self(value)
}
}

impl std::fmt::LowerHex for Simm32 {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
// This rather complex implementation is necessary to match how
// `capstone` pretty-prints memory immediates.
if self.0 == 0 {
return Ok(());
}
if self.0 < 0 {
write!(f, "-")?;
}
if self.0 > 9 || self.0 < -9 {
write!(f, "0x")?;
}
let abs = match self.0.checked_abs() {
Some(i) => i,
None => -2_147_483_648,
};
std::fmt::LowerHex::fmt(&abs, f)
pub fn encode(&self, sink: &mut impl CodeSink) {
sink.put4(self.0 as u32);
}
}

/// A [`Simm32`] immediate with an optional known offset.
///
/// Cranelift does not know certain offsets until emission time. To accommodate
/// Cranelift, this structure stores an optional [`KnownOffset`]. The following
/// happens immediately before emission:
/// - the [`KnownOffset`] is looked up, mapping it to an offset value
/// - the [`Simm32`] value is added to the offset value
#[derive(Clone, Debug)]
pub struct Simm32PlusKnownOffset {
pub simm32: Simm32,
pub offset: Option<KnownOffset>,
}

impl Simm32PlusKnownOffset {
/// # Panics
///
/// Panics if the sum of the immediate and the known offset value overflows.
#[must_use]
pub fn value(&self, offsets: &impl KnownOffsetTable) -> i32 {
let known_offset = match self.offset {
Some(offset) => offsets[offset],
None => 0,
};
known_offset
.checked_add(self.simm32.value())
.expect("no wrapping")
}
}

impl std::fmt::LowerHex for Simm32PlusKnownOffset {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
if let Some(offset) = self.offset {
write!(f, "<offset:{offset}>+")?;
pub fn to_string(&self, extend: Extension) -> String {
use Extension::{None, SignExtendLong, SignExtendQuad, SignExtendWord};
match extend {
None => hexify!(self.0),
SignExtendWord => unreachable!("cannot sign extend a 32-bit value to 16 bits"),
SignExtendLong => unreachable!("the 32-bit value is already 32 bits"),
SignExtendQuad => hexify_sign_extend!(self.0, i32 => i64),
}
std::fmt::LowerHex::fmt(&self.simm32, f)
}
}

Expand All @@ -215,5 +215,4 @@ pub enum Extension {
SignExtendQuad,
SignExtendLong,
SignExtendWord,
ZeroExtend,
}
2 changes: 1 addition & 1 deletion cranelift/assembler-x64/src/inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
//! See also: [`Inst`], an `enum` containing all these instructions.
use crate::api::{AsReg, CodeSink, KnownOffsetTable, RegisterVisitor, Registers};
use crate::imm::{Extension, Imm16, Imm32, Imm8};
use crate::imm::{Extension, Imm16, Imm32, Imm8, Simm32, Simm8};
use crate::mem::{emit_modrm_sib_disp, GprMem};
use crate::reg::{self, Gpr, Size};
use crate::rex::{self, RexFlags};
Expand Down
4 changes: 2 additions & 2 deletions cranelift/assembler-x64/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ pub use api::{
AsReg, CodeSink, Constant, KnownOffset, KnownOffsetTable, Label, RegisterVisitor, Registers,
TrapCode,
};
pub use imm::{Extension, Imm16, Imm32, Imm8, Simm32, Simm32PlusKnownOffset};
pub use mem::{Amode, DeferredTarget, GprMem, Scale};
pub use imm::{Extension, Imm16, Imm32, Imm8, Simm16, Simm32, Simm8};
pub use mem::{Amode, AmodeOffset, AmodeOffsetPlusKnownOffset, DeferredTarget, GprMem, Scale};
pub use reg::{Gpr, NonRspGpr, Size};
pub use rex::RexFlags;

Expand Down
Loading

0 comments on commit 25315a8

Please sign in to comment.