Skip to content

Commit

Permalink
asm: pretty print sign-extended immediates
Browse files Browse the repository at this point in the history
As mentioned in bytecodealliance#10200, `capstone` has a peculiar way of pretty-printing
immediates, especially signed immediates. It is simpler (and perhaps
more clear) for us to just print immediates in one consistent format:
`0xffff...`, e.g. This change parses capstone's pretty-printed
immediates and converts them to our simpler format if the first attempt
to match this assembler's output with `capston` fails.
  • Loading branch information
abrown committed Feb 10, 2025
1 parent 20f5db9 commit d5c54e3
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 28 deletions.
1 change: 1 addition & 0 deletions cranelift/assembler-x64/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pedantic = "warn"
module_name_repetitions = { level = "allow", priority = 1 }
similar_names = { level = "allow", priority = 1 }
wildcard_imports = { level = "allow", priority = 1 }
too_many_lines = { level = "allow", priority = 1 }

[features]
fuzz = ['dep:arbitrary', 'dep:capstone']
76 changes: 75 additions & 1 deletion cranelift/assembler-x64/src/fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pub fn roundtrip(inst: &Inst<FuzzRegs>) {
// off the instruction offset first.
let expected = expected.split_once(' ').unwrap().1;
let actual = inst.to_string();
if expected != actual {
if expected != actual && expected != replace_signed_immediates(&actual) {
println!("> {inst}");
println!(" debug: {inst:x?}");
println!(" assembled: {}", pretty_print_hexadecimal(&assembled));
Expand Down Expand Up @@ -71,6 +71,80 @@ fn pretty_print_hexadecimal(hex: &[u8]) -> String {
s
}

/// See `replace_signed_immediates`.
macro_rules! hex_print_signed_imm {
($hex:expr, $from:ty => $to:ty) => {{
let imm = <$from>::from_str_radix($hex, 16).unwrap() as $to;
let mut simm = String::new();
if imm < 0 {
simm.push_str("-");
}
let abs = match imm.checked_abs() {
Some(i) => i,
None => <$to>::MIN,
};
if imm > -10 && imm < 10 {
simm.push_str(&format!("{:x}", abs));
} else {
simm.push_str(&format!("0x{:x}", abs));
}
simm
}};
}

/// Replace signed immediates in the disassembly with their unsigned hexadecimal
/// equivalent. This is only necessary to match `capstone`'s complex
/// pretty-printing rules; e.g. `capsone` will:
/// - omit the `0x` prefix when printing `0x0` as `0`.
/// - omit the `0x` prefix when print small values (less than 10)
/// - print negative values as `-0x...` (signed hex) instead of `0xff...`
/// (normal hex)
fn replace_signed_immediates(dis: &str) -> std::borrow::Cow<str> {
match dis.find("$") {
None => dis.into(),
Some(idx) => {
let (prefix, rest) = dis.split_at(idx + 1); // Skip the '$'.
let (_, rest) = chomp("-", rest); // Skip the '-' if it's there.
let (_, rest) = chomp("0x", rest); // Skip the '0x' if it's there.
let n = rest.chars().take_while(char::is_ascii_hexdigit).count();
let (hex, rest) = rest.split_at(n); // Split at next non-hex character.
let simm = match hex.len() {
1 | 2 => hex_print_signed_imm!(hex, u8 => i8),
4 => hex_print_signed_imm!(hex, u16 => i16),
8 => hex_print_signed_imm!(hex, u32 => i32),
16 => hex_print_signed_imm!(hex, u64 => i64),
_ => panic!("unexpected length for hex: {hex}"),
};
format!("{prefix}{simm}{rest}").into()
}
}
}

// See `replace_signed_immediates`.
fn chomp<'a>(pat: &str, s: &'a str) -> (&'a str, &'a str) {
if s.starts_with(pat) {
s.split_at(pat.len())
} else {
("", s)
}
}

#[test]
fn replace() {
assert_eq!(
replace_signed_immediates("andl $0xffffff9a, %r11d"),
"andl $-0x66, %r11d"
);
assert_eq!(
replace_signed_immediates("xorq $0xffffffffffffffbc, 0x7f139ecc(%r9)"),
"xorq $-0x44, 0x7f139ecc(%r9)"
);
assert_eq!(
replace_signed_immediates("subl $0x3ca77a19, -0x1a030f40(%r14)"),
"subl $0x3ca77a19, -0x1a030f40(%r14)"
);
}

/// Fuzz-specific registers.
///
/// For the fuzzer, we do not need any fancy register types; see [`FuzzReg`].
Expand Down
59 changes: 32 additions & 27 deletions cranelift/assembler-x64/src/imm.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
//! Immediate operands to instructions.
#![allow(clippy::module_name_repetitions)]
#![allow(unused_comparisons)] // Necessary to use maybe_print_hex! with `u*` values.
#![allow(clippy::cast_possible_wrap)] // Necessary to cast to `i*` for sign extension.

use crate::api::{KnownOffset, KnownOffsetTable};
use crate::api::{CodeSink, KnownOffset, KnownOffsetTable};

/// This helper function prints the hexadecimal representation of the immediate
/// value, but only if the value is greater than or equal to 10. This is
/// necessary to match how Capstone pretty-prints immediate values.
macro_rules! maybe_print_hex {
/// This helper function prints the unsigned hexadecimal representation of the
/// immediate value: e.g., this prints `$0xfe` to represent both the signed `-2`
/// and the unsigned `254`.
macro_rules! hexify {
($n:expr) => {
if $n >= 0 && $n < 10 {
format!("${:x}", $n)
} else {
format!("$0x{:x}", $n)
}
format!("$0x{:x}", $n)
};
}

/// Like `hexify!`, but this performs a sign extension.
macro_rules! hexify_sign_extend {
($n:expr, $from:ty => $to:ty) => {{
#[allow(clippy::cast_possible_wrap)]
let n = <$to>::from($n as $from);
format!("$0x{:x}", n)
}};
}

/// An 8-bit immediate operand.
#[derive(Clone, Copy, Debug)]
#[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))]
Expand All @@ -43,11 +46,11 @@ impl Imm8 {
pub fn to_string(&self, extend: Extension) -> String {
use Extension::{None, SignExtendLong, SignExtendQuad, SignExtendWord, ZeroExtend};
match extend {
None => maybe_print_hex!(self.0),
SignExtendWord => maybe_print_hex!(i16::from(self.0 as i8)),
SignExtendLong => maybe_print_hex!(i32::from(self.0 as i8)),
SignExtendQuad => maybe_print_hex!(i64::from(self.0 as i8)),
ZeroExtend => maybe_print_hex!(u64::from(self.0)),
None => hexify!(self.0),
SignExtendWord => hexify_sign_extend!(self.0, i8 => i16),
SignExtendLong => hexify_sign_extend!(self.0, i8 => i32),
SignExtendQuad => hexify_sign_extend!(self.0, i8 => i64),
ZeroExtend => hexify!(u64::from(self.0)),
}
}
}
Expand Down Expand Up @@ -76,11 +79,11 @@ impl Imm16 {
pub fn to_string(&self, extend: Extension) -> String {
use Extension::{None, SignExtendLong, SignExtendQuad, SignExtendWord, ZeroExtend};
match extend {
None => maybe_print_hex!(self.0),
SignExtendWord => maybe_print_hex!(self.0 as i16),
SignExtendLong => maybe_print_hex!(i32::from(self.0 as i16)),
SignExtendQuad => maybe_print_hex!(i64::from(self.0 as i16)),
ZeroExtend => maybe_print_hex!(u64::from(self.0)),
None => hexify!(self.0),
SignExtendWord => unreachable!("the 16-bit value is already 16 bits"),
SignExtendLong => hexify_sign_extend!(self.0, i16 => i32),
SignExtendQuad => hexify_sign_extend!(self.0, i16 => i64),
ZeroExtend => hexify!(u64::from(self.0)),
}
}
}
Expand Down Expand Up @@ -113,11 +116,11 @@ impl Imm32 {
pub fn to_string(&self, extend: Extension) -> String {
use Extension::{None, SignExtendLong, SignExtendQuad, SignExtendWord, ZeroExtend};
match extend {
None => maybe_print_hex!(self.0),
SignExtendWord => unreachable!("cannot sign extend a 32-bit value"),
SignExtendLong => maybe_print_hex!(self.0 as i32),
SignExtendQuad => maybe_print_hex!(i64::from(self.0 as i32)),
ZeroExtend => maybe_print_hex!(u64::from(self.0)),
None => hexify!(self.0),
SignExtendWord => unreachable!("cannot sign extend a 32-bit value to 16 bits"),
SignExtendLong => unreachable!("the 32-bit value is already 32 bits"),
SignExtendQuad => hexify_sign_extend!(self.0, i32 => i64),
ZeroExtend => hexify!(u64::from(self.0)),
}
}
}
Expand Down Expand Up @@ -148,6 +151,8 @@ impl From<i32> for Simm32 {

impl std::fmt::LowerHex for Simm32 {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
// This rather complex implementation is necessary to match how
// `capstone` pretty-prints memory immediates.
if self.0 == 0 {
return Ok(());
}
Expand Down

0 comments on commit d5c54e3

Please sign in to comment.