From d5c54e3b18164f14e51d76841f1e317084b3f0eb Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Thu, 6 Feb 2025 10:42:43 -0800 Subject: [PATCH] asm: pretty print sign-extended immediates As mentioned in #10200, `capstone` has a peculiar way of pretty-printing immediates, especially signed immediates. It is simpler (and perhaps more clear) for us to just print immediates in one consistent format: `0xffff...`, e.g. This change parses capstone's pretty-printed immediates and converts them to our simpler format if the first attempt to match this assembler's output with `capston` fails. --- cranelift/assembler-x64/Cargo.toml | 1 + cranelift/assembler-x64/src/fuzz.rs | 76 ++++++++++++++++++++++++++++- cranelift/assembler-x64/src/imm.rs | 59 ++++++++++++---------- 3 files changed, 108 insertions(+), 28 deletions(-) diff --git a/cranelift/assembler-x64/Cargo.toml b/cranelift/assembler-x64/Cargo.toml index 8699da401fd6..27924f44dc04 100644 --- a/cranelift/assembler-x64/Cargo.toml +++ b/cranelift/assembler-x64/Cargo.toml @@ -24,6 +24,7 @@ pedantic = "warn" module_name_repetitions = { level = "allow", priority = 1 } similar_names = { level = "allow", priority = 1 } wildcard_imports = { level = "allow", priority = 1 } +too_many_lines = { level = "allow", priority = 1 } [features] fuzz = ['dep:arbitrary', 'dep:capstone'] diff --git a/cranelift/assembler-x64/src/fuzz.rs b/cranelift/assembler-x64/src/fuzz.rs index e2f7fa902121..ae7727266c0c 100644 --- a/cranelift/assembler-x64/src/fuzz.rs +++ b/cranelift/assembler-x64/src/fuzz.rs @@ -25,7 +25,7 @@ pub fn roundtrip(inst: &Inst) { // off the instruction offset first. let expected = expected.split_once(' ').unwrap().1; let actual = inst.to_string(); - if expected != actual { + if expected != actual && expected != replace_signed_immediates(&actual) { println!("> {inst}"); println!(" debug: {inst:x?}"); println!(" assembled: {}", pretty_print_hexadecimal(&assembled)); @@ -71,6 +71,80 @@ fn pretty_print_hexadecimal(hex: &[u8]) -> String { s } +/// See `replace_signed_immediates`. +macro_rules! hex_print_signed_imm { + ($hex:expr, $from:ty => $to:ty) => {{ + let imm = <$from>::from_str_radix($hex, 16).unwrap() as $to; + let mut simm = String::new(); + if imm < 0 { + simm.push_str("-"); + } + let abs = match imm.checked_abs() { + Some(i) => i, + None => <$to>::MIN, + }; + if imm > -10 && imm < 10 { + simm.push_str(&format!("{:x}", abs)); + } else { + simm.push_str(&format!("0x{:x}", abs)); + } + simm + }}; +} + +/// Replace signed immediates in the disassembly with their unsigned hexadecimal +/// equivalent. This is only necessary to match `capstone`'s complex +/// pretty-printing rules; e.g. `capsone` will: +/// - omit the `0x` prefix when printing `0x0` as `0`. +/// - omit the `0x` prefix when print small values (less than 10) +/// - print negative values as `-0x...` (signed hex) instead of `0xff...` +/// (normal hex) +fn replace_signed_immediates(dis: &str) -> std::borrow::Cow { + match dis.find("$") { + None => dis.into(), + Some(idx) => { + let (prefix, rest) = dis.split_at(idx + 1); // Skip the '$'. + let (_, rest) = chomp("-", rest); // Skip the '-' if it's there. + let (_, rest) = chomp("0x", rest); // Skip the '0x' if it's there. + let n = rest.chars().take_while(char::is_ascii_hexdigit).count(); + let (hex, rest) = rest.split_at(n); // Split at next non-hex character. + let simm = match hex.len() { + 1 | 2 => hex_print_signed_imm!(hex, u8 => i8), + 4 => hex_print_signed_imm!(hex, u16 => i16), + 8 => hex_print_signed_imm!(hex, u32 => i32), + 16 => hex_print_signed_imm!(hex, u64 => i64), + _ => panic!("unexpected length for hex: {hex}"), + }; + format!("{prefix}{simm}{rest}").into() + } + } +} + +// See `replace_signed_immediates`. +fn chomp<'a>(pat: &str, s: &'a str) -> (&'a str, &'a str) { + if s.starts_with(pat) { + s.split_at(pat.len()) + } else { + ("", s) + } +} + +#[test] +fn replace() { + assert_eq!( + replace_signed_immediates("andl $0xffffff9a, %r11d"), + "andl $-0x66, %r11d" + ); + assert_eq!( + replace_signed_immediates("xorq $0xffffffffffffffbc, 0x7f139ecc(%r9)"), + "xorq $-0x44, 0x7f139ecc(%r9)" + ); + assert_eq!( + replace_signed_immediates("subl $0x3ca77a19, -0x1a030f40(%r14)"), + "subl $0x3ca77a19, -0x1a030f40(%r14)" + ); +} + /// Fuzz-specific registers. /// /// For the fuzzer, we do not need any fancy register types; see [`FuzzReg`]. diff --git a/cranelift/assembler-x64/src/imm.rs b/cranelift/assembler-x64/src/imm.rs index 494c028f3a81..57b11d24fd67 100644 --- a/cranelift/assembler-x64/src/imm.rs +++ b/cranelift/assembler-x64/src/imm.rs @@ -1,24 +1,27 @@ //! Immediate operands to instructions. #![allow(clippy::module_name_repetitions)] -#![allow(unused_comparisons)] // Necessary to use maybe_print_hex! with `u*` values. -#![allow(clippy::cast_possible_wrap)] // Necessary to cast to `i*` for sign extension. -use crate::api::{KnownOffset, KnownOffsetTable}; +use crate::api::{CodeSink, KnownOffset, KnownOffsetTable}; -/// This helper function prints the hexadecimal representation of the immediate -/// value, but only if the value is greater than or equal to 10. This is -/// necessary to match how Capstone pretty-prints immediate values. -macro_rules! maybe_print_hex { +/// This helper function prints the unsigned hexadecimal representation of the +/// immediate value: e.g., this prints `$0xfe` to represent both the signed `-2` +/// and the unsigned `254`. +macro_rules! hexify { ($n:expr) => { - if $n >= 0 && $n < 10 { - format!("${:x}", $n) - } else { - format!("$0x{:x}", $n) - } + format!("$0x{:x}", $n) }; } +/// Like `hexify!`, but this performs a sign extension. +macro_rules! hexify_sign_extend { + ($n:expr, $from:ty => $to:ty) => {{ + #[allow(clippy::cast_possible_wrap)] + let n = <$to>::from($n as $from); + format!("$0x{:x}", n) + }}; +} + /// An 8-bit immediate operand. #[derive(Clone, Copy, Debug)] #[cfg_attr(any(test, feature = "fuzz"), derive(arbitrary::Arbitrary))] @@ -43,11 +46,11 @@ impl Imm8 { pub fn to_string(&self, extend: Extension) -> String { use Extension::{None, SignExtendLong, SignExtendQuad, SignExtendWord, ZeroExtend}; match extend { - None => maybe_print_hex!(self.0), - SignExtendWord => maybe_print_hex!(i16::from(self.0 as i8)), - SignExtendLong => maybe_print_hex!(i32::from(self.0 as i8)), - SignExtendQuad => maybe_print_hex!(i64::from(self.0 as i8)), - ZeroExtend => maybe_print_hex!(u64::from(self.0)), + None => hexify!(self.0), + SignExtendWord => hexify_sign_extend!(self.0, i8 => i16), + SignExtendLong => hexify_sign_extend!(self.0, i8 => i32), + SignExtendQuad => hexify_sign_extend!(self.0, i8 => i64), + ZeroExtend => hexify!(u64::from(self.0)), } } } @@ -76,11 +79,11 @@ impl Imm16 { pub fn to_string(&self, extend: Extension) -> String { use Extension::{None, SignExtendLong, SignExtendQuad, SignExtendWord, ZeroExtend}; match extend { - None => maybe_print_hex!(self.0), - SignExtendWord => maybe_print_hex!(self.0 as i16), - SignExtendLong => maybe_print_hex!(i32::from(self.0 as i16)), - SignExtendQuad => maybe_print_hex!(i64::from(self.0 as i16)), - ZeroExtend => maybe_print_hex!(u64::from(self.0)), + None => hexify!(self.0), + SignExtendWord => unreachable!("the 16-bit value is already 16 bits"), + SignExtendLong => hexify_sign_extend!(self.0, i16 => i32), + SignExtendQuad => hexify_sign_extend!(self.0, i16 => i64), + ZeroExtend => hexify!(u64::from(self.0)), } } } @@ -113,11 +116,11 @@ impl Imm32 { pub fn to_string(&self, extend: Extension) -> String { use Extension::{None, SignExtendLong, SignExtendQuad, SignExtendWord, ZeroExtend}; match extend { - None => maybe_print_hex!(self.0), - SignExtendWord => unreachable!("cannot sign extend a 32-bit value"), - SignExtendLong => maybe_print_hex!(self.0 as i32), - SignExtendQuad => maybe_print_hex!(i64::from(self.0 as i32)), - ZeroExtend => maybe_print_hex!(u64::from(self.0)), + None => hexify!(self.0), + SignExtendWord => unreachable!("cannot sign extend a 32-bit value to 16 bits"), + SignExtendLong => unreachable!("the 32-bit value is already 32 bits"), + SignExtendQuad => hexify_sign_extend!(self.0, i32 => i64), + ZeroExtend => hexify!(u64::from(self.0)), } } } @@ -148,6 +151,8 @@ impl From for Simm32 { impl std::fmt::LowerHex for Simm32 { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + // This rather complex implementation is necessary to match how + // `capstone` pretty-prints memory immediates. if self.0 == 0 { return Ok(()); }