diff --git a/Cargo.lock b/Cargo.lock index 0ca0ee64d9..022ef913c7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1723,6 +1723,15 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + [[package]] name = "criterion" version = "0.5.1" @@ -2419,6 +2428,12 @@ dependencies = [ "once_cell", ] +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + [[package]] name = "fastrand" version = "1.9.0" @@ -2513,6 +2528,16 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" +[[package]] +name = "flate2" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "flume" version = "0.9.2" @@ -2792,6 +2817,11 @@ name = "gimli" version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +dependencies = [ + "fallible-iterator", + "indexmap 2.8.0", + "stable_deref_trait", +] [[package]] name = "git2" @@ -4085,7 +4115,9 @@ version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ + "flate2", "memchr", + "ruzstd", ] [[package]] @@ -5560,6 +5592,15 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "ruzstd" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fad02996bfc73da3e301efe90b1837be9ed8f4a462b6ed410aa35d00381de89f" +dependencies = [ + "twox-hash", +] + [[package]] name = "ryu" version = "1.0.20" @@ -6149,6 +6190,7 @@ dependencies = [ "enum-map", "eyre", "gecko_profile", + "gimli", "goblin 0.9.3", "hashbrown 0.14.5", "hex", @@ -6156,6 +6198,7 @@ dependencies = [ "itertools 0.13.0", "nohash-hasher", "num", + "object", "p3-baby-bear", "p3-field", "p3-maybe-rayon", @@ -7604,6 +7647,16 @@ dependencies = [ "url", ] +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + [[package]] name = "typenum" version = "1.18.0" diff --git a/crates/core/executor/Cargo.toml b/crates/core/executor/Cargo.toml index 75354c611a..4ecf12721e 100644 --- a/crates/core/executor/Cargo.toml +++ b/crates/core/executor/Cargo.toml @@ -49,6 +49,8 @@ range-set-blaze = "0.1.16" # profiling goblin = { version = "0.9", optional = true } +gimli = { version = "0.31.1", optional = true } +object = { version = "0.36.7", optional = true } rustc-demangle = { version = "0.1.18", optional = true } gecko_profile = { version = "0.4.0", optional = true } indicatif = { version = "0.17.8", optional = true } @@ -61,6 +63,8 @@ test-artifacts = { path = "../../test-artifacts" } bigint-rug = ["sp1-curves/bigint-rug"] profiling = [ "dep:goblin", + "dep:gimli", + "dep:object", "dep:rustc-demangle", "dep:gecko_profile", "dep:indicatif", diff --git a/crates/core/executor/src/executor.rs b/crates/core/executor/src/executor.rs index 9682183f2f..ecfdc41aef 100644 --- a/crates/core/executor/src/executor.rs +++ b/crates/core/executor/src/executor.rs @@ -204,6 +204,31 @@ pub enum ExecutorMode { ShapeCollection, } +/// Represents different types of stack operations that can occur during program execution. +#[derive(Debug)] +pub enum StackEvent { + /// Indicates a push operation onto the stack. + Push, + /// Indicates a pop operation from the stack. + Pop, + /// Indicates a combined pop and push operation on the stack. + PopPush, +} + +impl StackEvent { + /// Returns `true` if the event represents pushing onto the stack. + #[must_use] + pub fn is_push(&self) -> bool { + matches!(self, StackEvent::Push | StackEvent::PopPush) + } + + /// Returns `true` if the event represents popping from the stack. + #[must_use] + pub fn is_pop(&self) -> bool { + matches!(self, StackEvent::Pop | StackEvent::PopPush) + } +} + /// Information about event counts which are relevant for shape fixing. #[derive(Debug, Default, Clone, PartialEq, Eq)] pub struct LocalCounts { @@ -1697,12 +1722,18 @@ impl<'a> Executor<'a> { // Fetch the instruction at the current program counter. let instruction = self.fetch(); + #[cfg(feature = "profiling")] + let previous_pc = self.state.pc; + // Log the current state of the runtime. - self.log(&instruction); + self.log(); // Execute the instruction. self.execute_instruction(&instruction)?; + #[cfg(feature = "profiling")] + self.profile(&instruction, previous_pc.into()); + // Increment the clock. self.state.global_clk += 1; @@ -2319,16 +2350,48 @@ impl<'a> Executor<'a> { } #[inline] - fn log(&mut self, _: &Instruction) { - #[cfg(feature = "profiling")] + fn log(&self) { + if !self.unconstrained && self.state.global_clk % 10_000_000 == 0 { + tracing::info!("clk = {} pc = 0x{:x?}", self.state.global_clk, self.state.pc); + } + } + + #[cfg(feature = "profiling")] + #[inline] + fn profile(&mut self, i: &Instruction, previous_pc: u64) { if let Some((ref mut profiler, _)) = self.profiler { if !self.unconstrained { - profiler.record(self.state.global_clk, self.state.pc as u64); - } - } + let stack_event = match i.opcode { + // A JAL instruction should push the return address onto a return-address + // stack (RAS) only when rd=x1 or x5. + Opcode::JAL => { + if i.op_a == Register::X1 as u8 || i.op_a == Register::X5 as u8 { + Some(StackEvent::Push) + } else { + None + } + } + Opcode::JALR => { + let rd_link = i.op_a == Register::X1 as u8 || i.op_a == Register::X5 as u8; + let rs1_link = + i.op_b == Register::X1 as u32 || i.op_b == Register::X5 as u32; + match (rd_link, rs1_link) { + (false, false) => None, + (false, true) => Some(StackEvent::Pop), + (true, true) if i.op_a as u32 != i.op_b => Some(StackEvent::PopPush), + (true, _) => Some(StackEvent::Push), + } + } + _ => None, + }; - if !self.unconstrained && self.state.global_clk % 10_000_000 == 0 { - tracing::info!("clk = {} pc = 0x{:x?}", self.state.global_clk, self.state.pc); + profiler.record( + self.state.global_clk, + self.state.pc as u64, + previous_pc, + stack_event.as_ref(), + ); + } } } } diff --git a/crates/core/executor/src/profiler.rs b/crates/core/executor/src/profiler.rs index dcf26e5787..0406539e15 100644 --- a/crates/core/executor/src/profiler.rs +++ b/crates/core/executor/src/profiler.rs @@ -1,8 +1,30 @@ +// Copyright 2023 RISC Zero, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use gecko_profile::{Frame, ProfileBuilder, StringIndex, ThreadBuilder}; +use gimli::{ + AttributeValue, DW_AT_high_pc, DW_AT_ranges, DW_TAG_inlined_subroutine, + DebuggingInformationEntry, DwAt, Dwarf, EndianArcSlice, Range, Reader, RunTimeEndian, + SectionId, Unit, UnitOffset, +}; use goblin::elf::{sym::STT_FUNC, Elf}; use indicatif::{ProgressBar, ProgressStyle}; +use object::{File, Object, ObjectSection}; use rustc_demangle::demangle; -use std::collections::HashMap; +use std::{borrow::Cow, collections::HashMap, sync::Arc}; + +use crate::StackEvent; #[derive(Debug, thiserror::Error)] pub enum ProfilerError { @@ -12,6 +34,20 @@ pub enum ProfilerError { Elf(#[from] goblin::error::Error), #[error("Failed to serialize samples {}", .0)] Serde(#[from] serde_json::Error), + #[error("Failed retrieve inline functions {}", .0)] + InlineRetrieval(#[from] gimli::Error), + #[error("No unit for offset {}", .0)] + NoUnitForOffset(usize), + #[error("Invalid abstract origin")] + InvalidAbstractOrigin, + #[error("DwAt {} missing", .0)] + DwAtMissing(DwAt), + #[error("Onvalid attribute abstract origin")] + InvalidAttributeAbstractOrigin, + #[error("Unexpected abstract origin")] + UnexpectedAbstractOrigin, + #[error("Unexpected low pc")] + UnexpectedLowPc, } /// During execution, the profiler always keeps track of the callstack @@ -20,17 +56,14 @@ pub struct Profiler { sample_rate: u64, /// `start_address`-> index in `function_ranges` start_lookup: HashMap, + /// `start_address`-> index in `function_ranges` for inlined functions + inline_functions_start_lookup: HashMap>, /// the start and end of the function function_ranges: Vec<(u64, u64, Frame)>, /// the current known call stack - function_stack: Vec, - /// useful for quick search as to not count recursive calls - function_stack_indices: Vec, - /// The call stacks code ranges, useful for keeping track of unwinds - function_stack_ranges: Vec<(u64, u64)>, - /// The deepest function code range - current_function_range: (u64, u64), + function_stack: Vec, + pop_stack: Vec, main_idx: Option, builder: ThreadBuilder, @@ -41,14 +74,42 @@ struct Sample { stack: Vec, } +#[derive(Debug, PartialEq, Eq)] +struct Function { + pub frame: Frame, + pub kind: FunctionKind, +} + +impl Function { + pub fn regular(frame: Frame) -> Self { + Self { frame, kind: FunctionKind::Regular } + } + + pub fn inline(frame: Frame, start: u64, end: u64) -> Self { + Self { frame, kind: FunctionKind::inline(start, end) } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum FunctionKind { + Regular, + Inline { start: u64, end: u64 }, +} + +impl FunctionKind { + pub fn inline(start: u64, end: u64) -> Self { + Self::Inline { start, end } + } +} + impl Profiler { pub(super) fn new(elf_bytes: &[u8], sample_rate: u64) -> Result { let elf = Elf::parse(elf_bytes)?; - let mut start_lookup = HashMap::new(); + let mut inline_functions_start_lookup = HashMap::new(); let mut function_ranges = Vec::new(); let mut builder = ThreadBuilder::new(1, 0, std::time::Instant::now(), false, false); - + let mut frames_to_names = HashMap::new(); // We need to extract all the functions from the ELF file // and their corresponding PC ranges. let mut main_idx = None; @@ -71,76 +132,116 @@ impl Profiler { let start_idx = function_ranges.len(); function_ranges.push((start_address, end_address, Frame::Label(string_idx))); + frames_to_names.insert(start_address, demangled_name.clone()); start_lookup.insert(start_address, start_idx); } } + if trace_inline_functions() { + eprintln!("Inline functions profiling enabled"); + + let file = File::parse(elf_bytes).unwrap(); + let dwarf = load_dwarf(&file)?; + let inline_fn_builder = InlineFunctionFrameBuilder::new(&dwarf); + let mut iter = dwarf.units(); + + while let Some(header) = iter.next()? { + let unit = dwarf.unit(header)?; + + let mut entries = unit.entries(); + while let Some((_, entry)) = entries.next_dfs()? { + if entry.tag() == DW_TAG_inlined_subroutine { + inline_fn_builder.build( + &unit, + entry, + &mut builder, + &mut function_ranges, + &mut inline_functions_start_lookup, + )?; + } + } + } + } + Ok(Self { builder, main_idx, sample_rate, samples: Vec::new(), start_lookup, + inline_functions_start_lookup, function_ranges, function_stack: Vec::new(), - function_stack_indices: Vec::new(), - function_stack_ranges: Vec::new(), - current_function_range: (0, 0), + pop_stack: Vec::new(), }) } - pub(super) fn record(&mut self, clk: u64, pc: u64) { - // We are still in the current function. - if pc > self.current_function_range.0 && pc <= self.current_function_range.1 { - if clk % self.sample_rate == 0 { - self.samples.push(Sample { stack: self.function_stack.clone() }); - } + pub(super) fn record( + &mut self, + clk: u64, + pc: u64, + previous_pc: u64, + stack_event: Option<&StackEvent>, + ) { + if let Some(stack_event) = stack_event { + if stack_event.is_pop() { + loop { + // Pop all inline functions + while matches!( + self.function_stack.last().map(|f| f.kind), + Some(FunctionKind::Inline { .. }) + ) { + self.function_stack.pop(); + } - return; - } + self.function_stack.pop(); + let popped = self.pop_stack.pop().unwrap(); - // Jump to a new function (or the same one). - if let Some(f) = self.start_lookup.get(&pc) { - // Jump to a new function (not recursive). - if !self.function_stack_indices.contains(f) { - self.function_stack_indices.push(*f); - let (start, end, name) = self.function_ranges.get(*f).unwrap(); - self.current_function_range = (*start, *end); - self.function_stack_ranges.push((*start, *end)); - self.function_stack.push(name.clone()); + if popped == pc - 4 { + break; + } + } } - } else { - // This means pc now points to an instruction that is - // - // 1. not in the current function's range - // 2. not a new function call - // - // We now account for a new possibility where we're returning to a function in the - // stack this need not be the immediate parent and can be any of the existing - // functions in the stack due to some optimizations that the compiler can make. - let mut unwind_point = 0; - let mut unwind_found = false; - for (c, (s, e)) in self.function_stack_ranges.iter().enumerate() { - if pc > *s && pc <= *e { - unwind_point = c; - unwind_found = true; - break; + + if stack_event.is_push() { + if let Some(f) = self.start_lookup.get(&pc) { + // Jump to a new function. + let (_, _, name) = self.function_ranges.get(*f).unwrap(); + self.function_stack.push(Function::regular(name.clone())); + self.pop_stack.push(previous_pc); } } + } + + // Pop inline functions when the current PC is not in the range of the function stack. + loop { + let Some(FunctionKind::Inline { start, end }) = + self.function_stack.last().map(|f| f.kind) + else { + break; + }; - // Unwinding until the parent. - if unwind_found { - self.function_stack.truncate(unwind_point + 1); - self.function_stack_ranges.truncate(unwind_point + 1); - self.function_stack_indices.truncate(unwind_point + 1); + if start <= pc && pc < end { + break; } + self.function_stack.pop(); + } - // If no unwind point has been found, that means we jumped to some random location - // so we'll just increment the counts for everything in the stack. + // Push all inline functions that starts at the current PC + if let Some(inline_functions) = self.inline_functions_start_lookup.get(&pc) { + for f in inline_functions { + let (start, end, name) = self.function_ranges.get(*f).unwrap(); + let f = Function::inline(name.clone(), *start, *end); + if self.function_stack.last() != Some(&f) { + self.function_stack.push(f); + } + } } if clk % self.sample_rate == 0 { - self.samples.push(Sample { stack: self.function_stack.clone() }); + self.samples.push(Sample { + stack: self.function_stack.iter().map(|f| f.frame.clone()).collect(), + }); } } @@ -226,3 +327,211 @@ impl Profiler { } } } + +struct InlineFunctionFrameBuilder<'a> { + dwarf: &'a Dwarf>, + units: Vec>>, +} + +impl<'a> InlineFunctionFrameBuilder<'a> { + pub fn new(dwarf: &'a Dwarf>) -> Self { + let mut units = vec![]; + let mut iter = dwarf.units(); + + while let Some(header) = iter.next().unwrap() { + units.push(dwarf.unit(header).unwrap()); + } + Self { dwarf, units } + } + + pub fn build( + &self, + unit: &Unit>, + entry: &DebuggingInformationEntry<'_, '_, EndianArcSlice>, + thread_builder: &mut ThreadBuilder, + function_ranges: &mut Vec<(u64, u64, Frame)>, + start_lookup: &mut HashMap>, + ) -> Result<(), ProfilerError> { + let name = self.abstract_origin_name(unit, entry)?; + + let demangled_name = demangle(&name).to_string(); + + if demangled_name.starts_with("core::") || + demangled_name.starts_with(" {high_pc}, {string_idx:?}"); + } + + Ok(()) + } + + fn pc_ranges( + &self, + unit: &Unit>, + entry: &DebuggingInformationEntry<'_, '_, EndianArcSlice>, + ) -> Result, gimli::Error> { + let Ok(value) = entry_attr(entry, DW_AT_ranges) else { + return Ok(vec![]); + }; + + let Some(ranges_offset) = self.dwarf.attr_ranges_offset(unit, value)? else { + return Ok(vec![]); + }; + + let mut ranges = vec![]; + let mut range_iter = self.dwarf.ranges(unit, ranges_offset)?; + while let Some(Range { begin, end }) = range_iter.next()? { + if begin < end && begin != 0 { + ranges.push((begin, end)); + } + } + + Ok(ranges) + } + + fn pc_range( + &self, + unit: &Unit>, + entry: &DebuggingInformationEntry<'_, '_, EndianArcSlice>, + ) -> Result, ProfilerError> { + let Some(low_pc) = self.low_pc(unit, entry)? else { + return Ok(None); + }; + let high_pc = self.high_pc(unit, entry, low_pc)?; + + Ok(Some((low_pc, high_pc))) + } + + fn low_pc( + &self, + unit: &Unit>, + entry: &DebuggingInformationEntry<'_, '_, EndianArcSlice>, + ) -> Result, ProfilerError> { + let Ok(value) = entry_attr(entry, gimli::DW_AT_low_pc) else { + return Ok(None); + }; + let low_pc = match value { + AttributeValue::Addr(val) => Ok(val), + AttributeValue::DebugAddrIndex(index) => Ok(self.dwarf.address(unit, index)?), + _ => Err(ProfilerError::UnexpectedLowPc), + }?; + Ok((low_pc != 0).then_some(low_pc)) + } + + fn high_pc( + &self, + unit: &Unit>, + entry: &DebuggingInformationEntry<'_, '_, EndianArcSlice>, + low_pc: u64, + ) -> Result { + match entry_attr(entry, DW_AT_high_pc)? { + AttributeValue::Addr(val) => Ok(val), + AttributeValue::DebugAddrIndex(index) => Ok(self.dwarf.address(unit, index)?), + AttributeValue::Udata(val) => Ok(low_pc + val), + _ => Err(ProfilerError::UnexpectedAbstractOrigin), + } + } + + fn abstract_origin_name( + &self, + unit: &Unit>, + entry: &DebuggingInformationEntry<'_, '_, EndianArcSlice>, + ) -> Result { + match entry_attr(entry, gimli::DW_AT_abstract_origin)? { + gimli::AttributeValue::UnitRef(unit_offset) => { + Ok(abstract_origin_name(self.dwarf, unit, unit_offset)?) + } + gimli::AttributeValue::DebugInfoRef(debug_info_offset) => { + let unit = match self.units.binary_search_by_key(&debug_info_offset.0, |unit| { + unit.header.offset().as_debug_info_offset().unwrap().0 + }) { + Ok(_) | Err(0) => Err(ProfilerError::NoUnitForOffset(debug_info_offset.0)), + Err(i) => Ok(&self.units[i - 1]), + }?; + let unit_offset = debug_info_offset + .to_unit_offset(&unit.header) + .ok_or(ProfilerError::InvalidAttributeAbstractOrigin)?; + Ok(abstract_origin_name(self.dwarf, unit, unit_offset)?) + } + _ => Err(ProfilerError::UnexpectedAbstractOrigin), + } + } +} + +fn load_dwarf(file: &File) -> Result>, ProfilerError> { + let endian = if file.is_little_endian() { + gimli::RunTimeEndian::Little + } else { + gimli::RunTimeEndian::Big + }; + + let dwarf = Dwarf::load(&|id| Ok::<_, gimli::Error>(load_section(id, file, endian)))?; + + Ok(dwarf) +} + +fn load_section( + id: SectionId, + file: &File, + endian: RunTimeEndian, +) -> EndianArcSlice { + let data = file + .section_by_name(id.name()) + .and_then(|section| section.uncompressed_data().ok()) + .unwrap_or(Cow::Borrowed(&[])); + + EndianArcSlice::new(Arc::from(&*data), endian) +} + +fn entry_attr( + entry: &DebuggingInformationEntry<'_, '_, ReaderT>, + dw_at: DwAt, +) -> Result, ProfilerError> { + Ok(entry.attr(dw_at)?.ok_or(ProfilerError::DwAtMissing(dw_at))?.value()) +} + +fn abstract_origin_name( + dwarf: &Dwarf>, + unit: &Unit>, + abstract_origin: UnitOffset, +) -> Result { + let mut entries = unit.entries_raw(Some(abstract_origin))?; + let abbrev = entries.read_abbreviation()?.ok_or(ProfilerError::InvalidAbstractOrigin)?; + + for spec in abbrev.attributes() { + let attr = entries.read_attribute(*spec)?; + match attr.name() { + gimli::DW_AT_linkage_name | gimli::DW_AT_MIPS_linkage_name | gimli::DW_AT_name => { + return Ok(dwarf.attr_string(unit, attr.value())?.to_string_lossy()?.into()); + } + _ => {} + } + } + + Err(ProfilerError::InvalidAbstractOrigin) +} + +fn trace_inline_functions() -> bool { + let value = std::env::var("TRACE_INLINE_FUNCTIONS").unwrap_or_else(|_| "false".to_string()); + value == "1" || value.to_lowercase() == "true" +}