diff --git a/src/bin/miri.rs b/src/bin/miri.rs index 56ee96502b..dbb8d75f45 100644 --- a/src/bin/miri.rs +++ b/src/bin/miri.rs @@ -33,8 +33,8 @@ use std::sync::atomic::{AtomicI32, AtomicU32, Ordering}; use std::sync::{Arc, Once}; use miri::{ - BacktraceStyle, BorrowTrackerMethod, MiriConfig, MiriEntryFnType, ProvenanceMode, RetagFields, - ValidationMode, + BacktraceStyle, BorrowTrackerMethod, MiriConfig, MiriEntryFnType, ProvenanceGcSettings, + ProvenanceMode, RetagFields, ValidationMode, }; use rustc_abi::ExternAbi; use rustc_data_structures::sync; @@ -646,7 +646,10 @@ fn main() { let interval = param.parse::().unwrap_or_else(|err| { show_error!("-Zmiri-provenance-gc requires a `u32`: {}", err) }); - miri_config.gc_interval = interval; + miri_config.gc_settings = match interval { + 0 => ProvenanceGcSettings::Disabled, + interval => ProvenanceGcSettings::Regularly { interval }, + }; } else if let Some(param) = arg.strip_prefix("-Zmiri-measureme=") { miri_config.measureme_out = Some(param.to_string()); } else if let Some(param) = arg.strip_prefix("-Zmiri-backtrace=") { diff --git a/src/borrow_tracker/tree_borrows/mod.rs b/src/borrow_tracker/tree_borrows/mod.rs index f39a606513..70c276dbc8 100644 --- a/src/borrow_tracker/tree_borrows/mod.rs +++ b/src/borrow_tracker/tree_borrows/mod.rs @@ -61,6 +61,9 @@ impl<'tcx> Tree { }; let global = machine.borrow_tracker.as_ref().unwrap(); let span = machine.current_span(); + if self.tree_grew_significantly_since_last_gc() { + machine.request_gc(); + } self.perform_access( tag, Some((range, access_kind, diagnostics::AccessCause::Explicit(access_kind))), @@ -86,6 +89,9 @@ impl<'tcx> Tree { }; let global = machine.borrow_tracker.as_ref().unwrap(); let span = machine.current_span(); + if self.tree_grew_significantly_since_last_gc() { + machine.request_gc(); + } self.dealloc(tag, alloc_range(Size::ZERO, size), global, alloc_id, span) } @@ -107,6 +113,9 @@ impl<'tcx> Tree { alloc_id: AllocId, // diagnostics ) -> InterpResult<'tcx> { let span = machine.current_span(); + if self.tree_grew_significantly_since_last_gc() { + machine.request_gc(); + } // `None` makes it the magic on-protector-end operation self.perform_access(tag, None, global, alloc_id, span) } @@ -305,6 +314,11 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { span, new_perm.protector.is_some(), )?; + + // Also request a GC if things are getting too large. + if tree_borrows.tree_grew_significantly_since_last_gc() { + this.machine.request_gc(); + } drop(tree_borrows); // Also inform the data race model (but only if any bytes are actually affected). diff --git a/src/borrow_tracker/tree_borrows/tree.rs b/src/borrow_tracker/tree_borrows/tree.rs index 3389b1c602..fe519834db 100644 --- a/src/borrow_tracker/tree_borrows/tree.rs +++ b/src/borrow_tracker/tree_borrows/tree.rs @@ -249,6 +249,8 @@ pub struct Tree { pub(super) rperms: RangeMap>, /// The index of the root node. pub(super) root: UniIndex, + /// The number of nodes when we last ran the Garbage Collector + nodes_at_last_gc: usize, } /// A node in the borrow tree. Each node is uniquely identified by a tag via @@ -607,7 +609,7 @@ impl Tree { ); RangeMap::new(size, perms) }; - Self { root: root_idx, nodes, rperms, tag_mapping } + Self { root: root_idx, nodes, rperms, tag_mapping, nodes_at_last_gc: 1 } } } @@ -886,6 +888,22 @@ impl<'tcx> Tree { /// Integration with the BorTag garbage collector impl Tree { + /// The number of nodes in this tree + pub fn nodes_count(&self) -> usize { + self.tag_mapping.len() + } + + /// Whether the tree grew significantly since the last provenance GC run + pub fn tree_grew_significantly_since_last_gc(&self) -> bool { + let current = self.nodes_count(); + // do not trigger the GC for small nodes + let last = self.nodes_at_last_gc.max(100); + // trigger the GC if the tree doubled since the last run, + // or otherwise got "significantly" larger. + // Note that for trees < 100 nodes, nothing happens. + current > 2 * last || current > last + 2500 + } + pub fn remove_unreachable_tags(&mut self, live_tags: &FxHashSet) { self.remove_useless_children(self.root, live_tags); // Right after the GC runs is a good moment to check if we can @@ -893,6 +911,7 @@ impl Tree { // tags (this does not necessarily mean that they have identical internal representations, // see the `PartialEq` impl for `UniValMap`) self.rperms.merge_adjacent_thorough(); + self.nodes_at_last_gc = self.nodes_count(); } /// Checks if a node is useless and should be GC'ed. diff --git a/src/eval.rs b/src/eval.rs index 36b15dbf62..617367fbf0 100644 --- a/src/eval.rs +++ b/src/eval.rs @@ -89,6 +89,18 @@ pub enum ValidationMode { Deep, } +/// Settings for the provenance GC +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ProvenanceGcSettings { + /// The GC is disabled + Disabled, + /// The GC is to run regularly, every `inverval` basic blocks + Regularly { interval: u32 }, + /// The GC runs as needed, determined by heuristics. + /// See `should_run_provenance_gc` + Heuristically, +} + /// Configuration needed to spawn a Miri instance. #[derive(Clone)] pub struct MiriConfig { @@ -151,8 +163,8 @@ pub struct MiriConfig { /// The location of a shared object file to load when calling external functions /// FIXME! consider allowing users to specify paths to multiple files, or to a directory pub native_lib: Option, - /// Run a garbage collector for BorTags every N basic blocks. - pub gc_interval: u32, + /// Settings for the provenance GC (e.g. how often it runs) + pub gc_settings: ProvenanceGcSettings, /// The number of CPUs to be reported by miri. pub num_cpus: u32, /// Requires Miri to emulate pages of a certain size @@ -194,7 +206,7 @@ impl Default for MiriConfig { report_progress: None, retag_fields: RetagFields::Yes, native_lib: None, - gc_interval: 10_000, + gc_settings: ProvenanceGcSettings::Heuristically, num_cpus: 1, page_size: None, collect_leak_backtraces: true, diff --git a/src/lib.rs b/src/lib.rs index 82b2e35fae..205cc71012 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -140,8 +140,8 @@ pub use crate::diagnostics::{ EvalContextExt as _, NonHaltingDiagnostic, TerminationInfo, report_error, }; pub use crate::eval::{ - AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, MiriEntryFnType, RejectOpWith, - ValidationMode, create_ecx, eval_entry, + AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, MiriEntryFnType, ProvenanceGcSettings, + RejectOpWith, ValidationMode, create_ecx, eval_entry, }; pub use crate::helpers::{AccessKind, EvalContextExt as _}; pub use crate::intrinsics::EvalContextExt as _; diff --git a/src/machine.rs b/src/machine.rs index 4ece8f7895..c5c6af5e30 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -562,9 +562,10 @@ pub struct MiriMachine<'tcx> { pub native_lib: Option, /// Run a garbage collector for BorTags every N basic blocks. - pub(crate) gc_interval: u32, + pub(crate) gc_settings: ProvenanceGcSettings, /// The number of blocks that passed since the last BorTag GC pass. pub(crate) since_gc: u32, + pub(crate) gc_requested: Cell, /// The number of CPUs to be reported by miri. pub(crate) num_cpus: u32, @@ -738,8 +739,9 @@ impl<'tcx> MiriMachine<'tcx> { native_lib: config.native_lib.as_ref().map(|_| { panic!("calling functions from native libraries via FFI is only supported on Unix") }), - gc_interval: config.gc_interval, + gc_settings: config.gc_settings, since_gc: 0, + gc_requested: Cell::new(false), num_cpus: config.num_cpus, page_size, stack_addr, @@ -814,6 +816,10 @@ impl<'tcx> MiriMachine<'tcx> { .and_then(|(_allocated, deallocated)| *deallocated) .map(Span::data) } + + pub(crate) fn request_gc(&self) { + self.gc_requested.set(true) + } } impl VisitProvenance for MiriMachine<'_> { @@ -858,8 +864,9 @@ impl VisitProvenance for MiriMachine<'_> { report_progress: _, basic_block_count: _, native_lib: _, - gc_interval: _, + gc_settings: _, since_gc: _, + gc_requested: _, num_cpus: _, page_size: _, stack_addr: _, @@ -1545,8 +1552,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { // stacks. // When debug assertions are enabled, run the GC as often as possible so that any cases // where it mistakenly removes an important tag become visible. - if ecx.machine.gc_interval > 0 && ecx.machine.since_gc >= ecx.machine.gc_interval { - ecx.machine.since_gc = 0; + if ecx.should_run_provenance_gc() { ecx.run_provenance_gc(); } diff --git a/src/provenance_gc.rs b/src/provenance_gc.rs index b3d715db9c..04af7b8cf7 100644 --- a/src/provenance_gc.rs +++ b/src/provenance_gc.rs @@ -220,8 +220,23 @@ fn remove_unreachable_allocs<'tcx>(ecx: &mut MiriInterpCx<'tcx>, allocs: FxHashS impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> { + fn should_run_provenance_gc(&self) -> bool { + let this = self.eval_context_ref(); + match this.machine.gc_settings { + ProvenanceGcSettings::Disabled => false, + ProvenanceGcSettings::Regularly { interval } => this.machine.since_gc >= interval, + ProvenanceGcSettings::Heuristically => + // no GC run if the last one was recently + this.machine.since_gc >= 75 + // run GC if requested, or every 10000 blocks otherwise + && (this.machine.since_gc >= 10_000 || this.machine.gc_requested.get()), + } + } + fn run_provenance_gc(&mut self) { let this = self.eval_context_mut(); + this.machine.since_gc = 0; + this.machine.gc_requested.set(false); // We collect all tags and AllocId from every part of the interpreter. let mut tags = FxHashSet::default();