Implement -Zmiri-tag-gc, a garbage collector for tags

Ben Kimock 2022-08-10 20:50:36 -04:00
parent beed5eddb0
commit d61d4c6af7
10 changed files with 241 additions and 7 deletions


@@ -323,6 +323,10 @@ environment variable. We first document the most relevant and most commonly used
  ensure alignment. (The standard library `align_to` method works fine in both modes; under
  symbolic alignment it only fills the middle slice when the allocation guarantees sufficient
  alignment.)
* `-Zmiri-tag-gc=<blocks>` configures how often the pointer tag garbage collector runs. The default
  is to search for and remove unreachable tags once every `10000` basic blocks. Setting this to
  `0` disables the garbage collector, which causes some programs to have explosive memory usage
  and/or super-linear runtime.
The remaining flags are for advanced use only, and more likely to change or be removed.
Some of these are **unsound**, which means they can lead


@@ -521,6 +521,12 @@ fn main() {
                Err(err) => show_error!("-Zmiri-report-progress requires a `u32`: {}", err),
            };
            miri_config.report_progress = Some(interval);
        } else if let Some(param) = arg.strip_prefix("-Zmiri-tag-gc=") {
            let interval = match param.parse::<u32>() {
                Ok(i) => i,
                Err(err) => show_error!("-Zmiri-tag-gc requires a `u32`: {}", err),
            };
            miri_config.gc_interval = interval;
        } else if let Some(param) = arg.strip_prefix("-Zmiri-measureme=") {
            miri_config.measureme_out = Some(param.to_string());
        } else if let Some(param) = arg.strip_prefix("-Zmiri-backtrace=") {


@@ -289,6 +289,10 @@ fn active_thread_stack_mut(
        &mut self.threads[self.active_thread].stack
    }

    pub fn iter(&self) -> impl Iterator<Item = &Thread<'mir, 'tcx>> {
        self.threads.iter()
    }

    pub fn all_stacks(
        &self,
    ) -> impl Iterator<Item = &[Frame<'mir, 'tcx, Provenance, FrameData<'tcx>>]> {


@@ -132,6 +132,8 @@ pub struct MiriConfig {
    /// The location of a shared object file to load when calling external functions
    /// FIXME! consider allowing users to specify paths to multiple SO files, or to a directory
    pub external_so_file: Option<PathBuf>,
    /// Run a garbage collector for SbTags every N basic blocks.
    pub gc_interval: u32,
}

impl Default for MiriConfig {
@@ -164,6 +166,7 @@ fn default() -> MiriConfig {
            report_progress: None,
            retag_fields: false,
            external_so_file: None,
            gc_interval: 10_000,
        }
    }
}


@@ -62,6 +62,7 @@
mod range_map;
mod shims;
mod stacked_borrows;
mod tag_gc;

// Establish a "crate-wide prelude": we often import `crate::*`.
@@ -110,6 +111,7 @@
pub use crate::stacked_borrows::{
    CallId, EvalContextExt as StackedBorEvalContextExt, Item, Permission, SbTag, Stack, Stacks,
};
pub use crate::tag_gc::EvalContextExt as _;

/// Insert rustc arguments at the beginning of the argument list that Miri wants to be
/// set per default, for maximal validation power.


@@ -394,6 +394,11 @@ pub struct Evaluator<'mir, 'tcx> {
    /// Handle of the optional shared object file for external functions.
    pub external_so_lib: Option<(libloading::Library, std::path::PathBuf)>,

    /// Run a garbage collector for SbTags every N basic blocks.
    pub(crate) gc_interval: u32,
    /// The number of blocks that passed since the last SbTag GC pass.
    pub(crate) since_gc: u32,
}

impl<'mir, 'tcx> Evaluator<'mir, 'tcx> {
@@ -469,6 +474,8 @@ pub(crate) fn new(config: &MiriConfig, layout_cx: LayoutCx<'tcx, TyCtxt<'tcx>>)
                    lib_file_path.clone(),
                )
            }),
            gc_interval: config.gc_interval,
            since_gc: 0,
        }
    }
@@ -1016,6 +1023,20 @@ fn before_terminator(ecx: &mut InterpCx<'mir, 'tcx, Self>) -> InterpResult<'tcx>
                });
            }
        }

        // Search for SbTags to find all live pointers, then remove all other tags from borrow
        // stacks.
        // When debug assertions are enabled, run the GC as often as possible so that any cases
        // where it mistakenly removes an important tag become visible.
        if cfg!(debug_assertions)
            || (ecx.machine.gc_interval > 0 && ecx.machine.since_gc >= ecx.machine.gc_interval)
        {
            ecx.machine.since_gc = 0;
            ecx.garbage_collect_tags()?;
        } else {
            ecx.machine.since_gc += 1;
        }

        // These are our preemption points.
        ecx.maybe_preempt_active_thread();

        Ok(())
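
The trigger above is a simple counter cadence. As a self-contained sketch of the same logic (the `GcCadence` name and shape are illustrative, not Miri's):

// Sketch: run an expensive maintenance pass every `interval` steps, or on
// every step when debug assertions are enabled so bugs in the pass surface early.
struct GcCadence {
    interval: u32, // 0 disables the periodic pass
    since_gc: u32,
}

impl GcCadence {
    fn should_run(&mut self) -> bool {
        if cfg!(debug_assertions) || (self.interval > 0 && self.since_gc >= self.interval) {
            self.since_gc = 0;
            true
        } else {
            self.since_gc += 1;
            false
        }
    }
}

fn main() {
    let mut gc = GcCadence { interval: 3, since_gc: 0 };
    // With debug assertions off this prints: false false false true false
    for _ in 0..5 {
        print!("{} ", gc.should_run());
    }
}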


@@ -233,6 +233,12 @@ fn delete_all_thread_tls(&mut self, thread_id: ThreadId) {
            data.remove(&thread_id);
        }
    }

    pub fn iter(&self, mut visitor: impl FnMut(&Scalar<Provenance>)) {
        for scalar in self.keys.values().flat_map(|v| v.data.values()) {
            visitor(scalar);
        }
    }
}

impl<'mir, 'tcx: 'mir> EvalContextPrivExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}


@@ -80,6 +80,8 @@ pub struct Stacks {
    history: AllocHistory,
    /// The set of tags that have been exposed inside this allocation.
    exposed_tags: FxHashSet<SbTag>,
    /// Whether this memory has been modified since the last time the tag GC ran
    modified_since_last_gc: bool,
}

/// Extra global state, available to the memory access hooks.
@@ -422,6 +424,7 @@ fn dealloc(
            let item = self.get(idx).unwrap();
            Stack::item_popped(&item, global, dcx)?;
        }

        Ok(())
    }
@@ -496,6 +499,20 @@ fn grant(
    }

// # Stacked Borrows Core End

/// Integration with the SbTag garbage collector
impl Stacks {
    pub fn remove_unreachable_tags(&mut self, live_tags: &FxHashSet<SbTag>) {
        if self.modified_since_last_gc {
            for stack in self.stacks.iter_mut_all() {
                if stack.len() > 64 {
                    stack.retain(live_tags);
                }
            }
            self.modified_since_last_gc = false;
        }
    }
}

/// Map per-stack operations to higher-level per-location-range operations.
impl<'tcx> Stacks {
    /// Creates a new stack with an initial tag. For diagnostic purposes, we also need to know
@@ -514,6 +531,7 @@ fn new(
            stacks: RangeMap::new(size, stack),
            history: AllocHistory::new(id, item, current_span),
            exposed_tags: FxHashSet::default(),
            modified_since_last_gc: false,
        }
    }
@@ -528,6 +546,7 @@ fn for_each(
            &mut FxHashSet<SbTag>,
        ) -> InterpResult<'tcx>,
    ) -> InterpResult<'tcx> {
        self.modified_since_last_gc = true;
        for (offset, stack) in self.stacks.iter_mut(range.start, range.size) {
            let mut dcx = dcx_builder.build(&mut self.history, offset);
            f(stack, &mut dcx, &mut self.exposed_tags)?;
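
`modified_since_last_gc` is a dirty bit: `for_each` is the mutation path into the borrow stacks, so setting the flag there and clearing it after a sweep lets the GC skip allocations untouched since the last pass. A minimal self-contained sketch of the pattern (illustrative names, not Miri's):

// Sketch: route every write through `mutate` so the dirty bit can never be
// stale, and let periodic maintenance skip anything still clean.
struct Tracked<T> {
    value: T,
    dirty: bool,
}

impl<T> Tracked<T> {
    fn mutate(&mut self, f: impl FnOnce(&mut T)) {
        f(&mut self.value);
        self.dirty = true;
    }

    fn maintain(&mut self, f: impl FnOnce(&mut T)) {
        if self.dirty {
            f(&mut self.value);
            self.dirty = false;
        }
    }
}

fn main() {
    let mut cache = Tracked { value: vec![3, 1, 2], dirty: false };
    cache.maintain(|v| v.clear()); // skipped: not dirty
    cache.mutate(|v| v.push(4));
    cache.maintain(|v| v.sort()); // runs: the push marked it dirty
    assert_eq!(cache.value, vec![1, 2, 3, 4]);
}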


@@ -39,6 +39,61 @@ pub struct Stack {
    unique_range: Range<usize>,
}
impl Stack {
    pub fn retain(&mut self, tags: &FxHashSet<SbTag>) {
        let mut first_removed = None;

        let mut read_idx = 1;
        let mut write_idx = 1;
        while read_idx < self.borrows.len() {
            let left = self.borrows[read_idx - 1];
            let this = self.borrows[read_idx];
            let should_keep = match this.perm() {
                // SharedReadWrite is the simplest case, if it's unreachable we can just remove it.
                Permission::SharedReadWrite => tags.contains(&this.tag()),
                // Only retain a Disabled tag if it is terminating a SharedReadWrite block.
                Permission::Disabled => left.perm() == Permission::SharedReadWrite,
                // Unique and SharedReadOnly can terminate a SharedReadWrite block, so only remove
                // them if they are both unreachable and not directly after a SharedReadWrite.
                Permission::Unique | Permission::SharedReadOnly =>
                    left.perm() == Permission::SharedReadWrite || tags.contains(&this.tag()),
            };

            if should_keep {
                if read_idx != write_idx {
                    self.borrows[write_idx] = self.borrows[read_idx];
                }
                write_idx += 1;
            } else if first_removed.is_none() {
                first_removed = Some(read_idx);
            }

            read_idx += 1;
        }
        self.borrows.truncate(write_idx);

        #[cfg(not(feature = "stack-cache"))]
        drop(first_removed); // This is only needed for the stack-cache

        #[cfg(feature = "stack-cache")]
        if let Some(first_removed) = first_removed {
            // Either end of unique_range may have shifted, all we really know is that we can't
            // have introduced a new Unique.
            if !self.unique_range.is_empty() {
                self.unique_range = 0..self.len();
            }

            // Replace any Items which have been collected with the base item, a known-good value.
            for i in 0..CACHE_LEN {
                if self.cache.idx[i] >= first_removed {
                    self.cache.items[i] = self.borrows[0];
                    self.cache.idx[i] = 0;
                }
            }
        }
    }
}
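
To make the retention rules concrete, here is a simplified, self-contained model of the compaction above, with the borrow stack reduced to (tag, permission) pairs; `Item`, `SbTag`, and the stack cache are Miri internals, so the types here are illustrative:

use std::collections::HashSet;

#[derive(Clone, Copy, PartialEq, Debug)]
enum Perm { Unique, SharedReadWrite, SharedReadOnly, Disabled }

// Two-cursor, in-place compaction as in `Stack::retain`: `read` scans every
// item, `write` marks where the next kept item lands, and index 0 (the base
// item) is always kept.
fn retain(stack: &mut Vec<(u64, Perm)>, live: &HashSet<u64>) {
    let (mut read, mut write) = (1, 1);
    while read < stack.len() {
        let (left, this) = (stack[read - 1], stack[read]);
        let keep = match this.1 {
            Perm::SharedReadWrite => live.contains(&this.0),
            Perm::Disabled => left.1 == Perm::SharedReadWrite,
            Perm::Unique | Perm::SharedReadOnly =>
                left.1 == Perm::SharedReadWrite || live.contains(&this.0),
        };
        if keep {
            stack[write] = stack[read];
            write += 1;
        }
        read += 1;
    }
    stack.truncate(write);
}

fn main() {
    let live: HashSet<u64> = [0, 3].into_iter().collect();
    let mut stack = vec![
        (0, Perm::Unique),          // base item: always kept
        (1, Perm::SharedReadWrite), // unreachable SharedReadWrite: removed
        (2, Perm::Disabled),        // unreachable, but terminates a SharedReadWrite block: kept
        (3, Perm::Unique),          // reachable: kept
    ];
    retain(&mut stack, &live);
    assert_eq!(stack, vec![(0, Perm::Unique), (2, Perm::Disabled), (3, Perm::Unique)]);
}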
/// A very small cache of searches of a borrow stack, mapping `Item`s to their position in said stack.
///
/// It may seem like maintaining this cache is a waste for small stacks, but
@@ -105,14 +160,11 @@ fn verify_cache_consistency(&self) {
         // Check that the unique_range is a valid index into the borrow stack.
         // This asserts that the unique_range's start <= end.
-        let uniques = &self.borrows[self.unique_range.clone()];
+        let _uniques = &self.borrows[self.unique_range.clone()];
 
-        // Check that the start of the unique_range is precise.
-        if let Some(first_unique) = uniques.first() {
-            assert_eq!(first_unique.perm(), Permission::Unique);
-        }
-        // We cannot assert that the unique range is exact on the upper end.
-        // When we pop items within the unique range, setting the end of the range precisely
+        // We cannot assert that the unique range is precise.
+        // Both ends may shift around when `Stack::retain` is called. Additionally,
+        // when we pop items within the unique range, setting the end of the range precisely
         // requires doing a linear search of the borrow stack, which is exactly the kind of
         // operation that all this caching exists to avoid.
     }

src/tag_gc.rs (new file, 117 lines)

@@ -0,0 +1,117 @@
use crate::*;
use rustc_data_structures::fx::FxHashSet;

impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}
pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
    fn garbage_collect_tags(&mut self) -> InterpResult<'tcx> {
        let this = self.eval_context_mut();
        // No reason to do anything at all if stacked borrows is off.
        if this.machine.stacked_borrows.is_none() {
            return Ok(());
        }

        let mut tags = FxHashSet::default();

        for thread in this.machine.threads.iter() {
            if let Some(Scalar::Ptr(
                Pointer { provenance: Provenance::Concrete { sb, .. }, .. },
                _,
            )) = thread.panic_payload
            {
                tags.insert(sb);
            }
        }

        self.find_tags_in_tls(&mut tags);
        self.find_tags_in_memory(&mut tags);
        self.find_tags_in_locals(&mut tags)?;

        self.remove_unreachable_tags(tags);

        Ok(())
    }

    fn find_tags_in_tls(&mut self, tags: &mut FxHashSet<SbTag>) {
        let this = self.eval_context_mut();
        this.machine.tls.iter(|scalar| {
            if let Scalar::Ptr(Pointer { provenance: Provenance::Concrete { sb, .. }, .. }, _) =
                scalar
            {
                tags.insert(*sb);
            }
        });
    }

    fn find_tags_in_memory(&mut self, tags: &mut FxHashSet<SbTag>) {
        let this = self.eval_context_mut();
        this.memory.alloc_map().iter(|it| {
            for (_id, (_kind, alloc)) in it {
                for (_size, prov) in alloc.provenance().iter() {
                    if let Provenance::Concrete { sb, .. } = prov {
                        tags.insert(*sb);
                    }
                }
            }
        });
    }

    fn find_tags_in_locals(&mut self, tags: &mut FxHashSet<SbTag>) -> InterpResult<'tcx> {
        let this = self.eval_context_mut();
        for frame in this.machine.threads.all_stacks().flatten() {
            // Handle the return place of each frame
            if let Ok(return_place) = frame.return_place.try_as_mplace() {
                if let Some(Provenance::Concrete { sb, .. }) = return_place.ptr.provenance {
                    tags.insert(sb);
                }
            }

            for local in frame.locals.iter() {
                let LocalValue::Live(value) = local.value else {
                    continue;
                };
                match value {
                    Operand::Immediate(Immediate::Scalar(Scalar::Ptr(ptr, _))) =>
                        if let Provenance::Concrete { sb, .. } = ptr.provenance {
                            tags.insert(sb);
                        },
                    Operand::Immediate(Immediate::ScalarPair(s1, s2)) => {
                        if let Scalar::Ptr(ptr, _) = s1 {
                            if let Provenance::Concrete { sb, .. } = ptr.provenance {
                                tags.insert(sb);
                            }
                        }
                        if let Scalar::Ptr(ptr, _) = s2 {
                            if let Provenance::Concrete { sb, .. } = ptr.provenance {
                                tags.insert(sb);
                            }
                        }
                    }
                    Operand::Indirect(MemPlace { ptr, .. }) => {
                        if let Some(Provenance::Concrete { sb, .. }) = ptr.provenance {
                            tags.insert(sb);
                        }
                    }
                    Operand::Immediate(Immediate::Uninit)
                    | Operand::Immediate(Immediate::Scalar(Scalar::Int(_))) => {}
                }
            }
        }

        Ok(())
    }

    fn remove_unreachable_tags(&mut self, tags: FxHashSet<SbTag>) {
        let this = self.eval_context_mut();
        this.memory.alloc_map().iter(|it| {
            for (_id, (_kind, alloc)) in it {
                alloc
                    .extra
                    .stacked_borrows
                    .as_ref()
                    .unwrap()
                    .borrow_mut()
                    .remove_unreachable_tags(&tags);
            }
        });
    }
}
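
Taken together, the file is a straightforward mark-and-sweep: the scans over panic payloads, TLS, memory, and locals build the live set, and the final pass sweeps every allocation's borrow stacks. A self-contained sketch of that overall shape, with tags reduced to `u64` (illustrative types, not Miri's):

use std::collections::{HashMap, HashSet};

// Mark every tag reachable from the roots, then sweep each borrow stack down
// to the live tags (a stack is just a Vec of tags here).
fn garbage_collect_tags(roots: &[Vec<u64>], stacks: &mut HashMap<usize, Vec<u64>>) {
    let live: HashSet<u64> = roots.iter().flatten().copied().collect(); // mark
    for stack in stacks.values_mut() {
        stack.retain(|tag| live.contains(tag)); // sweep
    }
}

fn main() {
    let roots = vec![vec![1, 3], vec![3, 5]]; // e.g. locals, TLS entries
    let mut stacks = HashMap::from([(0, vec![1, 2, 3]), (1, vec![4, 5])]);
    garbage_collect_tags(&roots, &mut stacks);
    assert_eq!(stacks[&0], vec![1, 3]);
    assert_eq!(stacks[&1], vec![5]);
}

The real sweep is the `Stack::retain` shown earlier, which additionally has to preserve the structural invariants of a borrow stack rather than filtering on set membership alone.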