Auto merge of #122240 - RalfJung:miri-addr-reuse, r=oli-obk

miri: add some chance to reuse addresses of previously freed allocations

The hope is that this can help us find ABA issues.

Unfortunately this needs rustc changes so I can't easily run the regular benchmark suite. I used `src/tools/miri/tests/pass/float_nan.rs` as a substitute:
```
Before:
Benchmark 1: ./x.py run miri --stage 0 --args src/tools/miri/tests/pass/float_nan.rs --args --edition=2021
  Time (mean ± σ):      9.570 s ±  0.013 s    [User: 9.279 s, System: 0.290 s]
  Range (min … max):    9.561 s …  9.579 s    2 runs

After:
Benchmark 1: ./x.py run miri --stage 0 --args src/tools/miri/tests/pass/float_nan.rs --args --edition=2021
  Time (mean ± σ):      9.698 s ±  0.046 s    [User: 9.413 s, System: 0.279 s]
  Range (min … max):    9.666 s …  9.731 s    2 runs
```
That's a ~1.3% slowdown, which seems fine to me. I have seen a lot of noise in this style of benchmarking so I don't quite trust this anyway; we can make further experiments in the Miri repo after this migrated there.

r? `@oli-obk`
This commit is contained in:
bors 2024-03-13 09:22:55 +00:00
commit 9ce37dc729
14 changed files with 214 additions and 65 deletions

View File

@ -443,7 +443,8 @@ fn before_memory_deallocation(
_machine: &mut Self,
_alloc_extra: &mut Self::AllocExtra,
_prov: (AllocId, Self::ProvenanceExtra),
_range: AllocRange,
_size: Size,
_align: Align,
) -> InterpResult<'tcx> {
Ok(())
}

View File

@ -353,7 +353,8 @@ pub fn deallocate_ptr(
&mut self.machine,
&mut alloc.extra,
(alloc_id, prov),
alloc_range(Size::ZERO, size),
size,
alloc.align,
)?;
// Don't forget to remember size and align of this now-dead allocation

View File

@ -1,3 +1,8 @@
//! This module is responsible for managing the absolute addresses that allocations are located at,
//! and for casting between pointers and integers based on those addresses.
mod reuse_pool;
use std::cell::RefCell;
use std::cmp::max;
use std::collections::hash_map::Entry;
@ -6,9 +11,10 @@
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_span::Span;
use rustc_target::abi::{HasDataLayout, Size};
use rustc_target::abi::{Align, HasDataLayout, Size};
use crate::*;
use reuse_pool::ReusePool;
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ProvenanceMode {
@ -23,7 +29,7 @@ pub enum ProvenanceMode {
pub type GlobalState = RefCell<GlobalStateInner>;
#[derive(Clone, Debug)]
#[derive(Debug)]
pub struct GlobalStateInner {
/// This is used as a map between the address of each allocation and its `AllocId`. It is always
/// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
@ -35,6 +41,8 @@ pub struct GlobalStateInner {
/// they do not have an `AllocExtra`.
/// This is the inverse of `int_to_ptr_map`.
base_addr: FxHashMap<AllocId, u64>,
/// A pool of addresses we can reuse for future allocations.
reuse: ReusePool,
/// Whether an allocation has been exposed or not. This cannot be put
/// into `AllocExtra` for the same reason as `base_addr`.
exposed: FxHashSet<AllocId>,
@ -50,6 +58,7 @@ fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
let GlobalStateInner {
int_to_ptr_map: _,
base_addr: _,
reuse: _,
exposed: _,
next_base_addr: _,
provenance_mode: _,
@ -68,6 +77,7 @@ pub fn new(config: &MiriConfig, stack_addr: u64) -> Self {
GlobalStateInner {
int_to_ptr_map: Vec::default(),
base_addr: FxHashMap::default(),
reuse: ReusePool::new(),
exposed: FxHashSet::default(),
next_base_addr: stack_addr,
provenance_mode: config.provenance_mode,
@ -96,7 +106,7 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
// or `None` if the addr is out of bounds
fn alloc_id_from_addr(&self, addr: u64) -> Option<AllocId> {
let ecx = self.eval_context_ref();
let global_state = ecx.machine.intptrcast.borrow();
let global_state = ecx.machine.alloc_addresses.borrow();
assert!(global_state.provenance_mode != ProvenanceMode::Strict);
let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr);
@ -133,12 +143,13 @@ fn alloc_id_from_addr(&self, addr: u64) -> Option<AllocId> {
fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
let ecx = self.eval_context_ref();
let mut global_state = ecx.machine.intptrcast.borrow_mut();
let mut global_state = ecx.machine.alloc_addresses.borrow_mut();
let global_state = &mut *global_state;
Ok(match global_state.base_addr.entry(alloc_id) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
let mut rng = ecx.machine.rng.borrow_mut();
let (size, align, kind) = ecx.get_alloc_info(alloc_id);
// This is either called immediately after allocation (and then cached), or when
// adjusting `tcx` pointers (which never get freed). So assert that we are looking
@ -147,44 +158,63 @@ fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
// information was removed.
assert!(!matches!(kind, AllocKind::Dead));
// This allocation does not have a base address yet, pick one.
// Leave some space to the previous allocation, to give it some chance to be less aligned.
let slack = {
let mut rng = ecx.machine.rng.borrow_mut();
// This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
rng.gen_range(0..16)
// This allocation does not have a base address yet, pick or reuse one.
let base_addr = if let Some(reuse_addr) =
global_state.reuse.take_addr(&mut *rng, size, align)
{
reuse_addr
} else {
// We have to pick a fresh address.
// Leave some space to the previous allocation, to give it some chance to be less aligned.
// We ensure that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
let slack = rng.gen_range(0..16);
// From next_base_addr + slack, round up to adjust for alignment.
let base_addr = global_state
.next_base_addr
.checked_add(slack)
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
let base_addr = align_addr(base_addr, align.bytes());
// Remember next base address. If this allocation is zero-sized, leave a gap
// of at least 1 to avoid two allocations having the same base address.
// (The logic in `alloc_id_from_addr` assumes unique addresses, and different
// function/vtable pointers need to be distinguishable!)
global_state.next_base_addr = base_addr
.checked_add(max(size.bytes(), 1))
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
// Even if `Size` didn't overflow, we might still have filled up the address space.
if global_state.next_base_addr > ecx.target_usize_max() {
throw_exhaust!(AddressSpaceFull);
}
base_addr
};
// From next_base_addr + slack, round up to adjust for alignment.
let base_addr = global_state
.next_base_addr
.checked_add(slack)
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
let base_addr = align_addr(base_addr, align.bytes());
entry.insert(base_addr);
trace!(
"Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
"Assigning base address {:#x} to allocation {:?} (size: {}, align: {})",
base_addr,
alloc_id,
size.bytes(),
align.bytes(),
slack,
);
// Remember next base address. If this allocation is zero-sized, leave a gap
// of at least 1 to avoid two allocations having the same base address.
// (The logic in `alloc_id_from_addr` assumes unique addresses, and different
// function/vtable pointers need to be distinguishable!)
global_state.next_base_addr = base_addr
.checked_add(max(size.bytes(), 1))
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
// Even if `Size` didn't overflow, we might still have filled up the address space.
if global_state.next_base_addr > ecx.target_usize_max() {
throw_exhaust!(AddressSpaceFull);
}
// Also maintain the opposite mapping in `int_to_ptr_map`.
// Given that `next_base_addr` increases in each allocation, pushing the
// corresponding tuple keeps `int_to_ptr_map` sorted
global_state.int_to_ptr_map.push((base_addr, alloc_id));
// Store address in cache.
entry.insert(base_addr);
// Also maintain the opposite mapping in `int_to_ptr_map`, ensuring we keep it sorted.
// We have a fast-path for the common case that this address is bigger than all previous ones.
let pos = if global_state
.int_to_ptr_map
.last()
.is_some_and(|(last_addr, _)| *last_addr < base_addr)
{
global_state.int_to_ptr_map.len()
} else {
global_state
.int_to_ptr_map
.binary_search_by_key(&base_addr, |(addr, _)| *addr)
.unwrap_err()
};
global_state.int_to_ptr_map.insert(pos, (base_addr, alloc_id));
base_addr
}
@ -196,7 +226,7 @@ impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir,
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let ecx = self.eval_context_mut();
let global_state = ecx.machine.intptrcast.get_mut();
let global_state = ecx.machine.alloc_addresses.get_mut();
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
if global_state.provenance_mode == ProvenanceMode::Strict {
return Ok(());
@ -207,7 +237,7 @@ fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
return Ok(());
}
trace!("Exposing allocation id {alloc_id:?}");
let global_state = ecx.machine.intptrcast.get_mut();
let global_state = ecx.machine.alloc_addresses.get_mut();
global_state.exposed.insert(alloc_id);
if ecx.machine.borrow_tracker.is_some() {
ecx.expose_tag(alloc_id, tag)?;
@ -219,7 +249,7 @@ fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer<Option<Pro
trace!("Casting {:#x} to a pointer", addr);
let ecx = self.eval_context_ref();
let global_state = ecx.machine.intptrcast.borrow();
let global_state = ecx.machine.alloc_addresses.borrow();
// Potentially emit a warning.
match global_state.provenance_mode {
@ -299,7 +329,13 @@ fn ptr_get_alloc(&self, ptr: Pointer<Provenance>) -> Option<(AllocId, Size)> {
}
impl GlobalStateInner {
pub fn free_alloc_id(&mut self, dead_id: AllocId) {
pub fn free_alloc_id(
&mut self,
rng: &mut impl Rng,
dead_id: AllocId,
size: Size,
align: Align,
) {
// We can *not* remove this from `base_addr`, since the interpreter design requires that we
// be able to retrieve an AllocId + offset for any memory access *before* we check if the
// access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
@ -319,6 +355,8 @@ pub fn free_alloc_id(&mut self, dead_id: AllocId) {
// We can also remove it from `exposed`, since this allocation can anyway not be returned by
// `alloc_id_from_addr` any more.
self.exposed.remove(&dead_id);
// Also remember this address for future reuse.
self.reuse.add_addr(rng, addr, size, align)
}
}

View File

@ -0,0 +1,87 @@
//! Manages a pool of addresses that can be reused.
use rand::Rng;
use rustc_target::abi::{Align, Size};
const MAX_POOL_SIZE: usize = 64;
// Just use fair coins, until we have evidence that other numbers are better.
const ADDR_REMEMBER_CHANCE: f64 = 0.5;
const ADDR_TAKE_CHANCE: f64 = 0.5;
/// The pool strikes a balance between exploring more possible executions and making it more likely
/// to find bugs. The hypothesis is that bugs are more likely to occur when reuse happens for
/// allocations with the same layout, since that can trigger e.g. ABA issues in a concurrent data
/// structure. Therefore we only reuse allocations when size and alignment match exactly.
#[derive(Debug)]
pub struct ReusePool {
/// The i-th element in `pool` stores allocations of alignment `2^i`. We store these reusable
/// allocations as address-size pairs, the list must be sorted by the size.
///
/// Each of these maps has at most MAX_POOL_SIZE elements, and since alignment is limited to
/// less than 64 different possible value, that bounds the overall size of the pool.
pool: Vec<Vec<(u64, Size)>>,
}
impl ReusePool {
pub fn new() -> Self {
ReusePool { pool: vec![] }
}
fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size)> {
let pool_idx: usize = align.bytes().trailing_zeros().try_into().unwrap();
if self.pool.len() <= pool_idx {
self.pool.resize(pool_idx + 1, Vec::new());
}
&mut self.pool[pool_idx]
}
pub fn add_addr(&mut self, rng: &mut impl Rng, addr: u64, size: Size, align: Align) {
// Let's see if we even want to remember this address.
if !rng.gen_bool(ADDR_REMEMBER_CHANCE) {
return;
}
// Determine the pool to add this to, and where in the pool to put it.
let subpool = self.subpool(align);
let pos = subpool.partition_point(|(_addr, other_size)| *other_size < size);
// Make sure the pool does not grow too big.
if subpool.len() >= MAX_POOL_SIZE {
// Pool full. Replace existing element, or last one if this would be even bigger.
let clamped_pos = pos.min(subpool.len() - 1);
subpool[clamped_pos] = (addr, size);
return;
}
// Add address to pool, at the right position.
subpool.insert(pos, (addr, size));
}
pub fn take_addr(&mut self, rng: &mut impl Rng, size: Size, align: Align) -> Option<u64> {
// Determine whether we'll even attempt a reuse.
if !rng.gen_bool(ADDR_TAKE_CHANCE) {
return None;
}
// Determine the pool to take this from.
let subpool = self.subpool(align);
// Let's see if we can find something of the right size. We want to find the full range of
// such items, beginning with the first, so we can't use `binary_search_by_key`.
let begin = subpool.partition_point(|(_addr, other_size)| *other_size < size);
let mut end = begin;
while let Some((_addr, other_size)) = subpool.get(end) {
if *other_size != size {
break;
}
end += 1;
}
if end == begin {
// Could not find any item of the right size.
return None;
}
// Pick a random element with the desired size.
let idx = rng.gen_range(begin..end);
// Remove it from the pool and return.
let (chosen_addr, chosen_size) = subpool.remove(idx);
debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0);
Some(chosen_addr)
}
}

View File

@ -485,14 +485,14 @@ pub fn before_memory_deallocation<'tcx>(
&mut self,
alloc_id: AllocId,
prov_extra: ProvenanceExtra,
range: AllocRange,
size: Size,
machine: &MiriMachine<'_, 'tcx>,
) -> InterpResult<'tcx> {
match self {
AllocState::StackedBorrows(sb) =>
sb.get_mut().before_memory_deallocation(alloc_id, prov_extra, range, machine),
sb.get_mut().before_memory_deallocation(alloc_id, prov_extra, size, machine),
AllocState::TreeBorrows(tb) =>
tb.get_mut().before_memory_deallocation(alloc_id, prov_extra, range, machine),
tb.get_mut().before_memory_deallocation(alloc_id, prov_extra, size, machine),
}
}

View File

@ -574,13 +574,13 @@ pub fn before_memory_deallocation<'tcx>(
&mut self,
alloc_id: AllocId,
tag: ProvenanceExtra,
range: AllocRange,
size: Size,
machine: &MiriMachine<'_, 'tcx>,
) -> InterpResult<'tcx> {
trace!("deallocation with tag {:?}: {:?}, size {}", tag, alloc_id, range.size.bytes());
trace!("deallocation with tag {:?}: {:?}, size {}", tag, alloc_id, size.bytes());
let dcx = DiagnosticCxBuilder::dealloc(machine, tag);
let state = machine.borrow_tracker.as_ref().unwrap().borrow();
self.for_each(range, dcx, |stack, dcx, exposed_tags| {
self.for_each(alloc_range(Size::ZERO, size), dcx, |stack, dcx, exposed_tags| {
stack.dealloc(tag, &state, dcx, exposed_tags)
})?;
Ok(())

View File

@ -80,7 +80,7 @@ pub fn before_memory_deallocation(
&mut self,
alloc_id: AllocId,
prov: ProvenanceExtra,
range: AllocRange,
size: Size,
machine: &MiriMachine<'_, 'tcx>,
) -> InterpResult<'tcx> {
// TODO: for now we bail out on wildcard pointers. Eventually we should
@ -91,7 +91,7 @@ pub fn before_memory_deallocation(
};
let global = machine.borrow_tracker.as_ref().unwrap();
let span = machine.current_span();
self.dealloc(tag, range, global, alloc_id, span)
self.dealloc(tag, alloc_range(Size::ZERO, size), global, alloc_id, span)
}
pub fn expose_tag(&mut self, _tag: BorTag) {

View File

@ -1071,10 +1071,10 @@ pub fn write<'tcx>(
pub fn deallocate<'tcx>(
&mut self,
alloc_id: AllocId,
range: AllocRange,
size: Size,
machine: &mut MiriMachine<'_, '_>,
) -> InterpResult<'tcx> {
self.unique_access(alloc_id, range, NaWriteType::Deallocate, machine)
self.unique_access(alloc_id, alloc_range(Size::ZERO, size), NaWriteType::Deallocate, machine)
}
}

View File

@ -5,6 +5,8 @@
use std::sync::Mutex;
use std::time::Duration;
use rand::RngCore;
use rustc_apfloat::ieee::{Double, Single};
use rustc_apfloat::Float;
use rustc_hir::def::{DefKind, Namespace};
@ -20,8 +22,6 @@
use rustc_target::abi::{Align, FieldIdx, FieldsShape, Size, Variants};
use rustc_target::spec::abi::Abi;
use rand::RngCore;
use crate::*;
/// Indicates which kind of access is being performed.

View File

@ -72,13 +72,13 @@
#[allow(unused_extern_crates)]
extern crate rustc_driver;
mod alloc_addresses;
mod borrow_tracker;
mod clock;
mod concurrency;
mod diagnostics;
mod eval;
mod helpers;
mod intptrcast;
mod machine;
mod mono_hash_map;
mod operator;
@ -101,6 +101,7 @@
pub use crate::shims::time::EvalContextExt as _;
pub use crate::shims::tls::TlsData;
pub use crate::alloc_addresses::{EvalContextExt as _, ProvenanceMode};
pub use crate::borrow_tracker::stacked_borrows::{
EvalContextExt as _, Item, Permission, Stack, Stacks,
};
@ -122,7 +123,6 @@
create_ecx, eval_entry, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith,
};
pub use crate::helpers::{AccessKind, EvalContextExt as _};
pub use crate::intptrcast::{EvalContextExt as _, ProvenanceMode};
pub use crate::machine::{
AllocExtra, FrameExtra, MiriInterpCx, MiriInterpCxExt, MiriMachine, MiriMemoryKind,
PrimitiveLayouts, Provenance, ProvenanceExtra,

View File

@ -435,7 +435,7 @@ pub struct MiriMachine<'mir, 'tcx> {
pub data_race: Option<data_race::GlobalState>,
/// Ptr-int-cast module global data.
pub intptrcast: intptrcast::GlobalState,
pub alloc_addresses: alloc_addresses::GlobalState,
/// Environment variables set by `setenv`.
/// Miri does not expose env vars from the host to the emulated program.
@ -630,7 +630,7 @@ pub(crate) fn new(config: &MiriConfig, layout_cx: LayoutCx<'tcx, TyCtxt<'tcx>>)
tcx,
borrow_tracker,
data_race,
intptrcast: RefCell::new(intptrcast::GlobalStateInner::new(config, stack_addr)),
alloc_addresses: RefCell::new(alloc_addresses::GlobalStateInner::new(config, stack_addr)),
// `env_vars` depends on a full interpreter so we cannot properly initialize it yet.
env_vars: EnvVars::default(),
main_fn_ret_place: None,
@ -777,7 +777,7 @@ fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
dir_handler,
borrow_tracker,
data_race,
intptrcast,
alloc_addresses,
file_handler,
tcx: _,
isolated_op: _,
@ -821,7 +821,7 @@ fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
file_handler.visit_provenance(visit);
data_race.visit_provenance(visit);
borrow_tracker.visit_provenance(visit);
intptrcast.visit_provenance(visit);
alloc_addresses.visit_provenance(visit);
main_fn_ret_place.visit_provenance(visit);
argc.visit_provenance(visit);
argv.visit_provenance(visit);
@ -1282,22 +1282,28 @@ fn before_memory_deallocation(
machine: &mut Self,
alloc_extra: &mut AllocExtra<'tcx>,
(alloc_id, prove_extra): (AllocId, Self::ProvenanceExtra),
range: AllocRange,
size: Size,
align: Align,
) -> InterpResult<'tcx> {
if machine.tracked_alloc_ids.contains(&alloc_id) {
machine.emit_diagnostic(NonHaltingDiagnostic::FreedAlloc(alloc_id));
}
if let Some(data_race) = &mut alloc_extra.data_race {
data_race.deallocate(alloc_id, range, machine)?;
data_race.deallocate(alloc_id, size, machine)?;
}
if let Some(borrow_tracker) = &mut alloc_extra.borrow_tracker {
borrow_tracker.before_memory_deallocation(alloc_id, prove_extra, range, machine)?;
borrow_tracker.before_memory_deallocation(alloc_id, prove_extra, size, machine)?;
}
if let Some((_, deallocated_at)) = machine.allocation_spans.borrow_mut().get_mut(&alloc_id)
{
*deallocated_at = Some(machine.current_span());
}
machine.intptrcast.get_mut().free_alloc_id(alloc_id);
machine.alloc_addresses.get_mut().free_alloc_id(
machine.rng.get_mut(),
alloc_id,
size,
align,
);
Ok(())
}

View File

@ -197,7 +197,7 @@ fn remove_unreachable_allocs(&mut self, allocs: FxHashSet<AllocId>) {
let allocs = LiveAllocs { ecx: this, collected: allocs };
this.machine.allocation_spans.borrow_mut().retain(|id, _| allocs.is_live(*id));
this.machine.symbolic_alignment.borrow_mut().retain(|id, _| allocs.is_live(*id));
this.machine.intptrcast.borrow_mut().remove_unreachable_allocs(&allocs);
this.machine.alloc_addresses.borrow_mut().remove_unreachable_allocs(&allocs);
if let Some(borrow_tracker) = &this.machine.borrow_tracker {
borrow_tracker.borrow_mut().remove_unreachable_allocs(&allocs);
}

View File

@ -0,0 +1,16 @@
//! Check that we do sometimes reuse addresses.
use std::collections::HashSet;
fn main() {
let count = 100;
let mut addrs = HashSet::<usize>::new();
for _ in 0..count {
// We make a `Box` with a layout that's hopefully not used by tons of things inside the
// allocator itself, so that we are more likely to get reuse. (With `i32` or `usize`, on
// Windows the reuse chances are very low.)
let b = Box::new([42usize; 4]);
addrs.insert(&*b as *const [usize; 4] as usize);
}
// dbg!(addrs.len());
assert!(addrs.len() > 1 && addrs.len() < count);
}

View File

@ -67,8 +67,8 @@ fn ptr_eq_dangling() {
drop(b);
let b = Box::new(0);
let y = &*b as *const i32; // different allocation
// They *could* be equal if memory was reused, but probably are not.
assert!(x != y);
// They *could* be equal if memory is reused...
assert!(x != y || x == y);
}
fn ptr_eq_out_of_bounds() {