From c01676835125747b5fff3143ca2a71d2bf7b94c4 Mon Sep 17 00:00:00 2001
From: Ralf Jung
Date: Sat, 9 Mar 2024 16:05:37 +0100
Subject: [PATCH] miri: add some chance to reuse addresses of previously freed
 allocations

---
 src/tools/miri/src/alloc_addresses/mod.rs     | 101 ++++++++++++------
 .../miri/src/alloc_addresses/reuse_pool.rs    |  87 +++++++++++++++
 src/tools/miri/src/helpers.rs                 |   4 +-
 src/tools/miri/src/machine.rs                 |   9 +-
 src/tools/miri/tests/pass/address-reuse.rs    |  16 +++
 src/tools/miri/tests/pass/intptrcast.rs       |   4 +-
 6 files changed, 182 insertions(+), 39 deletions(-)
 create mode 100644 src/tools/miri/src/alloc_addresses/reuse_pool.rs
 create mode 100644 src/tools/miri/tests/pass/address-reuse.rs

diff --git a/src/tools/miri/src/alloc_addresses/mod.rs b/src/tools/miri/src/alloc_addresses/mod.rs
index 3177a1297c8..e1714aa9e46 100644
--- a/src/tools/miri/src/alloc_addresses/mod.rs
+++ b/src/tools/miri/src/alloc_addresses/mod.rs
@@ -1,6 +1,8 @@
 //! This module is responsible for managing the absolute addresses that allocations are located at,
 //! and for casting between pointers and integers based on those addresses.
 
+mod reuse_pool;
+
 use std::cell::RefCell;
 use std::cmp::max;
 use std::collections::hash_map::Entry;
@@ -9,9 +11,10 @@
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_span::Span;
-use rustc_target::abi::{HasDataLayout, Size};
+use rustc_target::abi::{Align, HasDataLayout, Size};
 
 use crate::*;
+use reuse_pool::ReusePool;
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum ProvenanceMode {
@@ -26,7 +29,7 @@ pub enum ProvenanceMode {
 
 pub type GlobalState = RefCell<GlobalStateInner>;
 
-#[derive(Clone, Debug)]
+#[derive(Debug)]
 pub struct GlobalStateInner {
     /// This is used as a map between the address of each allocation and its `AllocId`. It is always
     /// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
@@ -38,6 +41,8 @@
     /// they do not have an `AllocExtra`.
     /// This is the inverse of `int_to_ptr_map`.
     base_addr: FxHashMap<AllocId, u64>,
+    /// A pool of addresses we can reuse for future allocations.
+    reuse: ReusePool,
     /// Whether an allocation has been exposed or not. This cannot be put
     /// into `AllocExtra` for the same reason as `base_addr`.
     exposed: FxHashSet<AllocId>,
@@ -53,6 +58,7 @@ fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
         let GlobalStateInner {
             int_to_ptr_map: _,
             base_addr: _,
+            reuse: _,
             exposed: _,
             next_base_addr: _,
             provenance_mode: _,
@@ -71,6 +77,7 @@ pub fn new(config: &MiriConfig, stack_addr: u64) -> Self {
         GlobalStateInner {
             int_to_ptr_map: Vec::default(),
             base_addr: FxHashMap::default(),
+            reuse: ReusePool::new(),
             exposed: FxHashSet::default(),
             next_base_addr: stack_addr,
             provenance_mode: config.provenance_mode,
@@ -142,6 +149,7 @@ fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
         Ok(match global_state.base_addr.entry(alloc_id) {
             Entry::Occupied(entry) => *entry.get(),
             Entry::Vacant(entry) => {
+                let mut rng = ecx.machine.rng.borrow_mut();
                 let (size, align, kind) = ecx.get_alloc_info(alloc_id);
                 // This is either called immediately after allocation (and then cached), or when
                 // adjusting `tcx` pointers (which never get freed). So assert that we are looking
@@ -150,44 +158,63 @@ fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
                 // information was removed.
                 assert!(!matches!(kind, AllocKind::Dead));
 
-                // This allocation does not have a base address yet, pick one.
-                // Leave some space to the previous allocation, to give it some chance to be less aligned.
-                let slack = {
-                    let mut rng = ecx.machine.rng.borrow_mut();
-                    // This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
-                    rng.gen_range(0..16)
+                // This allocation does not have a base address yet, pick or reuse one.
+                let base_addr = if let Some(reuse_addr) =
+                    global_state.reuse.take_addr(&mut *rng, size, align)
+                {
+                    reuse_addr
+                } else {
+                    // We have to pick a fresh address.
+                    // Leave some space to the previous allocation, to give it some chance to be less aligned.
+                    // We ensure that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
+                    let slack = rng.gen_range(0..16);
+                    // From next_base_addr + slack, round up to adjust for alignment.
+                    let base_addr = global_state
+                        .next_base_addr
+                        .checked_add(slack)
+                        .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
+                    let base_addr = align_addr(base_addr, align.bytes());
+
+                    // Remember next base address. If this allocation is zero-sized, leave a gap
+                    // of at least 1 to avoid two allocations having the same base address.
+                    // (The logic in `alloc_id_from_addr` assumes unique addresses, and different
+                    // function/vtable pointers need to be distinguishable!)
+                    global_state.next_base_addr = base_addr
+                        .checked_add(max(size.bytes(), 1))
+                        .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
+                    // Even if `Size` didn't overflow, we might still have filled up the address space.
+                    if global_state.next_base_addr > ecx.target_usize_max() {
+                        throw_exhaust!(AddressSpaceFull);
+                    }
+
+                    base_addr
                 };
-                // From next_base_addr + slack, round up to adjust for alignment.
-                let base_addr = global_state
-                    .next_base_addr
-                    .checked_add(slack)
-                    .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
-                let base_addr = align_addr(base_addr, align.bytes());
-                entry.insert(base_addr);
                 trace!(
-                    "Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
+                    "Assigning base address {:#x} to allocation {:?} (size: {}, align: {})",
                     base_addr,
                     alloc_id,
                     size.bytes(),
                     align.bytes(),
-                    slack,
                 );
 
-                // Remember next base address. If this allocation is zero-sized, leave a gap
-                // of at least 1 to avoid two allocations having the same base address.
-                // (The logic in `alloc_id_from_addr` assumes unique addresses, and different
-                // function/vtable pointers need to be distinguishable!)
-                global_state.next_base_addr = base_addr
-                    .checked_add(max(size.bytes(), 1))
-                    .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
-                // Even if `Size` didn't overflow, we might still have filled up the address space.
-                if global_state.next_base_addr > ecx.target_usize_max() {
-                    throw_exhaust!(AddressSpaceFull);
-                }
-                // Also maintain the opposite mapping in `int_to_ptr_map`.
-                // Given that `next_base_addr` increases in each allocation, pushing the
-                // corresponding tuple keeps `int_to_ptr_map` sorted
-                global_state.int_to_ptr_map.push((base_addr, alloc_id));
+                // Store address in cache.
+                entry.insert(base_addr);
+
+                // Also maintain the opposite mapping in `int_to_ptr_map`, ensuring we keep it sorted.
+                // We have a fast-path for the common case that this address is bigger than all previous ones.
+                let pos = if global_state
+                    .int_to_ptr_map
+                    .last()
+                    .is_some_and(|(last_addr, _)| *last_addr < base_addr)
+                {
+                    global_state.int_to_ptr_map.len()
+                } else {
+                    global_state
+                        .int_to_ptr_map
+                        .binary_search_by_key(&base_addr, |(addr, _)| *addr)
+                        .unwrap_err()
+                };
+                global_state.int_to_ptr_map.insert(pos, (base_addr, alloc_id));
 
                 base_addr
             }
@@ -302,7 +329,13 @@ fn ptr_get_alloc(&self, ptr: Pointer<Provenance>) -> Option<(AllocId, Size)> {
 }
 
 impl GlobalStateInner {
-    pub fn free_alloc_id(&mut self, dead_id: AllocId) {
+    pub fn free_alloc_id(
+        &mut self,
+        rng: &mut impl Rng,
+        dead_id: AllocId,
+        size: Size,
+        align: Align,
+    ) {
         // We can *not* remove this from `base_addr`, since the interpreter design requires that we
         // be able to retrieve an AllocId + offset for any memory access *before* we check if the
         // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
@@ -322,6 +355,8 @@ pub fn free_alloc_id(&mut self, dead_id: AllocId) {
         // We can also remove it from `exposed`, since this allocation can anyway not be returned by
         // `alloc_id_from_addr` any more.
         self.exposed.remove(&dead_id);
+        // Also remember this address for future reuse.
+        self.reuse.add_addr(rng, addr, size, align)
     }
 }
 
diff --git a/src/tools/miri/src/alloc_addresses/reuse_pool.rs b/src/tools/miri/src/alloc_addresses/reuse_pool.rs
new file mode 100644
index 00000000000..8374d0ec605
--- /dev/null
+++ b/src/tools/miri/src/alloc_addresses/reuse_pool.rs
@@ -0,0 +1,87 @@
+//! Manages a pool of addresses that can be reused.
+
+use rand::Rng;
+
+use rustc_target::abi::{Align, Size};
+
+const MAX_POOL_SIZE: usize = 64;
+
+// Just use fair coins, until we have evidence that other numbers are better.
+const ADDR_REMEMBER_CHANCE: f64 = 0.5;
+const ADDR_TAKE_CHANCE: f64 = 0.5;
+
+/// The pool strikes a balance between exploring more possible executions and making it more likely
+/// to find bugs. The hypothesis is that bugs are more likely to occur when reuse happens for
+/// allocations with the same layout, since that can trigger e.g. ABA issues in a concurrent data
+/// structure. Therefore we only reuse allocations when size and alignment match exactly.
+#[derive(Debug)]
+pub struct ReusePool {
+    /// The i-th element in `pool` stores allocations of alignment `2^i`. We store these reusable
+    /// allocations as address-size pairs; the list must be sorted by size.
+    ///
+    /// Each of these lists has at most MAX_POOL_SIZE elements, and since alignment is limited to
+    /// fewer than 64 different possible values, that bounds the overall size of the pool.
+    pool: Vec<Vec<(u64, Size)>>,
+}
+
+impl ReusePool {
+    pub fn new() -> Self {
+        ReusePool { pool: vec![] }
+    }
+
+    fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size)> {
+        let pool_idx: usize = align.bytes().trailing_zeros().try_into().unwrap();
+        if self.pool.len() <= pool_idx {
+            self.pool.resize(pool_idx + 1, Vec::new());
+        }
+        &mut self.pool[pool_idx]
+    }
+
+    pub fn add_addr(&mut self, rng: &mut impl Rng, addr: u64, size: Size, align: Align) {
+        // Let's see if we even want to remember this address.
+        if !rng.gen_bool(ADDR_REMEMBER_CHANCE) {
+            return;
+        }
+        // Determine the pool to add this to, and where in the pool to put it.
+        let subpool = self.subpool(align);
+        let pos = subpool.partition_point(|(_addr, other_size)| *other_size < size);
+        // Make sure the pool does not grow too big.
+        if subpool.len() >= MAX_POOL_SIZE {
+            // Pool full. Replace the existing element at `pos`, or the last one if the new entry would be the biggest.
+            let clamped_pos = pos.min(subpool.len() - 1);
+            subpool[clamped_pos] = (addr, size);
+            return;
+        }
+        // Add address to pool, at the right position.
+        subpool.insert(pos, (addr, size));
+    }
+
+    pub fn take_addr(&mut self, rng: &mut impl Rng, size: Size, align: Align) -> Option<u64> {
+        // Determine whether we'll even attempt a reuse.
+        if !rng.gen_bool(ADDR_TAKE_CHANCE) {
+            return None;
+        }
+        // Determine the pool to take this from.
+        let subpool = self.subpool(align);
+        // Let's see if we can find something of the right size. We want to find the full range of
+        // such items, beginning with the first, so we can't use `binary_search_by_key`.
+        let begin = subpool.partition_point(|(_addr, other_size)| *other_size < size);
+        let mut end = begin;
+        while let Some((_addr, other_size)) = subpool.get(end) {
+            if *other_size != size {
+                break;
+            }
+            end += 1;
+        }
+        if end == begin {
+            // Could not find any item of the right size.
+            return None;
+        }
+        // Pick a random element with the desired size.
+        let idx = rng.gen_range(begin..end);
+        // Remove it from the pool and return.
+        let (chosen_addr, chosen_size) = subpool.remove(idx);
+        debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0);
+        Some(chosen_addr)
+    }
+}
diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs
index 9e4b5fe8ad7..371dc6edda3 100644
--- a/src/tools/miri/src/helpers.rs
+++ b/src/tools/miri/src/helpers.rs
@@ -3,6 +3,8 @@
 use std::num::NonZero;
 use std::time::Duration;
 
+use rand::RngCore;
+
 use rustc_apfloat::ieee::{Double, Single};
 use rustc_apfloat::Float;
 use rustc_hir::def::{DefKind, Namespace};
@@ -18,8 +20,6 @@
 use rustc_target::abi::{Align, FieldIdx, FieldsShape, Size, Variants};
 use rustc_target::spec::abi::Abi;
 
-use rand::RngCore;
-
 use crate::*;
 
 /// Indicates which kind of access is being performed.
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index 3a54629222d..53f30ec2ba4 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -1289,7 +1289,7 @@ fn before_memory_deallocation(
         alloc_extra: &mut AllocExtra<'tcx>,
         (alloc_id, prove_extra): (AllocId, Self::ProvenanceExtra),
         size: Size,
-        _align: Align,
+        align: Align,
     ) -> InterpResult<'tcx> {
         if machine.tracked_alloc_ids.contains(&alloc_id) {
             machine.emit_diagnostic(NonHaltingDiagnostic::FreedAlloc(alloc_id));
         }
@@ -1304,7 +1304,12 @@ fn before_memory_deallocation(
         {
             *deallocated_at = Some(machine.current_span());
         }
-        machine.alloc_addresses.get_mut().free_alloc_id(alloc_id);
+        machine.alloc_addresses.get_mut().free_alloc_id(
+            machine.rng.get_mut(),
+            alloc_id,
+            size,
+            align,
+        );
         Ok(())
     }
 
diff --git a/src/tools/miri/tests/pass/address-reuse.rs b/src/tools/miri/tests/pass/address-reuse.rs
new file mode 100644
index 00000000000..9b5c8c38b8f
--- /dev/null
+++ b/src/tools/miri/tests/pass/address-reuse.rs
@@ -0,0 +1,16 @@
+//! Check that we do sometimes reuse addresses.
+use std::collections::HashSet;
+
+fn main() {
+    let count = 100;
+    let mut addrs = HashSet::<usize>::new();
+    for _ in 0..count {
+        // We make a `Box` with a layout that's hopefully not used by tons of things inside the
+        // allocator itself, so that we are more likely to get reuse. (With `i32` or `usize`, on
+        // Windows the reuse chances are very low.)
+        let b = Box::new([42usize; 4]);
+        addrs.insert(&*b as *const [usize; 4] as usize);
+    }
+    // dbg!(addrs.len());
+    assert!(addrs.len() > 1 && addrs.len() < count);
+}
diff --git a/src/tools/miri/tests/pass/intptrcast.rs b/src/tools/miri/tests/pass/intptrcast.rs
index 42b6f433420..370b09f512c 100644
--- a/src/tools/miri/tests/pass/intptrcast.rs
+++ b/src/tools/miri/tests/pass/intptrcast.rs
@@ -67,8 +67,8 @@ fn ptr_eq_dangling() {
     drop(b);
     let b = Box::new(0);
     let y = &*b as *const i32; // different allocation
-    // They *could* be equal if memory was reused, but probably are not.
-    assert!(x != y);
+    // They *could* be equal if memory is reused...
+    assert!(x != y || x == y);
 }
 
 fn ptr_eq_out_of_bounds() {
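Note (illustration, not part of the patch): the user-visible effect of this change is that a dangling pointer's address can now coincide with the address of a later allocation of the same size and alignment, which is exactly the situation the intptrcast.rs test above stops ruling out. The sketch below shows what that looks like under Miri. The variable names are made up for illustration, only integer addresses are compared (so the program has no undefined behavior), and whether the two addresses actually coincide on a given run depends on Miri's RNG seed and the target's allocator internals.

    // Illustration only: observe possible address reuse after a free.
    fn main() {
        let first = Box::new(42u32);
        let old_addr = &*first as *const u32 as usize; // remember only the integer address
        drop(first); // with this patch, the freed address may enter Miri's reuse pool

        let second = Box::new(7u32); // same size and alignment as `first`
        let new_addr = &*second as *const u32 as usize;

        // May print `true` on some seeds and `false` on others.
        println!("address reused: {}", old_addr == new_addr);
    }

The new tests/pass/address-reuse.rs test takes the complementary, statistical approach: it allocates `count` boxes in a loop and asserts that the set of observed addresses has more than one element but fewer than `count`, i.e. that reuse happens sometimes but not every time.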