miri: add some chance to reuse addresses of previously freed allocations
This commit is contained in:
parent
092bd53c1e
commit
c016768351
@ -1,6 +1,8 @@
|
|||||||
//! This module is responsible for managing the absolute addresses that allocations are located at,
|
//! This module is responsible for managing the absolute addresses that allocations are located at,
|
||||||
//! and for casting between pointers and integers based on those addresses.
|
//! and for casting between pointers and integers based on those addresses.
|
||||||
|
|
||||||
|
mod reuse_pool;
|
||||||
|
|
||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
use std::cmp::max;
|
use std::cmp::max;
|
||||||
use std::collections::hash_map::Entry;
|
use std::collections::hash_map::Entry;
|
||||||
@ -9,9 +11,10 @@
|
|||||||
|
|
||||||
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
|
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
|
||||||
use rustc_span::Span;
|
use rustc_span::Span;
|
||||||
use rustc_target::abi::{HasDataLayout, Size};
|
use rustc_target::abi::{Align, HasDataLayout, Size};
|
||||||
|
|
||||||
use crate::*;
|
use crate::*;
|
||||||
|
use reuse_pool::ReusePool;
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||||
pub enum ProvenanceMode {
|
pub enum ProvenanceMode {
|
||||||
@ -26,7 +29,7 @@ pub enum ProvenanceMode {
|
|||||||
|
|
||||||
pub type GlobalState = RefCell<GlobalStateInner>;
|
pub type GlobalState = RefCell<GlobalStateInner>;
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Debug)]
|
||||||
pub struct GlobalStateInner {
|
pub struct GlobalStateInner {
|
||||||
/// This is used as a map between the address of each allocation and its `AllocId`. It is always
|
/// This is used as a map between the address of each allocation and its `AllocId`. It is always
|
||||||
/// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
|
/// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
|
||||||
@ -38,6 +41,8 @@ pub struct GlobalStateInner {
|
|||||||
/// they do not have an `AllocExtra`.
|
/// they do not have an `AllocExtra`.
|
||||||
/// This is the inverse of `int_to_ptr_map`.
|
/// This is the inverse of `int_to_ptr_map`.
|
||||||
base_addr: FxHashMap<AllocId, u64>,
|
base_addr: FxHashMap<AllocId, u64>,
|
||||||
|
/// A pool of addresses we can reuse for future allocations.
|
||||||
|
reuse: ReusePool,
|
||||||
/// Whether an allocation has been exposed or not. This cannot be put
|
/// Whether an allocation has been exposed or not. This cannot be put
|
||||||
/// into `AllocExtra` for the same reason as `base_addr`.
|
/// into `AllocExtra` for the same reason as `base_addr`.
|
||||||
exposed: FxHashSet<AllocId>,
|
exposed: FxHashSet<AllocId>,
|
||||||
@ -53,6 +58,7 @@ fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
|
|||||||
let GlobalStateInner {
|
let GlobalStateInner {
|
||||||
int_to_ptr_map: _,
|
int_to_ptr_map: _,
|
||||||
base_addr: _,
|
base_addr: _,
|
||||||
|
reuse: _,
|
||||||
exposed: _,
|
exposed: _,
|
||||||
next_base_addr: _,
|
next_base_addr: _,
|
||||||
provenance_mode: _,
|
provenance_mode: _,
|
||||||
@ -71,6 +77,7 @@ pub fn new(config: &MiriConfig, stack_addr: u64) -> Self {
|
|||||||
GlobalStateInner {
|
GlobalStateInner {
|
||||||
int_to_ptr_map: Vec::default(),
|
int_to_ptr_map: Vec::default(),
|
||||||
base_addr: FxHashMap::default(),
|
base_addr: FxHashMap::default(),
|
||||||
|
reuse: ReusePool::new(),
|
||||||
exposed: FxHashSet::default(),
|
exposed: FxHashSet::default(),
|
||||||
next_base_addr: stack_addr,
|
next_base_addr: stack_addr,
|
||||||
provenance_mode: config.provenance_mode,
|
provenance_mode: config.provenance_mode,
|
||||||
@ -142,6 +149,7 @@ fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
|
|||||||
Ok(match global_state.base_addr.entry(alloc_id) {
|
Ok(match global_state.base_addr.entry(alloc_id) {
|
||||||
Entry::Occupied(entry) => *entry.get(),
|
Entry::Occupied(entry) => *entry.get(),
|
||||||
Entry::Vacant(entry) => {
|
Entry::Vacant(entry) => {
|
||||||
|
let mut rng = ecx.machine.rng.borrow_mut();
|
||||||
let (size, align, kind) = ecx.get_alloc_info(alloc_id);
|
let (size, align, kind) = ecx.get_alloc_info(alloc_id);
|
||||||
// This is either called immediately after allocation (and then cached), or when
|
// This is either called immediately after allocation (and then cached), or when
|
||||||
// adjusting `tcx` pointers (which never get freed). So assert that we are looking
|
// adjusting `tcx` pointers (which never get freed). So assert that we are looking
|
||||||
@ -150,28 +158,22 @@ fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
|
|||||||
// information was removed.
|
// information was removed.
|
||||||
assert!(!matches!(kind, AllocKind::Dead));
|
assert!(!matches!(kind, AllocKind::Dead));
|
||||||
|
|
||||||
// This allocation does not have a base address yet, pick one.
|
// This allocation does not have a base address yet, pick or reuse one.
|
||||||
|
let base_addr = if let Some(reuse_addr) =
|
||||||
|
global_state.reuse.take_addr(&mut *rng, size, align)
|
||||||
|
{
|
||||||
|
reuse_addr
|
||||||
|
} else {
|
||||||
|
// We have to pick a fresh address.
|
||||||
// Leave some space to the previous allocation, to give it some chance to be less aligned.
|
// Leave some space to the previous allocation, to give it some chance to be less aligned.
|
||||||
let slack = {
|
// We ensure that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
|
||||||
let mut rng = ecx.machine.rng.borrow_mut();
|
let slack = rng.gen_range(0..16);
|
||||||
// This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
|
|
||||||
rng.gen_range(0..16)
|
|
||||||
};
|
|
||||||
// From next_base_addr + slack, round up to adjust for alignment.
|
// From next_base_addr + slack, round up to adjust for alignment.
|
||||||
let base_addr = global_state
|
let base_addr = global_state
|
||||||
.next_base_addr
|
.next_base_addr
|
||||||
.checked_add(slack)
|
.checked_add(slack)
|
||||||
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
|
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
|
||||||
let base_addr = align_addr(base_addr, align.bytes());
|
let base_addr = align_addr(base_addr, align.bytes());
|
||||||
entry.insert(base_addr);
|
|
||||||
trace!(
|
|
||||||
"Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
|
|
||||||
base_addr,
|
|
||||||
alloc_id,
|
|
||||||
size.bytes(),
|
|
||||||
align.bytes(),
|
|
||||||
slack,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Remember next base address. If this allocation is zero-sized, leave a gap
|
// Remember next base address. If this allocation is zero-sized, leave a gap
|
||||||
// of at least 1 to avoid two allocations having the same base address.
|
// of at least 1 to avoid two allocations having the same base address.
|
||||||
@ -184,10 +186,35 @@ fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
|
|||||||
if global_state.next_base_addr > ecx.target_usize_max() {
|
if global_state.next_base_addr > ecx.target_usize_max() {
|
||||||
throw_exhaust!(AddressSpaceFull);
|
throw_exhaust!(AddressSpaceFull);
|
||||||
}
|
}
|
||||||
// Also maintain the opposite mapping in `int_to_ptr_map`.
|
|
||||||
// Given that `next_base_addr` increases in each allocation, pushing the
|
base_addr
|
||||||
// corresponding tuple keeps `int_to_ptr_map` sorted
|
};
|
||||||
global_state.int_to_ptr_map.push((base_addr, alloc_id));
|
trace!(
|
||||||
|
"Assigning base address {:#x} to allocation {:?} (size: {}, align: {})",
|
||||||
|
base_addr,
|
||||||
|
alloc_id,
|
||||||
|
size.bytes(),
|
||||||
|
align.bytes(),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Store address in cache.
|
||||||
|
entry.insert(base_addr);
|
||||||
|
|
||||||
|
// Also maintain the opposite mapping in `int_to_ptr_map`, ensuring we keep it sorted.
|
||||||
|
// We have a fast-path for the common case that this address is bigger than all previous ones.
|
||||||
|
let pos = if global_state
|
||||||
|
.int_to_ptr_map
|
||||||
|
.last()
|
||||||
|
.is_some_and(|(last_addr, _)| *last_addr < base_addr)
|
||||||
|
{
|
||||||
|
global_state.int_to_ptr_map.len()
|
||||||
|
} else {
|
||||||
|
global_state
|
||||||
|
.int_to_ptr_map
|
||||||
|
.binary_search_by_key(&base_addr, |(addr, _)| *addr)
|
||||||
|
.unwrap_err()
|
||||||
|
};
|
||||||
|
global_state.int_to_ptr_map.insert(pos, (base_addr, alloc_id));
|
||||||
|
|
||||||
base_addr
|
base_addr
|
||||||
}
|
}
|
||||||
@ -302,7 +329,13 @@ fn ptr_get_alloc(&self, ptr: Pointer<Provenance>) -> Option<(AllocId, Size)> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl GlobalStateInner {
|
impl GlobalStateInner {
|
||||||
pub fn free_alloc_id(&mut self, dead_id: AllocId) {
|
pub fn free_alloc_id(
|
||||||
|
&mut self,
|
||||||
|
rng: &mut impl Rng,
|
||||||
|
dead_id: AllocId,
|
||||||
|
size: Size,
|
||||||
|
align: Align,
|
||||||
|
) {
|
||||||
// We can *not* remove this from `base_addr`, since the interpreter design requires that we
|
// We can *not* remove this from `base_addr`, since the interpreter design requires that we
|
||||||
// be able to retrieve an AllocId + offset for any memory access *before* we check if the
|
// be able to retrieve an AllocId + offset for any memory access *before* we check if the
|
||||||
// access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
|
// access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
|
||||||
@ -322,6 +355,8 @@ pub fn free_alloc_id(&mut self, dead_id: AllocId) {
|
|||||||
// We can also remove it from `exposed`, since this allocation can anyway not be returned by
|
// We can also remove it from `exposed`, since this allocation can anyway not be returned by
|
||||||
// `alloc_id_from_addr` any more.
|
// `alloc_id_from_addr` any more.
|
||||||
self.exposed.remove(&dead_id);
|
self.exposed.remove(&dead_id);
|
||||||
|
// Also remember this address for future reuse.
|
||||||
|
self.reuse.add_addr(rng, addr, size, align)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
87
src/tools/miri/src/alloc_addresses/reuse_pool.rs
Normal file
87
src/tools/miri/src/alloc_addresses/reuse_pool.rs
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
//! Manages a pool of addresses that can be reused.
|
||||||
|
|
||||||
|
use rand::Rng;
|
||||||
|
|
||||||
|
use rustc_target::abi::{Align, Size};
|
||||||
|
|
||||||
|
const MAX_POOL_SIZE: usize = 64;
|
||||||
|
|
||||||
|
// Just use fair coins, until we have evidence that other numbers are better.
|
||||||
|
const ADDR_REMEMBER_CHANCE: f64 = 0.5;
|
||||||
|
const ADDR_TAKE_CHANCE: f64 = 0.5;
|
||||||
|
|
||||||
|
/// The pool strikes a balance between exploring more possible executions and making it more likely
|
||||||
|
/// to find bugs. The hypothesis is that bugs are more likely to occur when reuse happens for
|
||||||
|
/// allocations with the same layout, since that can trigger e.g. ABA issues in a concurrent data
|
||||||
|
/// structure. Therefore we only reuse allocations when size and alignment match exactly.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct ReusePool {
|
||||||
|
/// The i-th element in `pool` stores allocations of alignment `2^i`. We store these reusable
|
||||||
|
/// allocations as address-size pairs, the list must be sorted by the size.
|
||||||
|
///
|
||||||
|
/// Each of these maps has at most MAX_POOL_SIZE elements, and since alignment is limited to
|
||||||
|
/// less than 64 different possible value, that bounds the overall size of the pool.
|
||||||
|
pool: Vec<Vec<(u64, Size)>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ReusePool {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
ReusePool { pool: vec![] }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size)> {
|
||||||
|
let pool_idx: usize = align.bytes().trailing_zeros().try_into().unwrap();
|
||||||
|
if self.pool.len() <= pool_idx {
|
||||||
|
self.pool.resize(pool_idx + 1, Vec::new());
|
||||||
|
}
|
||||||
|
&mut self.pool[pool_idx]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_addr(&mut self, rng: &mut impl Rng, addr: u64, size: Size, align: Align) {
|
||||||
|
// Let's see if we even want to remember this address.
|
||||||
|
if !rng.gen_bool(ADDR_REMEMBER_CHANCE) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Determine the pool to add this to, and where in the pool to put it.
|
||||||
|
let subpool = self.subpool(align);
|
||||||
|
let pos = subpool.partition_point(|(_addr, other_size)| *other_size < size);
|
||||||
|
// Make sure the pool does not grow too big.
|
||||||
|
if subpool.len() >= MAX_POOL_SIZE {
|
||||||
|
// Pool full. Replace existing element, or last one if this would be even bigger.
|
||||||
|
let clamped_pos = pos.min(subpool.len() - 1);
|
||||||
|
subpool[clamped_pos] = (addr, size);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Add address to pool, at the right position.
|
||||||
|
subpool.insert(pos, (addr, size));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn take_addr(&mut self, rng: &mut impl Rng, size: Size, align: Align) -> Option<u64> {
|
||||||
|
// Determine whether we'll even attempt a reuse.
|
||||||
|
if !rng.gen_bool(ADDR_TAKE_CHANCE) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
// Determine the pool to take this from.
|
||||||
|
let subpool = self.subpool(align);
|
||||||
|
// Let's see if we can find something of the right size. We want to find the full range of
|
||||||
|
// such items, beginning with the first, so we can't use `binary_search_by_key`.
|
||||||
|
let begin = subpool.partition_point(|(_addr, other_size)| *other_size < size);
|
||||||
|
let mut end = begin;
|
||||||
|
while let Some((_addr, other_size)) = subpool.get(end) {
|
||||||
|
if *other_size != size {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
end += 1;
|
||||||
|
}
|
||||||
|
if end == begin {
|
||||||
|
// Could not find any item of the right size.
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
// Pick a random element with the desired size.
|
||||||
|
let idx = rng.gen_range(begin..end);
|
||||||
|
// Remove it from the pool and return.
|
||||||
|
let (chosen_addr, chosen_size) = subpool.remove(idx);
|
||||||
|
debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0);
|
||||||
|
Some(chosen_addr)
|
||||||
|
}
|
||||||
|
}
|
@ -3,6 +3,8 @@
|
|||||||
use std::num::NonZero;
|
use std::num::NonZero;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use rand::RngCore;
|
||||||
|
|
||||||
use rustc_apfloat::ieee::{Double, Single};
|
use rustc_apfloat::ieee::{Double, Single};
|
||||||
use rustc_apfloat::Float;
|
use rustc_apfloat::Float;
|
||||||
use rustc_hir::def::{DefKind, Namespace};
|
use rustc_hir::def::{DefKind, Namespace};
|
||||||
@ -18,8 +20,6 @@
|
|||||||
use rustc_target::abi::{Align, FieldIdx, FieldsShape, Size, Variants};
|
use rustc_target::abi::{Align, FieldIdx, FieldsShape, Size, Variants};
|
||||||
use rustc_target::spec::abi::Abi;
|
use rustc_target::spec::abi::Abi;
|
||||||
|
|
||||||
use rand::RngCore;
|
|
||||||
|
|
||||||
use crate::*;
|
use crate::*;
|
||||||
|
|
||||||
/// Indicates which kind of access is being performed.
|
/// Indicates which kind of access is being performed.
|
||||||
|
@ -1289,7 +1289,7 @@ fn before_memory_deallocation(
|
|||||||
alloc_extra: &mut AllocExtra<'tcx>,
|
alloc_extra: &mut AllocExtra<'tcx>,
|
||||||
(alloc_id, prove_extra): (AllocId, Self::ProvenanceExtra),
|
(alloc_id, prove_extra): (AllocId, Self::ProvenanceExtra),
|
||||||
size: Size,
|
size: Size,
|
||||||
_align: Align,
|
align: Align,
|
||||||
) -> InterpResult<'tcx> {
|
) -> InterpResult<'tcx> {
|
||||||
if machine.tracked_alloc_ids.contains(&alloc_id) {
|
if machine.tracked_alloc_ids.contains(&alloc_id) {
|
||||||
machine.emit_diagnostic(NonHaltingDiagnostic::FreedAlloc(alloc_id));
|
machine.emit_diagnostic(NonHaltingDiagnostic::FreedAlloc(alloc_id));
|
||||||
@ -1304,7 +1304,12 @@ fn before_memory_deallocation(
|
|||||||
{
|
{
|
||||||
*deallocated_at = Some(machine.current_span());
|
*deallocated_at = Some(machine.current_span());
|
||||||
}
|
}
|
||||||
machine.alloc_addresses.get_mut().free_alloc_id(alloc_id);
|
machine.alloc_addresses.get_mut().free_alloc_id(
|
||||||
|
machine.rng.get_mut(),
|
||||||
|
alloc_id,
|
||||||
|
size,
|
||||||
|
align,
|
||||||
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
16
src/tools/miri/tests/pass/address-reuse.rs
Normal file
16
src/tools/miri/tests/pass/address-reuse.rs
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
//! Check that we do sometimes reuse addresses.
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let count = 100;
|
||||||
|
let mut addrs = HashSet::<usize>::new();
|
||||||
|
for _ in 0..count {
|
||||||
|
// We make a `Box` with a layout that's hopefully not used by tons of things inside the
|
||||||
|
// allocator itself, so that we are more likely to get reuse. (With `i32` or `usize`, on
|
||||||
|
// Windows the reuse chances are very low.)
|
||||||
|
let b = Box::new([42usize; 4]);
|
||||||
|
addrs.insert(&*b as *const [usize; 4] as usize);
|
||||||
|
}
|
||||||
|
// dbg!(addrs.len());
|
||||||
|
assert!(addrs.len() > 1 && addrs.len() < count);
|
||||||
|
}
|
@ -67,8 +67,8 @@ fn ptr_eq_dangling() {
|
|||||||
drop(b);
|
drop(b);
|
||||||
let b = Box::new(0);
|
let b = Box::new(0);
|
||||||
let y = &*b as *const i32; // different allocation
|
let y = &*b as *const i32; // different allocation
|
||||||
// They *could* be equal if memory was reused, but probably are not.
|
// They *could* be equal if memory is reused...
|
||||||
assert!(x != y);
|
assert!(x != y || x == y);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ptr_eq_out_of_bounds() {
|
fn ptr_eq_out_of_bounds() {
|
||||||
|
Loading…
Reference in New Issue
Block a user