rust/src/intptrcast.rs

262 lines
10 KiB
Rust
Raw Normal View History

use std::cell::RefCell;
2020-03-02 22:36:15 +01:00
use std::collections::hash_map::Entry;
2019-06-20 14:21:47 -05:00
use log::trace;
use rand::Rng;
2022-05-22 15:22:05 -05:00
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
2021-05-16 11:28:01 +02:00
use rustc_target::abi::{HasDataLayout, Size};
2019-06-20 14:21:47 -05:00
use crate::*;
2022-05-22 15:22:05 -05:00
/// Selects which provenance an integer-to-pointer cast gives to its result.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ProvenanceMode {
    /// An int2ptr cast yields a pointer carrying "wildcard" provenance, which
    /// essentially matches the provenance of every exposed pointer
    /// (and the SB tags, when those are enabled).
    Permissive,
    /// An int2ptr cast yields a pointer whose provenance is invalid,
    /// i.e., it may not be used for any memory access.
    Strict,
    /// An int2ptr cast resolves the allocation it points to at the time of the cast.
    /// Every allocation is treated as exposed.
    Legacy,
}
/// A [`GlobalStateInner`] wrapped in a `RefCell`, giving it interior mutability.
pub type GlobalState = RefCell<GlobalStateInner>;
2019-06-20 14:21:47 -05:00
#[derive(Clone, Debug)]
pub struct GlobalStateInner {
/// This is used as a map between the address of each allocation and its `AllocId`.
/// It is always sorted
2022-04-01 14:10:24 -04:00
int_to_ptr_map: Vec<(u64, AllocId)>,
/// The base address for each allocation. We cannot put that into
/// `AllocExtra` because function pointers also have a base address, and
/// they do not have an `AllocExtra`.
/// This is the inverse of `int_to_ptr_map`.
2022-04-01 14:10:24 -04:00
base_addr: FxHashMap<AllocId, u64>,
2022-05-22 15:22:05 -05:00
/// Whether an allocation has been exposed or not. This cannot be put
/// into `AllocExtra` for the same reason as `base_addr`.
exposed: FxHashSet<AllocId>,
/// This is used as a memory address when a new pointer is casted to an integer. It
/// is always larger than any address that was previously made part of a block.
2022-04-01 14:10:24 -04:00
next_base_addr: u64,
2022-05-22 15:22:05 -05:00
/// The provenance to use for int2ptr casts
provenance_mode: ProvenanceMode,
2019-06-20 14:21:47 -05:00
}
impl GlobalStateInner {
2022-04-01 14:10:24 -04:00
pub fn new(config: &MiriConfig) -> Self {
GlobalStateInner {
int_to_ptr_map: Vec::default(),
2020-03-02 22:36:15 +01:00
base_addr: FxHashMap::default(),
2022-05-22 15:22:05 -05:00
exposed: FxHashSet::default(),
2019-06-29 13:33:47 +02:00
next_base_addr: STACK_ADDR,
2022-05-22 15:22:05 -05:00
provenance_mode: config.provenance_mode,
2019-06-20 14:21:47 -05:00
}
}
}
impl<'mir, 'tcx> GlobalStateInner {
2022-05-22 15:22:05 -05:00
// Returns the exposed `AllocId` that corresponds to the specified addr,
// or `None` if the addr is out of bounds
fn alloc_id_from_addr(ecx: &MiriEvalContext<'mir, 'tcx>, addr: u64) -> Option<AllocId> {
let global_state = ecx.machine.intptrcast.borrow();
2022-05-22 15:22:05 -05:00
assert!(global_state.provenance_mode != ProvenanceMode::Strict);
2022-04-01 14:10:24 -04:00
let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr);
2022-05-22 15:22:05 -05:00
2021-07-15 20:33:08 +02:00
let alloc_id = match pos {
Ok(pos) => Some(global_state.int_to_ptr_map[pos].1),
Err(0) => None,
Err(pos) => {
2019-06-25 14:07:23 -05:00
// This is the largest of the adresses smaller than `int`,
// i.e. the greatest lower bound (glb)
let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
2021-07-15 20:33:08 +02:00
// This never overflows because `addr >= glb`
let offset = addr - glb;
// If the offset exceeds the size of the allocation, don't use this `alloc_id`.
2022-05-22 15:22:05 -05:00
2021-07-15 20:33:08 +02:00
if offset
<= ecx
.get_alloc_size_and_align(alloc_id, AllocCheck::MaybeDead)
.unwrap()
.0
.bytes()
2021-07-15 20:33:08 +02:00
{
Some(alloc_id)
} else {
2021-07-15 20:33:08 +02:00
None
2019-09-05 18:17:58 +02:00
}
}
2022-05-22 15:22:05 -05:00
}?;
// In legacy mode, we consider all allocations exposed.
if global_state.provenance_mode == ProvenanceMode::Legacy
|| global_state.exposed.contains(&alloc_id)
{
Some(alloc_id)
} else {
None
}
}
2022-06-24 16:45:22 -04:00
pub fn expose_ptr(ecx: &mut MiriEvalContext<'mir, 'tcx>, alloc_id: AllocId, sb: SbTag) {
let global_state = ecx.machine.intptrcast.get_mut();
2022-05-23 09:17:04 +02:00
// In legacy and strict mode, we don't need this, so we can save some cycles
// by not tracking it.
2022-05-22 15:22:05 -05:00
if global_state.provenance_mode == ProvenanceMode::Permissive {
trace!("Exposing allocation id {alloc_id:?}");
2022-05-22 15:22:05 -05:00
global_state.exposed.insert(alloc_id);
2022-06-24 16:45:22 -04:00
if ecx.machine.stacked_borrows.is_some() {
ecx.expose_tag(alloc_id, sb);
}
2022-05-22 15:22:05 -05:00
}
}
pub fn ptr_from_addr_transmute(
ecx: &MiriEvalContext<'mir, 'tcx>,
addr: u64,
) -> Pointer<Option<Tag>> {
trace!("Transmuting 0x{:x} to a pointer", addr);
if ecx.machine.allow_ptr_int_transmute {
// When we allow transmutes, treat them like casts.
Self::ptr_from_addr_cast(ecx, addr)
} else {
// We consider transmuted pointers to be "invalid" (`None` provenance).
Pointer::new(None, Size::from_bytes(addr))
2022-05-22 15:22:05 -05:00
}
}
pub fn ptr_from_addr_cast(
ecx: &MiriEvalContext<'mir, 'tcx>,
addr: u64,
) -> Pointer<Option<Tag>> {
trace!("Casting 0x{:x} to a pointer", addr);
let global_state = ecx.machine.intptrcast.borrow();
2022-05-23 09:17:04 +02:00
match global_state.provenance_mode {
ProvenanceMode::Legacy => {
// Determine the allocation this points to at cast time.
let alloc_id = Self::alloc_id_from_addr(ecx, addr);
Pointer::new(
alloc_id.map(|alloc_id| Tag::Concrete { alloc_id, sb: SbTag::Untagged }),
2022-05-23 09:17:04 +02:00
Size::from_bytes(addr),
)
}
ProvenanceMode::Strict => {
// We don't support int2ptr casts in this mode (i.e., we treat them like
// transmutes).
Pointer::new(None, Size::from_bytes(addr))
}
ProvenanceMode::Permissive => {
// This is how wildcard pointers are born.
Pointer::new(Some(Tag::Wildcard), Size::from_bytes(addr))
}
2022-05-22 15:22:05 -05:00
}
}
fn alloc_base_addr(ecx: &MiriEvalContext<'mir, 'tcx>, alloc_id: AllocId) -> u64 {
let mut global_state = ecx.machine.intptrcast.borrow_mut();
let global_state = &mut *global_state;
2021-07-15 20:33:08 +02:00
match global_state.base_addr.entry(alloc_id) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
2021-07-15 20:33:08 +02:00
// There is nothing wrong with a raw pointer being cast to an integer only after
// it became dangling. Hence `MaybeDead`.
let (size, align) =
ecx.get_alloc_size_and_align(alloc_id, AllocCheck::MaybeDead).unwrap();
2021-07-15 20:33:08 +02:00
2019-06-25 14:07:23 -05:00
// This allocation does not have a base address yet, pick one.
// Leave some space to the previous allocation, to give it some chance to be less aligned.
let slack = {
let mut rng = ecx.machine.rng.borrow_mut();
2019-06-28 10:23:29 +02:00
// This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
2021-04-04 11:45:09 +02:00
rng.gen_range(0..16)
};
// From next_base_addr + slack, round up to adjust for alignment.
2019-07-23 23:43:37 +02:00
let base_addr = global_state.next_base_addr.checked_add(slack).unwrap();
let base_addr = Self::align_addr(base_addr, align.bytes());
entry.insert(base_addr);
2019-06-30 21:06:32 +02:00
trace!(
2021-07-15 20:33:08 +02:00
"Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
2019-12-23 12:56:23 +01:00
base_addr,
2021-07-15 20:33:08 +02:00
alloc_id,
size.bytes(),
2019-12-23 12:56:23 +01:00
align.bytes(),
2021-07-15 20:33:08 +02:00
slack,
2019-06-30 21:06:32 +02:00
);
// Remember next base address. Leave a gap of at least 1 to avoid two zero-sized allocations
// having the same base address, and to avoid ambiguous provenance for the address between two
2021-12-05 20:33:20 -05:00
// allocations (also see https://github.com/rust-lang/unsafe-code-guidelines/issues/313).
let size_plus_1 = size.bytes().checked_add(1).unwrap();
global_state.next_base_addr = base_addr.checked_add(size_plus_1).unwrap();
// Given that `next_base_addr` increases in each allocation, pushing the
// corresponding tuple keeps `int_to_ptr_map` sorted
2021-07-15 20:33:08 +02:00
global_state.int_to_ptr_map.push((base_addr, alloc_id));
base_addr
}
2021-07-15 20:33:08 +02:00
}
}
/// Convert a relative (tcx) pointer to an absolute address.
///
/// The result is the base address of the pointer's allocation plus the
/// pointer's offset, computed with the data layout's overflowing pointer
/// arithmetic (the overflow flag is discarded).
pub fn rel_ptr_to_addr(ecx: &MiriEvalContext<'mir, 'tcx>, ptr: Pointer<AllocId>) -> u64 {
    let (alloc_id, offset) = ptr.into_parts(); // offset is relative (AllocId provenance)
    let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id);

    // Add offset with the right kind of pointer-overflowing arithmetic.
    let dl = ecx.data_layout();
    dl.overflowing_offset(base_addr, offset.bytes()).0
}
2022-05-22 15:22:05 -05:00
pub fn abs_ptr_to_rel(
ecx: &MiriEvalContext<'mir, 'tcx>,
ptr: Pointer<Tag>,
) -> Option<(AllocId, Size)> {
let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance)
2022-05-22 15:22:05 -05:00
let alloc_id = if let Tag::Concrete { alloc_id, .. } = tag {
alloc_id
2022-05-22 15:22:05 -05:00
} else {
2022-05-23 09:17:04 +02:00
// A wildcard pointer.
assert_eq!(ecx.machine.intptrcast.borrow().provenance_mode, ProvenanceMode::Permissive);
2022-05-22 15:22:05 -05:00
GlobalStateInner::alloc_id_from_addr(ecx, addr.bytes())?
};
let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id);
2021-07-15 20:33:08 +02:00
// Wrapping "addr - base_addr"
let dl = ecx.data_layout();
2021-07-15 20:33:08 +02:00
let neg_base_addr = (base_addr as i64).wrapping_neg();
2022-05-22 15:22:05 -05:00
Some((
alloc_id,
Size::from_bytes(dl.overflowing_signed_offset(addr.bytes(), neg_base_addr).0),
))
}
/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
/// of `align` that is larger or equal to `addr`.
///
/// # Panics
///
/// Panics if `align` is zero, or if the rounded-up address does not fit in a `u64`.
fn align_addr(addr: u64, align: u64) -> u64 {
    match addr % align {
        0 => addr,
        // Add only the distance to the next multiple. `rem < align`, so the subtraction
        // cannot underflow, and the addition overflows only if the result itself does
        // not fit (adding the full `align` first could overflow spuriously).
        rem => addr.checked_add(align - rem).unwrap(),
    }
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `align_addr` rounds up to the next multiple of the alignment and leaves
    /// already-aligned addresses untouched.
    #[test]
    fn test_align_addr() {
        assert_eq!(GlobalStateInner::align_addr(37, 4), 40);
        assert_eq!(GlobalStateInner::align_addr(44, 4), 44);
        assert_eq!(GlobalStateInner::align_addr(0, 4), 0);
        assert_eq!(GlobalStateInner::align_addr(1, 1), 1);
        assert_eq!(GlobalStateInner::align_addr(45, 16), 48);
    }
}