Rollup merge of #128531 - RalfJung:miri-recursive-validity, r=saethlin

Miri: add a flag to do recursive validity checking

The point of this flag is to allow gathering experimental data for https://github.com/rust-lang/unsafe-code-guidelines/issues/412.
This commit is contained in:
Matthias Krüger 2024-08-04 11:32:34 +02:00 committed by GitHub
commit 19bceedd78
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 186 additions and 107 deletions

View File

@ -396,7 +396,7 @@ fn const_validate_mplace<'tcx>(
let alloc_id = mplace.ptr().provenance.unwrap().alloc_id();
let mut ref_tracking = RefTracking::new(mplace.clone());
let mut inner = false;
while let Some((mplace, path)) = ref_tracking.todo.pop() {
while let Some((mplace, path)) = ref_tracking.next() {
let mode = match ecx.tcx.static_mutability(cid.instance.def_id()) {
_ if cid.promoted.is_some() => CtfeValidationMode::Promoted,
Some(mutbl) => CtfeValidationMode::Static { mutbl }, // a `static`

View File

@ -165,6 +165,13 @@ fn alignment_check(
/// Whether to enforce the validity invariant for a specific layout.
fn enforce_validity(ecx: &InterpCx<'tcx, Self>, layout: TyAndLayout<'tcx>) -> bool;
/// Whether to enforce the validity invariant *recursively*.
fn enforce_validity_recursively(
_ecx: &InterpCx<'tcx, Self>,
_layout: TyAndLayout<'tcx>,
) -> bool {
false
}
/// Whether function calls should be [ABI](CallAbi)-checked.
fn enforce_abi(_ecx: &InterpCx<'tcx, Self>) -> bool {

View File

@ -1006,8 +1006,11 @@ pub fn find_leaked_allocations(
})
}
/// Runs the close in "validation" mode, which means the machine's memory read hooks will be
/// Runs the closure in "validation" mode, which means the machine's memory read hooks will be
/// suppressed. Needless to say, this must only be set with great care! Cannot be nested.
///
/// We do this so Miri's allocation access tracking does not show the validation
/// reads as spurious accesses.
pub(super) fn run_for_validation<R>(&self, f: impl FnOnce() -> R) -> R {
// This deliberately uses `==` on `bool` to follow the pattern
// `assert!(val.replace(new) == old)`.

View File

@ -572,7 +572,10 @@ pub fn write_immediate(
if M::enforce_validity(self, dest.layout()) {
// Data got changed, better make sure it matches the type!
self.validate_operand(&dest.to_op(self)?)?;
self.validate_operand(
&dest.to_op(self)?,
M::enforce_validity_recursively(self, dest.layout()),
)?;
}
Ok(())
@ -811,7 +814,10 @@ fn copy_op_inner(
// Generally for transmutation, data must be valid both at the old and new type.
// But if the types are the same, the 2nd validation below suffices.
if src.layout().ty != dest.layout().ty && M::enforce_validity(self, src.layout()) {
self.validate_operand(&src.to_op(self)?)?;
self.validate_operand(
&src.to_op(self)?,
M::enforce_validity_recursively(self, src.layout()),
)?;
}
// Do the actual copy.
@ -819,7 +825,10 @@ fn copy_op_inner(
if validate_dest && M::enforce_validity(self, dest.layout()) {
// Data got changed, better make sure it matches the type!
self.validate_operand(&dest.to_op(self)?)?;
self.validate_operand(
&dest.to_op(self)?,
M::enforce_validity_recursively(self, dest.layout()),
)?;
}
Ok(())

View File

@ -155,8 +155,8 @@ fn allow_immutable_unsafe_cell(self) -> bool {
/// State for tracking recursive validation of references
pub struct RefTracking<T, PATH = ()> {
pub seen: FxHashSet<T>,
pub todo: Vec<(T, PATH)>,
seen: FxHashSet<T>,
todo: Vec<(T, PATH)>,
}
impl<T: Clone + Eq + Hash + std::fmt::Debug, PATH: Default> RefTracking<T, PATH> {
@ -169,8 +169,11 @@ pub fn new(op: T) -> Self {
ref_tracking_for_consts.seen.insert(op);
ref_tracking_for_consts
}
pub fn next(&mut self) -> Option<(T, PATH)> {
self.todo.pop()
}
pub fn track(&mut self, op: T, path: impl FnOnce() -> PATH) {
fn track(&mut self, op: T, path: impl FnOnce() -> PATH) {
if self.seen.insert(op.clone()) {
trace!("Recursing below ptr {:#?}", op);
let path = path();
@ -435,88 +438,96 @@ fn check_safe_pointer(
if self.ecx.scalar_may_be_null(Scalar::from_maybe_pointer(place.ptr(), self.ecx))? {
throw_validation_failure!(self.path, NullPtr { ptr_kind })
}
// Do not allow pointers to uninhabited types.
// Do not allow references to uninhabited types.
if place.layout.abi.is_uninhabited() {
let ty = place.layout.ty;
throw_validation_failure!(self.path, PtrToUninhabited { ptr_kind, ty })
}
// Recursive checking
if let Some(ref_tracking) = self.ref_tracking.as_deref_mut() {
// Determine whether this pointer expects to be pointing to something mutable.
let ptr_expected_mutbl = match ptr_kind {
PointerKind::Box => Mutability::Mut,
PointerKind::Ref(mutbl) => {
// We do not take into account interior mutability here since we cannot know if
// there really is an `UnsafeCell` inside `Option<UnsafeCell>` -- so we check
// that in the recursive descent behind this reference (controlled by
// `allow_immutable_unsafe_cell`).
mutbl
}
};
// Proceed recursively even for ZST, no reason to skip them!
// `!` is a ZST and we want to validate it.
if let Ok((alloc_id, _offset, _prov)) = self.ecx.ptr_try_get_alloc_id(place.ptr(), 0) {
if let Some(ctfe_mode) = self.ctfe_mode {
let mut skip_recursive_check = false;
if let Some(GlobalAlloc::Static(did)) = self.ecx.tcx.try_get_global_alloc(alloc_id)
// CTFE imposes restrictions on what references can point to.
if let Ok((alloc_id, _offset, _prov)) =
self.ecx.ptr_try_get_alloc_id(place.ptr(), 0)
{
let DefKind::Static { nested, .. } = self.ecx.tcx.def_kind(did) else { bug!() };
// Special handling for pointers to statics (irrespective of their type).
assert!(!self.ecx.tcx.is_thread_local_static(did));
assert!(self.ecx.tcx.is_static(did));
// Mode-specific checks
match self.ctfe_mode {
Some(
CtfeValidationMode::Static { .. } | CtfeValidationMode::Promoted { .. },
) => {
// We skip recursively checking other statics. These statics must be sound by
// themselves, and the only way to get broken statics here is by using
// unsafe code.
// The reasons we don't check other statics is twofold. For one, in all
// sound cases, the static was already validated on its own, and second, we
// trigger cycle errors if we try to compute the value of the other static
// and that static refers back to us (potentially through a promoted).
// This could miss some UB, but that's fine.
// We still walk nested allocations, as they are fundamentally part of this validation run.
// This means we will also recurse into nested statics of *other*
// statics, even though we do not recurse into other statics directly.
// That's somewhat inconsistent but harmless.
skip_recursive_check = !nested;
}
Some(CtfeValidationMode::Const { .. }) => {
// We can't recursively validate `extern static`, so we better reject them.
if self.ecx.tcx.is_foreign_item(did) {
throw_validation_failure!(self.path, ConstRefToExtern);
if let Some(GlobalAlloc::Static(did)) =
self.ecx.tcx.try_get_global_alloc(alloc_id)
{
let DefKind::Static { nested, .. } = self.ecx.tcx.def_kind(did) else {
bug!()
};
// Special handling for pointers to statics (irrespective of their type).
assert!(!self.ecx.tcx.is_thread_local_static(did));
assert!(self.ecx.tcx.is_static(did));
// Mode-specific checks
match ctfe_mode {
CtfeValidationMode::Static { .. }
| CtfeValidationMode::Promoted { .. } => {
// We skip recursively checking other statics. These statics must be sound by
// themselves, and the only way to get broken statics here is by using
// unsafe code.
// The reasons we don't check other statics is twofold. For one, in all
// sound cases, the static was already validated on its own, and second, we
// trigger cycle errors if we try to compute the value of the other static
// and that static refers back to us (potentially through a promoted).
// This could miss some UB, but that's fine.
// We still walk nested allocations, as they are fundamentally part of this validation run.
// This means we will also recurse into nested statics of *other*
// statics, even though we do not recurse into other statics directly.
// That's somewhat inconsistent but harmless.
skip_recursive_check = !nested;
}
CtfeValidationMode::Const { .. } => {
// We can't recursively validate `extern static`, so we better reject them.
if self.ecx.tcx.is_foreign_item(did) {
throw_validation_failure!(self.path, ConstRefToExtern);
}
}
}
None => {}
}
}
// Dangling and Mutability check.
let (size, _align, alloc_kind) = self.ecx.get_alloc_info(alloc_id);
if alloc_kind == AllocKind::Dead {
// This can happen for zero-sized references. We can't have *any* references to non-existing
// allocations though, interning rejects them all as the rest of rustc isn't happy with them...
// so we throw an error, even though this isn't really UB.
// A potential future alternative would be to resurrect this as a zero-sized allocation
// (which codegen will then compile to an aligned dummy pointer anyway).
throw_validation_failure!(self.path, DanglingPtrUseAfterFree { ptr_kind });
}
// If this allocation has size zero, there is no actual mutability here.
if size != Size::ZERO {
let alloc_actual_mutbl = mutability(self.ecx, alloc_id);
// Mutable pointer to immutable memory is no good.
if ptr_expected_mutbl == Mutability::Mut
&& alloc_actual_mutbl == Mutability::Not
{
throw_validation_failure!(self.path, MutableRefToImmutable);
// Dangling and Mutability check.
let (size, _align, alloc_kind) = self.ecx.get_alloc_info(alloc_id);
if alloc_kind == AllocKind::Dead {
// This can happen for zero-sized references. We can't have *any* references to
// non-existing allocations in const-eval though, interning rejects them all as
// the rest of rustc isn't happy with them... so we throw an error, even though
// this isn't really UB.
// A potential future alternative would be to resurrect this as a zero-sized allocation
// (which codegen will then compile to an aligned dummy pointer anyway).
throw_validation_failure!(self.path, DanglingPtrUseAfterFree { ptr_kind });
}
// In a const, everything must be completely immutable.
if matches!(self.ctfe_mode, Some(CtfeValidationMode::Const { .. })) {
// If this allocation has size zero, there is no actual mutability here.
if size != Size::ZERO {
// Determine whether this pointer expects to be pointing to something mutable.
let ptr_expected_mutbl = match ptr_kind {
PointerKind::Box => Mutability::Mut,
PointerKind::Ref(mutbl) => {
// We do not take into account interior mutability here since we cannot know if
// there really is an `UnsafeCell` inside `Option<UnsafeCell>` -- so we check
// that in the recursive descent behind this reference (controlled by
// `allow_immutable_unsafe_cell`).
mutbl
}
};
// Determine what it actually points to.
let alloc_actual_mutbl = mutability(self.ecx, alloc_id);
// Mutable pointer to immutable memory is no good.
if ptr_expected_mutbl == Mutability::Mut
|| alloc_actual_mutbl == Mutability::Mut
&& alloc_actual_mutbl == Mutability::Not
{
throw_validation_failure!(self.path, ConstRefToMutable);
throw_validation_failure!(self.path, MutableRefToImmutable);
}
// In a const, everything must be completely immutable.
if matches!(self.ctfe_mode, Some(CtfeValidationMode::Const { .. })) {
if ptr_expected_mutbl == Mutability::Mut
|| alloc_actual_mutbl == Mutability::Mut
{
throw_validation_failure!(self.path, ConstRefToMutable);
}
}
}
}
@ -524,6 +535,15 @@ fn check_safe_pointer(
if skip_recursive_check {
return Ok(());
}
} else {
// This is not CTFE, so it's Miri with recursive checking.
// FIXME: we do *not* check behind boxes, since creating a new box first creates it uninitialized
// and then puts the value in there, so briefly we have a box with uninit contents.
// FIXME: should we also skip `UnsafeCell` behind shared references? Currently that is not
// needed since validation reads bypass Stacked Borrows and data race checks.
if matches!(ptr_kind, PointerKind::Box) {
return Ok(());
}
}
let path = &self.path;
ref_tracking.track(place, || {
@ -1072,11 +1092,23 @@ pub(crate) fn const_validate_operand(
/// `op` is assumed to cover valid memory if it is an indirect operand.
/// It will error if the bits at the destination do not match the ones described by the layout.
#[inline(always)]
pub fn validate_operand(&self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx> {
pub fn validate_operand(
&self,
op: &OpTy<'tcx, M::Provenance>,
recursive: bool,
) -> InterpResult<'tcx> {
// Note that we *could* actually be in CTFE here with `-Zextra-const-ub-checks`, but it's
// still correct to not use `ctfe_mode`: that mode is for validation of the final constant
// value, it rules out things like `UnsafeCell` in awkward places. It also can make checking
// recurse through references which, for now, we don't want here, either.
self.validate_operand_internal(op, vec![], None, None)
// value, it rules out things like `UnsafeCell` in awkward places.
if !recursive {
return self.validate_operand_internal(op, vec![], None, None);
}
// Do a recursive check.
let mut ref_tracking = RefTracking::empty();
self.validate_operand_internal(op, vec![], Some(&mut ref_tracking), None)?;
while let Some((mplace, path)) = ref_tracking.todo.pop() {
self.validate_operand_internal(&mplace.into(), path, Some(&mut ref_tracking), None)?;
}
Ok(())
}
}

View File

@ -67,7 +67,7 @@ fn might_permit_raw_init_strict<'tcx>(
// This does *not* actually check that references are dereferenceable, but since all types that
// require dereferenceability also require non-null, we don't actually get any false negatives
// due to this.
Ok(cx.validate_operand(&ot).is_ok())
Ok(cx.validate_operand(&ot, /*recursive*/ false).is_ok())
}
/// Implements the 'lax' (default) version of the `might_permit_raw_init` checks; see that function for

View File

@ -398,6 +398,8 @@ to Miri failing to detect cases of undefined behavior in a program.
application instead of raising an error within the context of Miri (and halting
execution). Note that code might not expect these operations to ever panic, so
this flag can lead to strange (mis)behavior.
* `-Zmiri-recursive-validation` is a *highly experimental* flag that makes validity checking
recurse below references.
* `-Zmiri-retag-fields[=<all|none|scalar>]` controls when Stacked Borrows retagging recurses into
fields. `all` means it always recurses (the default, and equivalent to `-Zmiri-retag-fields`
without an explicit value), `none` means it never recurses, `scalar` means it only recurses for

View File

@ -44,7 +44,7 @@
use rustc_session::search_paths::PathKind;
use rustc_session::{CtfeBacktrace, EarlyDiagCtxt};
use miri::{BacktraceStyle, BorrowTrackerMethod, ProvenanceMode, RetagFields};
use miri::{BacktraceStyle, BorrowTrackerMethod, ProvenanceMode, RetagFields, ValidationMode};
struct MiriCompilerCalls {
miri_config: miri::MiriConfig,
@ -421,7 +421,9 @@ fn main() {
} else if arg == "--" {
after_dashdash = true;
} else if arg == "-Zmiri-disable-validation" {
miri_config.validate = false;
miri_config.validation = ValidationMode::No;
} else if arg == "-Zmiri-recursive-validation" {
miri_config.validation = ValidationMode::Deep;
} else if arg == "-Zmiri-disable-stacked-borrows" {
miri_config.borrow_tracker = None;
} else if arg == "-Zmiri-tree-borrows" {

View File

@ -68,7 +68,7 @@ pub enum IsolatedOp {
Allow,
}
#[derive(Copy, Clone, PartialEq, Eq)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BacktraceStyle {
/// Prints a terser backtrace which ideally only contains relevant information.
Short,
@ -78,6 +78,16 @@ pub enum BacktraceStyle {
Off,
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ValidationMode {
/// Do not perform any kind of validation.
No,
/// Validate the interior of the value, but not things behind references.
Shallow,
/// Fully recursively validate references.
Deep,
}
/// Configuration needed to spawn a Miri instance.
#[derive(Clone)]
pub struct MiriConfig {
@ -85,7 +95,7 @@ pub struct MiriConfig {
/// (This is still subject to isolation as well as `forwarded_env_vars`.)
pub env: Vec<(OsString, OsString)>,
/// Determine if validity checking is enabled.
pub validate: bool,
pub validation: ValidationMode,
/// Determines if Stacked Borrows or Tree Borrows is enabled.
pub borrow_tracker: Option<BorrowTrackerMethod>,
/// Whether `core::ptr::Unique` receives special treatment.
@ -162,7 +172,7 @@ impl Default for MiriConfig {
fn default() -> MiriConfig {
MiriConfig {
env: vec![],
validate: true,
validation: ValidationMode::Shallow,
borrow_tracker: Some(BorrowTrackerMethod::StackedBorrows),
unique_is_unique: false,
check_alignment: AlignmentCheck::Int,

View File

@ -153,7 +153,7 @@ fn emulate_intrinsic_by_name(
// Would not be considered UB, or the other way around (`is_val_statically_known(0)`).
"is_val_statically_known" => {
let [arg] = check_arg_count(args)?;
this.validate_operand(arg)?;
this.validate_operand(arg, /*recursive*/ false)?;
let branch: bool = this.machine.rng.get_mut().gen();
this.write_scalar(Scalar::from_bool(branch), dest)?;
}

View File

@ -142,6 +142,7 @@
};
pub use crate::eval::{
create_ecx, eval_entry, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith,
ValidationMode,
};
pub use crate::helpers::{AccessKind, EvalContextExt as _};
pub use crate::machine::{

View File

@ -187,7 +187,11 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
pub type MemoryKind = interpret::MemoryKind<MiriMemoryKind>;
/// Pointer provenance.
#[derive(Clone, Copy)]
// This needs to be `Eq`+`Hash` because the `Machine` trait needs that because validity checking
// *might* be recursive and then it has to track which places have already been visited.
// These implementations are a bit questionable, and it means we may check the same place multiple
// times with different provenance, but that is in general not wrong.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub enum Provenance {
/// For pointers with concrete provenance. we exactly know which allocation they are attached to
/// and what their borrow tag is.
@ -215,24 +219,6 @@ pub enum Provenance {
Wildcard,
}
// This needs to be `Eq`+`Hash` because the `Machine` trait needs that because validity checking
// *might* be recursive and then it has to track which places have already been visited.
// However, comparing provenance is meaningless, since `Wildcard` might be any provenance -- and of
// course we don't actually do recursive checking.
// We could change `RefTracking` to strip provenance for its `seen` set but that type is generic so that is quite annoying.
// Instead owe add the required instances but make them panic.
impl PartialEq for Provenance {
fn eq(&self, _other: &Self) -> bool {
panic!("Provenance must not be compared")
}
}
impl Eq for Provenance {}
impl std::hash::Hash for Provenance {
fn hash<H: std::hash::Hasher>(&self, _state: &mut H) {
panic!("Provenance must not be hashed")
}
}
/// The "extra" information a pointer has over a regular AllocId.
#[derive(Copy, Clone, PartialEq)]
pub enum ProvenanceExtra {
@ -460,7 +446,7 @@ pub struct MiriMachine<'tcx> {
pub(crate) isolated_op: IsolatedOp,
/// Whether to enforce the validity invariant.
pub(crate) validate: bool,
pub(crate) validation: ValidationMode,
/// The table of file descriptors.
pub(crate) fds: shims::FdTable,
@ -659,7 +645,7 @@ pub(crate) fn new(config: &MiriConfig, layout_cx: LayoutCx<'tcx, TyCtxt<'tcx>>)
cmd_line: None,
tls: TlsData::default(),
isolated_op: config.isolated_op,
validate: config.validate,
validation: config.validation,
fds: shims::FdTable::init(config.mute_stdout_stderr),
dirs: Default::default(),
layouts,
@ -801,7 +787,7 @@ fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
fds,
tcx: _,
isolated_op: _,
validate: _,
validation: _,
clock: _,
layouts: _,
static_roots: _,
@ -943,7 +929,11 @@ fn alignment_check(
#[inline(always)]
fn enforce_validity(ecx: &MiriInterpCx<'tcx>, _layout: TyAndLayout<'tcx>) -> bool {
ecx.machine.validate
ecx.machine.validation != ValidationMode::No
}
#[inline(always)]
fn enforce_validity_recursively(ecx: &InterpCx<'tcx, Self>, _layout: TyAndLayout<'tcx>) -> bool {
ecx.machine.validation == ValidationMode::Deep
}
#[inline(always)]

View File

@ -0,0 +1,8 @@
//@compile-flags: -Zmiri-recursive-validation
fn main() {
let x = 3u8;
let xref = &x;
let xref_wrong_type: &bool = unsafe { std::mem::transmute(xref) }; //~ERROR: encountered 0x03, but expected a boolean
let _val = *xref_wrong_type;
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: constructing invalid value at .<deref>: encountered 0x03, but expected a boolean
--> $DIR/recursive-validity-ref-bool.rs:LL:CC
|
LL | let xref_wrong_type: &bool = unsafe { std::mem::transmute(xref) };
| ^^^^^^^^^^^^^^^^^^^^^^^^^ constructing invalid value at .<deref>: encountered 0x03, but expected a boolean
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/recursive-validity-ref-bool.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error