interpret: reset provenance on typed copies

This commit is contained in:
Ralf Jung 2024-08-29 08:59:52 +02:00
parent 85dc22f2cf
commit cbdcbf0d6a
23 changed files with 489 additions and 135 deletions

View File

@ -94,7 +94,7 @@ fn eval_body_using_ecx<'tcx, R: InterpretationResult<'tcx>>(
let intern_result = intern_const_alloc_recursive(ecx, intern_kind, &ret);
// Since evaluation had no errors, validate the resulting constant.
const_validate_mplace(&ecx, &ret, cid)?;
const_validate_mplace(ecx, &ret, cid)?;
// Only report this after validation, as validation produces much better diagnostics.
// FIXME: ensure validation always reports this and stop making interning care about it.
@ -391,7 +391,7 @@ fn eval_in_interpreter<'tcx, R: InterpretationResult<'tcx>>(
#[inline(always)]
fn const_validate_mplace<'tcx>(
ecx: &InterpCx<'tcx, CompileTimeMachine<'tcx>>,
ecx: &mut InterpCx<'tcx, CompileTimeMachine<'tcx>>,
mplace: &MPlaceTy<'tcx>,
cid: GlobalId<'tcx>,
) -> Result<(), ErrorHandled> {

View File

@ -8,9 +8,8 @@
use std::assert_matches::assert_matches;
use std::borrow::Cow;
use std::cell::Cell;
use std::collections::VecDeque;
use std::{fmt, ptr};
use std::{fmt, mem, ptr};
use rustc_ast::Mutability;
use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
@ -118,7 +117,7 @@ pub struct Memory<'tcx, M: Machine<'tcx>> {
/// This stores whether we are currently doing reads purely for the purpose of validation.
/// Those reads do not trigger the machine's hooks for memory reads.
/// Needless to say, this must only be set with great care!
validation_in_progress: Cell<bool>,
validation_in_progress: bool,
}
/// A reference to some allocation that was already bounds-checked for the given region
@ -145,7 +144,7 @@ pub fn new() -> Self {
alloc_map: M::MemoryMap::default(),
extra_fn_ptr_map: FxIndexMap::default(),
dead_alloc_map: FxIndexMap::default(),
validation_in_progress: Cell::new(false),
validation_in_progress: false,
}
}
@ -682,7 +681,7 @@ pub fn get_ptr_alloc<'a>(
// We want to call the hook on *all* accesses that involve an AllocId, including zero-sized
// accesses. That means we cannot rely on the closure above or the `Some` branch below. We
// do this after `check_and_deref_ptr` to ensure some basic sanity has already been checked.
if !self.memory.validation_in_progress.get() {
if !self.memory.validation_in_progress {
if let Ok((alloc_id, ..)) = self.ptr_try_get_alloc_id(ptr, size_i64) {
M::before_alloc_read(self, alloc_id)?;
}
@ -690,7 +689,7 @@ pub fn get_ptr_alloc<'a>(
if let Some((alloc_id, offset, prov, alloc)) = ptr_and_alloc {
let range = alloc_range(offset, size);
if !self.memory.validation_in_progress.get() {
if !self.memory.validation_in_progress {
M::before_memory_read(
self.tcx,
&self.machine,
@ -766,11 +765,14 @@ pub fn get_ptr_alloc_mut<'a>(
let parts = self.get_ptr_access(ptr, size)?;
if let Some((alloc_id, offset, prov)) = parts {
let tcx = self.tcx;
let validation_in_progress = self.memory.validation_in_progress;
// FIXME: can we somehow avoid looking up the allocation twice here?
// We cannot call `get_raw_mut` inside `check_and_deref_ptr` as that would duplicate `&mut self`.
let (alloc, machine) = self.get_alloc_raw_mut(alloc_id)?;
let range = alloc_range(offset, size);
M::before_memory_write(tcx, machine, &mut alloc.extra, (alloc_id, prov), range)?;
if !validation_in_progress {
M::before_memory_write(tcx, machine, &mut alloc.extra, (alloc_id, prov), range)?;
}
Ok(Some(AllocRefMut { alloc, range, tcx: *tcx, alloc_id }))
} else {
Ok(None)
@ -1014,16 +1016,16 @@ pub fn find_leaked_allocations(
///
/// We do this so Miri's allocation access tracking does not show the validation
/// reads as spurious accesses.
pub fn run_for_validation<R>(&self, f: impl FnOnce() -> R) -> R {
pub fn run_for_validation<R>(&mut self, f: impl FnOnce(&mut Self) -> R) -> R {
// This deliberately uses `==` on `bool` to follow the pattern
// `assert!(val.replace(new) == old)`.
assert!(
self.memory.validation_in_progress.replace(true) == false,
mem::replace(&mut self.memory.validation_in_progress, true) == false,
"`validation_in_progress` was already set"
);
let res = f();
let res = f(self);
assert!(
self.memory.validation_in_progress.replace(false) == true,
mem::replace(&mut self.memory.validation_in_progress, false) == true,
"`validation_in_progress` was unset by someone else"
);
res
@ -1115,6 +1117,10 @@ fn write_allocation_track_relocs<'tcx, Prov: Provenance, Extra, Bytes: AllocByte
impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes>
AllocRefMut<'a, 'tcx, Prov, Extra, Bytes>
{
/// Borrows this mutable allocation reference as a read-only `AllocRef`.
///
/// The returned reference covers the same allocation, range and allocation id; it lives for
/// the duration `'b` of the borrow of `self`.
pub fn as_ref<'b>(&'b self) -> AllocRef<'b, 'tcx, Prov, Extra, Bytes> {
AllocRef { alloc: self.alloc, range: self.range, tcx: self.tcx, alloc_id: self.alloc_id }
}
/// `range` is relative to this allocation reference, not the base of the allocation.
pub fn write_scalar(&mut self, range: AllocRange, val: Scalar<Prov>) -> InterpResult<'tcx> {
let range = self.range.subrange(range);
@ -1137,6 +1143,14 @@ pub fn write_uninit(&mut self) -> InterpResult<'tcx> {
.write_uninit(&self.tcx, self.range)
.map_err(|e| e.to_interp_error(self.alloc_id))?)
}
/// Strips all provenance from the range covered by this allocation reference.
///
/// Failures reported by the underlying allocation are converted into interpreter errors
/// tagged with this reference's allocation id.
pub fn clear_provenance(&mut self) -> InterpResult<'tcx> {
    let alloc_id = self.alloc_id;
    self.alloc
        .clear_provenance(&self.tcx, self.range)
        .map_err(|e| e.to_interp_error(alloc_id))?;
    Ok(())
}
}
impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes> AllocRef<'a, 'tcx, Prov, Extra, Bytes> {
@ -1278,7 +1292,7 @@ pub fn mem_copy_repeatedly(
};
let src_alloc = self.get_alloc_raw(src_alloc_id)?;
let src_range = alloc_range(src_offset, size);
assert!(!self.memory.validation_in_progress.get(), "we can't be copying during validation");
assert!(!self.memory.validation_in_progress, "we can't be copying during validation");
M::before_memory_read(
tcx,
&self.machine,

View File

@ -137,6 +137,20 @@ pub fn assert_matches_abi(self, abi: Abi, cx: &impl HasDataLayout) {
}
}
}
/// Clears the provenance of every scalar component of this immediate, in place.
///
/// `Uninit` has no scalar components and is left untouched. Any error from clearing a
/// component is propagated; for a pair, the first component is processed before the second.
pub fn clear_provenance<'tcx>(&mut self) -> InterpResult<'tcx> {
    match self {
        Immediate::Uninit => Ok(()),
        Immediate::Scalar(s) => s.clear_provenance(),
        Immediate::ScalarPair(a, b) => {
            a.clear_provenance()?;
            b.clear_provenance()
        }
    }
}
}
// ScalarPair needs a type to interpret, so we often have an immediate and a type together

View File

@ -605,8 +605,9 @@ pub fn write_immediate(
if M::enforce_validity(self, dest.layout()) {
// Data got changed, better make sure it matches the type!
self.validate_operand(
&dest.to_op(self)?,
&dest.to_place(),
M::enforce_validity_recursively(self, dest.layout()),
/*reset_provenance*/ true,
)?;
}
@ -636,7 +637,7 @@ pub fn write_pointer(
/// Write an immediate to a place.
/// If you use this you are responsible for validating that things got copied at the
/// right type.
fn write_immediate_no_validate(
pub(super) fn write_immediate_no_validate(
&mut self,
src: Immediate<M::Provenance>,
dest: &impl Writeable<'tcx, M::Provenance>,
@ -684,15 +685,7 @@ fn write_immediate_to_mplace_no_validate(
match value {
Immediate::Scalar(scalar) => {
let Abi::Scalar(s) = layout.abi else {
span_bug!(
self.cur_span(),
"write_immediate_to_mplace: invalid Scalar layout: {layout:#?}",
)
};
let size = s.size(&tcx);
assert_eq!(size, layout.size, "abi::Scalar size does not match layout size");
alloc.write_scalar(alloc_range(Size::ZERO, size), scalar)
alloc.write_scalar(alloc_range(Size::ZERO, scalar.size()), scalar)
}
Immediate::ScalarPair(a_val, b_val) => {
let Abi::ScalarPair(a, b) = layout.abi else {
@ -702,16 +695,15 @@ fn write_immediate_to_mplace_no_validate(
layout
)
};
let (a_size, b_size) = (a.size(&tcx), b.size(&tcx));
let b_offset = a_size.align_to(b.align(&tcx).abi);
let b_offset = a.size(&tcx).align_to(b.align(&tcx).abi);
assert!(b_offset.bytes() > 0); // in `operand_field` we use the offset to tell apart the fields
// It is tempting to verify `b_offset` against `layout.fields.offset(1)`,
// but that does not work: We could be a newtype around a pair, then the
// fields do not match the `ScalarPair` components.
alloc.write_scalar(alloc_range(Size::ZERO, a_size), a_val)?;
alloc.write_scalar(alloc_range(b_offset, b_size), b_val)
alloc.write_scalar(alloc_range(Size::ZERO, a_val.size()), a_val)?;
alloc.write_scalar(alloc_range(b_offset, b_val.size()), b_val)
}
Immediate::Uninit => alloc.write_uninit(),
}
@ -736,6 +728,26 @@ pub fn write_uninit(
Ok(())
}
/// Strips all provenance from the data stored in `dest`.
///
/// If `dest` is a local that holds an immediate, the immediate is updated in place.
/// Otherwise the provenance is cleared in the backing allocation; a zero-sized access has
/// no allocation and is a no-op.
pub fn clear_provenance(
    &mut self,
    dest: &impl Writeable<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
    match self.as_mplace_or_mutable_local(&dest.to_place())? {
        Left(mplace) => {
            // `None` here means a zero-sized access: nothing to clear.
            if let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? {
                alloc.clear_provenance()?;
            }
        }
        Right((local_val, _local_layout)) => local_val.clear_provenance()?,
    }
    Ok(())
}
/// Copies the data from an operand to a place.
/// The layouts of the `src` and `dest` may disagree.
/// Does not perform validation of the destination.
@ -789,23 +801,30 @@ fn copy_op_inner(
allow_transmute: bool,
validate_dest: bool,
) -> InterpResult<'tcx> {
// Generally for transmutation, data must be valid both at the old and new type.
// But if the types are the same, the 2nd validation below suffices.
if src.layout().ty != dest.layout().ty && M::enforce_validity(self, src.layout()) {
self.validate_operand(
&src.to_op(self)?,
M::enforce_validity_recursively(self, src.layout()),
)?;
}
// These are technically *two* typed copies: `src` is a not-yet-loaded value,
// so we're doing a typed copy at `src` type from there to some intermediate storage.
// And then we're doing a second typed copy from that intermediate storage to `dest`.
// But as an optimization, we only make a single direct copy here.
// Do the actual copy.
self.copy_op_no_validate(src, dest, allow_transmute)?;
if validate_dest && M::enforce_validity(self, dest.layout()) {
// Data got changed, better make sure it matches the type!
let dest = dest.to_place();
// Given that there were two typed copies, we have to ensure this is valid at both types,
// and we have to ensure this loses provenance and padding according to both types.
// But if the types are identical, we only do one pass.
if src.layout().ty != dest.layout().ty {
self.validate_operand(
&dest.transmute(src.layout(), self)?,
M::enforce_validity_recursively(self, src.layout()),
/*reset_provenance*/ true,
)?;
}
self.validate_operand(
&dest.to_op(self)?,
&dest,
M::enforce_validity_recursively(self, dest.layout()),
/*reset_provenance*/ true,
)?;
}

View File

@ -30,8 +30,8 @@
use super::machine::AllocMap;
use super::{
err_ub, format_interp_error, throw_ub, AllocId, AllocKind, CheckInAllocMsg, GlobalAlloc, ImmTy,
Immediate, InterpCx, InterpResult, MPlaceTy, Machine, MemPlaceMeta, OpTy, Pointer, Projectable,
Scalar, ValueVisitor,
Immediate, InterpCx, InterpResult, MPlaceTy, Machine, MemPlaceMeta, PlaceTy, Pointer,
Projectable, Scalar, ValueVisitor,
};
// for the validation errors
@ -163,22 +163,22 @@ impl<T: Clone + Eq + Hash + std::fmt::Debug, PATH: Default> RefTracking<T, PATH>
pub fn empty() -> Self {
RefTracking { seen: FxHashSet::default(), todo: vec![] }
}
pub fn new(op: T) -> Self {
pub fn new(val: T) -> Self {
let mut ref_tracking_for_consts =
RefTracking { seen: FxHashSet::default(), todo: vec![(op.clone(), PATH::default())] };
ref_tracking_for_consts.seen.insert(op);
RefTracking { seen: FxHashSet::default(), todo: vec![(val.clone(), PATH::default())] };
ref_tracking_for_consts.seen.insert(val);
ref_tracking_for_consts
}
pub fn next(&mut self) -> Option<(T, PATH)> {
self.todo.pop()
}
fn track(&mut self, op: T, path: impl FnOnce() -> PATH) {
if self.seen.insert(op.clone()) {
trace!("Recursing below ptr {:#?}", op);
fn track(&mut self, val: T, path: impl FnOnce() -> PATH) {
if self.seen.insert(val.clone()) {
trace!("Recursing below ptr {:#?}", val);
let path = path();
// Remember to come back to this later.
self.todo.push((op, path));
self.todo.push((val, path));
}
}
}
@ -217,7 +217,10 @@ struct ValidityVisitor<'rt, 'tcx, M: Machine<'tcx>> {
ref_tracking: Option<&'rt mut RefTracking<MPlaceTy<'tcx, M::Provenance>, Vec<PathElem>>>,
/// `None` indicates this is not validating for CTFE (but for runtime).
ctfe_mode: Option<CtfeValidationMode>,
ecx: &'rt InterpCx<'tcx, M>,
ecx: &'rt mut InterpCx<'tcx, M>,
/// Whether provenance should be reset outside of pointers (emulating the effect of a typed
/// copy).
reset_provenance: bool,
}
impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> {
@ -314,11 +317,11 @@ fn with_elem<R>(
fn read_immediate(
&self,
op: &OpTy<'tcx, M::Provenance>,
val: &PlaceTy<'tcx, M::Provenance>,
expected: ExpectedKind,
) -> InterpResult<'tcx, ImmTy<'tcx, M::Provenance>> {
Ok(try_validation!(
self.ecx.read_immediate(op),
self.ecx.read_immediate(val),
self.path,
Ub(InvalidUninitBytes(None)) =>
Uninit { expected },
@ -332,10 +335,38 @@ fn read_immediate(
fn read_scalar(
&self,
op: &OpTy<'tcx, M::Provenance>,
val: &PlaceTy<'tcx, M::Provenance>,
expected: ExpectedKind,
) -> InterpResult<'tcx, Scalar<M::Provenance>> {
Ok(self.read_immediate(op, expected)?.to_scalar())
Ok(self.read_immediate(val, expected)?.to_scalar())
}
/// Reads the pointer stored in `val` and converts it into the place it points to.
///
/// The read goes through this visitor's `read_immediate` wrapper (rather than
/// `ecx.deref_pointer`). When `reset_provenance` is set, the stored pointer value is also
/// normalized in place so that no partial provenance (and, for wide pointers, no provenance
/// on the metadata) survives.
fn deref_pointer(
    &mut self,
    val: &PlaceTy<'tcx, M::Provenance>,
    expected: ExpectedKind,
) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> {
    let imm = self.read_immediate(val, expected)?;
    if self.reset_provenance {
        match imm.layout.abi {
            Abi::Scalar(..) => {
                // Thin pointer: a value that carries provenance can stay as it is. A value
                // that was read as a plain integer must have its provenance cleared in
                // memory as well.
                if let Scalar::Int(..) = imm.to_scalar() {
                    self.ecx.clear_provenance(val)?;
                }
            }
            _ => {
                // Wide pointer: both the pointer and its metadata matter. Writing back the
                // value we just read is the simple (if not the most efficient) way to
                // normalize both; a targeted clear is only worth it if this turns out to
                // be a perf hotspot.
                self.ecx.write_immediate_no_validate(*imm, val)?;
            }
        }
    }
    // Finally, interpret the value we read as a reference to a place.
    self.ecx.ref_to_mplace(&imm)
}
fn check_wide_ptr_meta(
@ -376,11 +407,10 @@ fn check_wide_ptr_meta(
/// Check a reference or `Box`.
fn check_safe_pointer(
&mut self,
value: &OpTy<'tcx, M::Provenance>,
value: &PlaceTy<'tcx, M::Provenance>,
ptr_kind: PointerKind,
) -> InterpResult<'tcx> {
// Not using `deref_pointer` since we want to use our `read_immediate` wrapper.
let place = self.ecx.ref_to_mplace(&self.read_immediate(value, ptr_kind.into())?)?;
let place = self.deref_pointer(value, ptr_kind.into())?;
// Handle wide pointers.
// Check metadata early, for better diagnostics
if place.layout.is_unsized() {
@ -564,31 +594,37 @@ fn check_safe_pointer(
/// Note that not all of these have `FieldsShape::Primitive`, e.g. wide references.
fn try_visit_primitive(
&mut self,
value: &OpTy<'tcx, M::Provenance>,
value: &PlaceTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx, bool> {
// Go over all the primitive types
let ty = value.layout.ty;
match ty.kind() {
ty::Bool => {
let value = self.read_scalar(value, ExpectedKind::Bool)?;
let scalar = self.read_scalar(value, ExpectedKind::Bool)?;
try_validation!(
value.to_bool(),
scalar.to_bool(),
self.path,
Ub(InvalidBool(..)) => ValidationErrorKind::InvalidBool {
value: format!("{value:x}"),
value: format!("{scalar:x}"),
}
);
if self.reset_provenance {
self.ecx.clear_provenance(value)?;
}
Ok(true)
}
ty::Char => {
let value = self.read_scalar(value, ExpectedKind::Char)?;
let scalar = self.read_scalar(value, ExpectedKind::Char)?;
try_validation!(
value.to_char(),
scalar.to_char(),
self.path,
Ub(InvalidChar(..)) => ValidationErrorKind::InvalidChar {
value: format!("{value:x}"),
value: format!("{scalar:x}"),
}
);
if self.reset_provenance {
self.ecx.clear_provenance(value)?;
}
Ok(true)
}
ty::Float(_) | ty::Int(_) | ty::Uint(_) => {
@ -602,11 +638,13 @@ fn try_visit_primitive(
ExpectedKind::Int
},
)?;
if self.reset_provenance {
self.ecx.clear_provenance(value)?;
}
Ok(true)
}
ty::RawPtr(..) => {
let place =
self.ecx.ref_to_mplace(&self.read_immediate(value, ExpectedKind::RawPtr)?)?;
let place = self.deref_pointer(value, ExpectedKind::RawPtr)?;
if place.layout.is_unsized() {
self.check_wide_ptr_meta(place.meta(), place.layout)?;
}
@ -617,11 +655,11 @@ fn try_visit_primitive(
Ok(true)
}
ty::FnPtr(..) => {
let value = self.read_scalar(value, ExpectedKind::FnPtr)?;
let scalar = self.read_scalar(value, ExpectedKind::FnPtr)?;
// If we check references recursively, also check that this points to a function.
if let Some(_) = self.ref_tracking {
let ptr = value.to_pointer(self.ecx)?;
let ptr = scalar.to_pointer(self.ecx)?;
let _fn = try_validation!(
self.ecx.get_ptr_fn(ptr),
self.path,
@ -631,10 +669,17 @@ fn try_visit_primitive(
// FIXME: Check if the signature matches
} else {
// Otherwise (for standalone Miri), we have to still check it to be non-null.
if self.ecx.scalar_may_be_null(value)? {
if self.ecx.scalar_may_be_null(scalar)? {
throw_validation_failure!(self.path, NullFnPtr);
}
}
if self.reset_provenance {
// Make sure we do not preserve partial provenance. This matches the thin
// pointer handling in `deref_pointer`.
if matches!(scalar, Scalar::Int(..)) {
self.ecx.clear_provenance(value)?;
}
}
Ok(true)
}
ty::Never => throw_validation_failure!(self.path, NeverVal),
@ -716,13 +761,18 @@ fn visit_scalar(
}
}
fn in_mutable_memory(&self, op: &OpTy<'tcx, M::Provenance>) -> bool {
if let Some(mplace) = op.as_mplace_or_imm().left() {
fn in_mutable_memory(&self, val: &PlaceTy<'tcx, M::Provenance>) -> bool {
if let Some(mplace) = val.as_mplace_or_local().left() {
if let Some(alloc_id) = mplace.ptr().provenance.and_then(|p| p.get_alloc_id()) {
return mutability(self.ecx, alloc_id).is_mut();
mutability(self.ecx, alloc_id).is_mut()
} else {
// No memory at all.
false
}
} else {
// A local variable -- definitely mutable.
true
}
false
}
}
@ -774,7 +824,7 @@ fn mutability<'tcx>(ecx: &InterpCx<'tcx, impl Machine<'tcx>>, alloc_id: AllocId)
}
impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, 'tcx, M> {
type V = OpTy<'tcx, M::Provenance>;
type V = PlaceTy<'tcx, M::Provenance>;
#[inline(always)]
fn ecx(&self) -> &InterpCx<'tcx, M> {
@ -783,11 +833,11 @@ fn ecx(&self) -> &InterpCx<'tcx, M> {
fn read_discriminant(
&mut self,
op: &OpTy<'tcx, M::Provenance>,
val: &PlaceTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx, VariantIdx> {
self.with_elem(PathElem::EnumTag, move |this| {
Ok(try_validation!(
this.ecx.read_discriminant(op),
this.ecx.read_discriminant(val),
this.path,
Ub(InvalidTag(val)) => InvalidEnumTag {
value: format!("{val:x}"),
@ -802,40 +852,40 @@ fn read_discriminant(
#[inline]
fn visit_field(
&mut self,
old_op: &OpTy<'tcx, M::Provenance>,
old_val: &PlaceTy<'tcx, M::Provenance>,
field: usize,
new_op: &OpTy<'tcx, M::Provenance>,
new_val: &PlaceTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
let elem = self.aggregate_field_path_elem(old_op.layout, field);
self.with_elem(elem, move |this| this.visit_value(new_op))
let elem = self.aggregate_field_path_elem(old_val.layout, field);
self.with_elem(elem, move |this| this.visit_value(new_val))
}
#[inline]
fn visit_variant(
&mut self,
old_op: &OpTy<'tcx, M::Provenance>,
old_val: &PlaceTy<'tcx, M::Provenance>,
variant_id: VariantIdx,
new_op: &OpTy<'tcx, M::Provenance>,
new_val: &PlaceTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
let name = match old_op.layout.ty.kind() {
let name = match old_val.layout.ty.kind() {
ty::Adt(adt, _) => PathElem::Variant(adt.variant(variant_id).name),
// Coroutines also have variants
ty::Coroutine(..) => PathElem::CoroutineState(variant_id),
_ => bug!("Unexpected type with variant: {:?}", old_op.layout.ty),
_ => bug!("Unexpected type with variant: {:?}", old_val.layout.ty),
};
self.with_elem(name, move |this| this.visit_value(new_op))
self.with_elem(name, move |this| this.visit_value(new_val))
}
#[inline(always)]
fn visit_union(
&mut self,
op: &OpTy<'tcx, M::Provenance>,
val: &PlaceTy<'tcx, M::Provenance>,
_fields: NonZero<usize>,
) -> InterpResult<'tcx> {
// Special check for CTFE validation, preventing `UnsafeCell` inside unions in immutable memory.
if self.ctfe_mode.is_some_and(|c| !c.allow_immutable_unsafe_cell()) {
if !op.layout.is_zst() && !op.layout.ty.is_freeze(*self.ecx.tcx, self.ecx.param_env) {
if !self.in_mutable_memory(op) {
if !val.layout.is_zst() && !val.layout.ty.is_freeze(*self.ecx.tcx, self.ecx.param_env) {
if !self.in_mutable_memory(val) {
throw_validation_failure!(self.path, UnsafeCellInImmutable);
}
}
@ -847,39 +897,41 @@ fn visit_union(
fn visit_box(
&mut self,
_box_ty: Ty<'tcx>,
op: &OpTy<'tcx, M::Provenance>,
val: &PlaceTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
self.check_safe_pointer(op, PointerKind::Box)?;
self.check_safe_pointer(val, PointerKind::Box)?;
Ok(())
}
#[inline]
fn visit_value(&mut self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx> {
trace!("visit_value: {:?}, {:?}", *op, op.layout);
fn visit_value(&mut self, val: &PlaceTy<'tcx, M::Provenance>) -> InterpResult<'tcx> {
trace!("visit_value: {:?}, {:?}", *val, val.layout);
// Check primitive types -- the leaves of our recursive descent.
// This is called even for enum discriminants (which are "fields" of their enum),
// so for integer-typed discriminants the provenance reset will happen here.
// We assume that the Scalar validity range does not restrict these values
// any further than `try_visit_primitive` does!
if self.try_visit_primitive(op)? {
if self.try_visit_primitive(val)? {
return Ok(());
}
// Special check preventing `UnsafeCell` in the inner part of constants
if self.ctfe_mode.is_some_and(|c| !c.allow_immutable_unsafe_cell()) {
if !op.layout.is_zst()
&& let Some(def) = op.layout.ty.ty_adt_def()
if !val.layout.is_zst()
&& let Some(def) = val.layout.ty.ty_adt_def()
&& def.is_unsafe_cell()
{
if !self.in_mutable_memory(op) {
if !self.in_mutable_memory(val) {
throw_validation_failure!(self.path, UnsafeCellInImmutable);
}
}
}
// Recursively walk the value at its type. Apply optimizations for some large types.
match op.layout.ty.kind() {
match val.layout.ty.kind() {
ty::Str => {
let mplace = op.assert_mem_place(); // strings are unsized and hence never immediate
let mplace = val.assert_mem_place(); // strings are unsized and hence never immediate
let len = mplace.len(self.ecx)?;
try_validation!(
self.ecx.read_bytes_ptr_strip_provenance(mplace.ptr(), Size::from_bytes(len)),
@ -889,11 +941,10 @@ fn visit_value(&mut self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx>
);
}
ty::Array(tys, ..) | ty::Slice(tys)
// This optimization applies for types that can hold arbitrary bytes (such as
// integer and floating point types) or for structs or tuples with no fields.
// FIXME(wesleywiser) This logic could be extended further to arbitrary structs
// or tuples made up of integer/floating point types or inhabited ZSTs with no
// padding.
// This optimization applies for types that can hold arbitrary non-provenance bytes (such as
// integer and floating point types).
// FIXME(wesleywiser) This logic could be extended further to arbitrary structs or
// tuples made up of integer/floating point types or inhabited ZSTs with no padding.
if matches!(tys.kind(), ty::Int(..) | ty::Uint(..) | ty::Float(..))
=>
{
@ -901,7 +952,7 @@ fn visit_value(&mut self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx>
// Optimized handling for arrays of integer/float type.
// This is the length of the array/slice.
let len = op.len(self.ecx)?;
let len = val.len(self.ecx)?;
// This is the element type size.
let layout = self.ecx.layout_of(*tys)?;
// This is the size in bytes of the whole array. (This checks for overflow.)
@ -911,8 +962,9 @@ fn visit_value(&mut self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx>
if size == Size::ZERO {
return Ok(());
}
// Now that we definitely have a non-ZST array, we know it lives in memory.
let mplace = match op.as_mplace_or_imm() {
// Now that we definitely have a non-ZST array, we know it lives in memory -- except it may
// be an uninitialized local variable, those are also "immediate".
let mplace = match val.to_op(self.ecx)?.as_mplace_or_imm() {
Left(mplace) => mplace,
Right(imm) => match *imm {
Immediate::Uninit =>
@ -958,20 +1010,28 @@ fn visit_value(&mut self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx>
}
}
}
// Don't forget that these are all non-pointer types, and thus do not preserve
// provenance.
if self.reset_provenance {
// We can't share this with the code above: there, we might be looking at read-only memory.
let mut alloc = self.ecx.get_ptr_alloc_mut(mplace.ptr(), size)?.expect("we already excluded size 0");
alloc.clear_provenance()?;
}
}
// Fast path for arrays and slices of ZSTs. We only need to check a single ZST element
// of an array and not all of them, because there's only a single value of a specific
// ZST type, so either validation fails for all elements or none.
ty::Array(tys, ..) | ty::Slice(tys) if self.ecx.layout_of(*tys)?.is_zst() => {
// Validate just the first element (if any).
if op.len(self.ecx)? > 0 {
self.visit_field(op, 0, &self.ecx.project_index(op, 0)?)?;
if val.len(self.ecx)? > 0 {
self.visit_field(val, 0, &self.ecx.project_index(val, 0)?)?;
}
}
_ => {
// default handler
try_validation!(
self.walk_value(op),
self.walk_value(val),
self.path,
// It's not great to catch errors here, since we can't give a very good path,
// but it's better than ICEing.
@ -992,15 +1052,15 @@ fn visit_value(&mut self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx>
// FIXME: We could avoid some redundant checks here. For newtypes wrapping
// scalars, we do the same check on every "level" (e.g., first we check
// MyNewtype and then the scalar in there).
match op.layout.abi {
match val.layout.abi {
Abi::Uninhabited => {
let ty = op.layout.ty;
let ty = val.layout.ty;
throw_validation_failure!(self.path, UninhabitedVal { ty });
}
Abi::Scalar(scalar_layout) => {
if !scalar_layout.is_uninit_valid() {
// There is something to check here.
let scalar = self.read_scalar(op, ExpectedKind::InitScalar)?;
let scalar = self.read_scalar(val, ExpectedKind::InitScalar)?;
self.visit_scalar(scalar, scalar_layout)?;
}
}
@ -1010,7 +1070,7 @@ fn visit_value(&mut self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx>
// the other must be init.
if !a_layout.is_uninit_valid() && !b_layout.is_uninit_valid() {
let (a, b) =
self.read_immediate(op, ExpectedKind::InitScalar)?.to_scalar_pair();
self.read_immediate(val, ExpectedKind::InitScalar)?.to_scalar_pair();
self.visit_scalar(a, a_layout)?;
self.visit_scalar(b, b_layout)?;
}
@ -1031,19 +1091,20 @@ fn visit_value(&mut self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx>
impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
fn validate_operand_internal(
&self,
op: &OpTy<'tcx, M::Provenance>,
&mut self,
val: &PlaceTy<'tcx, M::Provenance>,
path: Vec<PathElem>,
ref_tracking: Option<&mut RefTracking<MPlaceTy<'tcx, M::Provenance>, Vec<PathElem>>>,
ctfe_mode: Option<CtfeValidationMode>,
reset_provenance: bool,
) -> InterpResult<'tcx> {
trace!("validate_operand_internal: {:?}, {:?}", *op, op.layout.ty);
trace!("validate_operand_internal: {:?}, {:?}", *val, val.layout.ty);
// Construct a visitor
let mut visitor = ValidityVisitor { path, ref_tracking, ctfe_mode, ecx: self };
// Run it.
match self.run_for_validation(|| visitor.visit_value(op)) {
// Run the visitor.
match self.run_for_validation(|ecx| {
let mut v = ValidityVisitor { path, ref_tracking, ctfe_mode, ecx, reset_provenance };
v.visit_value(val)
}) {
Ok(()) => Ok(()),
// Pass through validation failures and "invalid program" issues.
Err(err)
@ -1079,13 +1140,19 @@ fn validate_operand_internal(
/// - no `UnsafeCell` or non-ZST `&mut`.
#[inline(always)]
pub(crate) fn const_validate_operand(
&self,
op: &OpTy<'tcx, M::Provenance>,
&mut self,
val: &PlaceTy<'tcx, M::Provenance>,
path: Vec<PathElem>,
ref_tracking: &mut RefTracking<MPlaceTy<'tcx, M::Provenance>, Vec<PathElem>>,
ctfe_mode: CtfeValidationMode,
) -> InterpResult<'tcx> {
self.validate_operand_internal(op, path, Some(ref_tracking), Some(ctfe_mode))
self.validate_operand_internal(
val,
path,
Some(ref_tracking),
Some(ctfe_mode),
/*reset_provenance*/ false,
)
}
/// This function checks the data at `val` to be runtime-valid.
@ -1093,21 +1160,35 @@ pub(crate) fn const_validate_operand(
/// It will error if the bits at the destination do not match the ones described by the layout.
#[inline(always)]
pub fn validate_operand(
&self,
op: &OpTy<'tcx, M::Provenance>,
&mut self,
val: &PlaceTy<'tcx, M::Provenance>,
recursive: bool,
reset_provenance: bool,
) -> InterpResult<'tcx> {
// Note that we *could* actually be in CTFE here with `-Zextra-const-ub-checks`, but it's
// still correct to not use `ctfe_mode`: that mode is for validation of the final constant
// value, it rules out things like `UnsafeCell` in awkward places.
if !recursive {
return self.validate_operand_internal(op, vec![], None, None);
return self.validate_operand_internal(val, vec![], None, None, reset_provenance);
}
// Do a recursive check.
let mut ref_tracking = RefTracking::empty();
self.validate_operand_internal(op, vec![], Some(&mut ref_tracking), None)?;
self.validate_operand_internal(
val,
vec![],
Some(&mut ref_tracking),
None,
reset_provenance,
)?;
while let Some((mplace, path)) = ref_tracking.todo.pop() {
self.validate_operand_internal(&mplace.into(), path, Some(&mut ref_tracking), None)?;
// Things behind a reference do *not* have their provenance reset.
self.validate_operand_internal(
&mplace.into(),
path,
Some(&mut ref_tracking),
None,
/*reset_provenance*/ false,
)?;
}
Ok(())
}

View File

@ -4,7 +4,7 @@
use rustc_target::abi::{Abi, FieldsShape, Scalar, Variants};
use crate::const_eval::{CanAccessMutGlobal, CheckAlignment, CompileTimeMachine};
use crate::interpret::{InterpCx, MemoryKind, OpTy};
use crate::interpret::{InterpCx, MemoryKind};
/// Determines if this type permits "raw" initialization by just transmuting some memory into an
/// instance of `T`.
@ -61,13 +61,17 @@ fn might_permit_raw_init_strict<'tcx>(
.expect("failed to write bytes for zero valid check");
}
let ot: OpTy<'_, _> = allocated.into();
// Assume that if it failed, it's a validation failure.
// This does *not* actually check that references are dereferenceable, but since all types that
// require dereferenceability also require non-null, we don't actually get any false negatives
// due to this.
Ok(cx.validate_operand(&ot, /*recursive*/ false).is_ok())
Ok(cx
.validate_operand(
&allocated.into(),
/*recursive*/ false,
/*reset_provenance*/ false,
)
.is_ok())
}
/// Implements the 'lax' (default) version of the `might_permit_raw_init` checks; see that function for

View File

@ -644,6 +644,12 @@ pub fn write_uninit(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> Al
return Ok(());
}
/// Removes all provenance within `range` of this allocation.
///
/// Only the provenance map is modified by this method; any error from clearing it is
/// returned to the caller.
pub fn clear_provenance(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
    self.provenance.clear(range, cx)?;
    Ok(())
}
/// Applies a previously prepared provenance copy.
/// The affected range, as defined in the parameters to `provenance().prepare_copy` is expected
/// to be clear of provenance.

View File

@ -307,6 +307,13 @@ pub fn try_to_scalar_int(self) -> Result<ScalarInt, Scalar<AllocId>> {
}
}
/// Turns a pointer scalar into a plain integer scalar, in place.
///
/// Scalars that are not `Scalar::Ptr` are left unchanged. For a pointer, the conversion goes
/// through `to_scalar_int`, and any error from that conversion is propagated (in which case
/// `self` is not modified).
pub fn clear_provenance(&mut self) -> InterpResult<'tcx> {
    if let Scalar::Ptr(..) = *self {
        let int = self.to_scalar_int()?;
        *self = Scalar::Int(int);
    }
    Ok(())
}
#[inline(always)]
pub fn to_scalar_int(self) -> InterpResult<'tcx, ScalarInt> {
self.try_to_scalar_int().map_err(|_| err_unsup!(ReadPointerAsInt(None)).into())

View File

@ -637,7 +637,7 @@ fn write_scalar_atomic(
// The program didn't actually do a read, so suppress the memory access hooks.
// This is also a very special exception where we just ignore an error -- if this read
// was UB e.g. because the memory is uninitialized, we don't want to know!
let old_val = this.run_for_validation(|| this.read_scalar(dest)).ok();
let old_val = this.run_for_validation(|this| this.read_scalar(dest)).ok();
this.allow_data_races_mut(move |this| this.write_scalar(val, dest))?;
this.validate_atomic_store(dest, atomic)?;
this.buffered_atomic_write(val, dest, atomic, old_val)

View File

@ -152,8 +152,10 @@ fn emulate_intrinsic_by_name(
// ```
// Would not be considered UB, or the other way around (`is_val_statically_known(0)`).
"is_val_statically_known" => {
let [arg] = check_arg_count(args)?;
this.validate_operand(arg, /*recursive*/ false)?;
let [_arg] = check_arg_count(args)?;
// FIXME: should we check for validity here? It's tricky because we do not have a
// place. Codegen does not seem to set any attributes like `noundef` for intrinsic
// calls, so we don't *have* to do anything.
let branch: bool = this.machine.rng.get_mut().gen();
this.write_scalar(Scalar::from_bool(branch), dest)?;
}

View File

@ -0,0 +1,10 @@
use std::mem;
// Doing a copy at integer type should lose provenance.
// This tests the unoptimized base case.
fn main() {
// One (reference, bool) pair. The transmute below reinterprets it as (usize, bool), so the
// reference ends up being copied at integer type.
// NOTE(review): presumably the extra `bool` field is what keeps this off the plain
// integer-array path covered by the sibling "optimized-array" test -- confirm.
let ptrs = [(&42, true)];
let ints: [(usize, bool); 1] = unsafe { mem::transmute(ptrs) };
// View the integer field as a `&i32` again.
let ptr = (&raw const ints[0].0).cast::<&i32>();
// Loading that reference is the operation expected to be reported as dangling.
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
--> $DIR/int_copy_looses_provenance0.rs:LL:CC
|
LL | let _val = unsafe { *ptr.read() };
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/int_copy_looses_provenance0.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,10 @@
use std::mem;
// Doing a copy at integer type should lose provenance.
// This tests the optimized-array case of integer copies.
fn main() {
// A one-element array holding a reference. The transmute below reinterprets it as an array
// of `usize`, so the reference ends up being copied at integer type.
let ptrs = [&42];
let ints: [usize; 1] = unsafe { mem::transmute(ptrs) };
// View the integer element as a `&i32` again.
let ptr = (&raw const ints[0]).cast::<&i32>();
// Loading that reference is the operation expected to be reported as dangling.
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
--> $DIR/int_copy_looses_provenance1.rs:LL:CC
|
LL | let _val = unsafe { *ptr.read() };
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/int_copy_looses_provenance1.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,12 @@
use std::mem;
// Doing a copy at integer type should lose provenance.
// This tests the case where provenance is hiding in the metadata of a pointer.
fn main() {
// A pair of references. After the transmute below, the second one sits in the slot that
// the code further down treats as the wide pointer's metadata.
let ptrs = [(&42, &42)];
// Typed copy at wide pointer type (with integer-typed metadata).
let ints: [*const [usize]; 1] = unsafe { mem::transmute(ptrs) };
// Get a pointer to the metadata field.
// The offset is the size of one thin pointer, i.e. this relies on the metadata being stored
// directly after the data pointer.
let ptr = (&raw const ints[0]).wrapping_byte_add(mem::size_of::<*const ()>()).cast::<&i32>();
// Loading the metadata slot as a reference is expected to be reported as dangling.
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
--> $DIR/int_copy_looses_provenance2.rs:LL:CC
|
LL | let _val = unsafe { *ptr.read() };
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/int_copy_looses_provenance2.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,29 @@
#![feature(strict_provenance)]
use std::mem;
// Enum with an explicit `usize` tag and a `usize` payload in each variant. `main` asserts that
// its size is exactly two `usize`s and then reads the tag through a pointer to the enum's start.
#[repr(C, usize)]
// The variants are never named in this file: the only `E` values are produced by `transmute`.
#[allow(unused)]
enum E {
Var1(usize),
Var2(usize),
}
// Doing a copy at integer type should lose provenance.
// This tests the case where provenance is hiding in the discriminant of an enum.
fn main() {
// Sanity check: `E` must have the same size as the pair of pointers that is transmuted
// into it below.
assert_eq!(mem::size_of::<E>(), 2*mem::size_of::<usize>());
// We want to store provenance in the enum discriminant, but the value still needs to
// be valid for the type. So we split provenance and data.
let ptr = &42;
let ptr = ptr as *const i32;
// First component: address 0, but carrying the provenance of `ptr`.
// Second component: the unmodified pointer.
let ptrs = [(ptr.with_addr(0), ptr)];
// Typed copy at the enum type.
let ints: [E; 1] = unsafe { mem::transmute(ptrs) };
// Read the discriminant.
let discr = unsafe { (&raw const ints[0]).cast::<*const i32>().read() };
// Take the provenance from there, together with the original address.
let ptr = discr.with_addr(ptr.addr());
// There should be no provenance in `discr`, so this should be UB.
let _val = unsafe { *ptr }; //~ERROR: dangling
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
--> $DIR/int_copy_looses_provenance3.rs:LL:CC
|
LL | let _val = unsafe { *ptr };
| ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/int_copy_looses_provenance3.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,17 @@
fn main() {
// Checks that a typed copy at raw-pointer type strips the provenance of a pointer that
// only partially overlaps the elements being copied.
unsafe {
// 16 bytes of plain data; NOTE(review): sized for two pointers assuming 8-byte pointers.
let mut bytes = [1u8; 16];
let bytes = bytes.as_mut_ptr();
// Put a pointer in the middle.
bytes.add(4).cast::<&i32>().write_unaligned(&42);
// Typed copy of the entire thing as two pointers, but not perfectly
// overlapping with the pointer we have in there.
let copy = bytes.cast::<[*const (); 2]>().read_unaligned();
let copy_bytes = copy.as_ptr().cast::<u8>();
// Now go to the middle of the copy and get the pointer back out.
let ptr = copy_bytes.add(4).cast::<*const i32>().read_unaligned();
// Dereferencing this should fail as the copy has removed the provenance.
let _val = *ptr; //~ERROR: dangling
}
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
--> $DIR/ptr_copy_loses_partial_provenance0.rs:LL:CC
|
LL | let _val = *ptr;
| ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/ptr_copy_loses_partial_provenance0.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,17 @@
fn main() {
// Same scenario as the raw-pointer variant of this test, but the typed copy is done at
// function-pointer type (`fn()`); the partially overlapping provenance must be stripped too.
unsafe {
// 16 bytes of plain data; NOTE(review): sized for two pointers assuming 8-byte pointers.
let mut bytes = [1u8; 16];
let bytes = bytes.as_mut_ptr();
// Put a pointer in the middle.
bytes.add(4).cast::<&i32>().write_unaligned(&42);
// Typed copy of the entire thing as two *function* pointers, but not perfectly
// overlapping with the pointer we have in there.
let copy = bytes.cast::<[fn(); 2]>().read_unaligned();
let copy_bytes = copy.as_ptr().cast::<u8>();
// Now go to the middle of the copy and get the pointer back out.
let ptr = copy_bytes.add(4).cast::<*const i32>().read_unaligned();
// Dereferencing this should fail as the copy has removed the provenance.
let _val = *ptr; //~ERROR: dangling
}
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
--> $DIR/ptr_copy_loses_partial_provenance1.rs:LL:CC
|
LL | let _val = *ptr;
| ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/ptr_copy_loses_partial_provenance1.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -12,6 +12,7 @@ fn main() {
bytewise_custom_memcpy();
bytewise_custom_memcpy_chunked();
int_load_strip_provenance();
maybe_uninit_preserves_partial_provenance();
}
/// Some basic smoke tests for provenance.
@ -145,3 +146,24 @@ fn int_load_strip_provenance() {
let ints: [usize; 1] = unsafe { mem::transmute(ptrs) };
assert_eq!(ptrs[0] as *const _ as usize, ints[0]);
}
fn maybe_uninit_preserves_partial_provenance() {
    // Counterpart of the `ptr_copy_loses_partial_provenance*` fail tests: here the copy is done
    // at `MaybeUninit` type, so the partially overlapping provenance has to survive it.
    unsafe {
        let mut buf = [1u8; 16];
        let base = buf.as_mut_ptr();
        // Plant a reference at byte offset 4 of the buffer.
        base.add(4).cast::<&i32>().write_unaligned(&42);
        // Copy the whole buffer as two `MaybeUninit` pointers, which do not perfectly
        // overlap with the reference planted above.
        let copied = base.cast::<[mem::MaybeUninit<*const ()>; 2]>().read_unaligned();
        let copied_base = copied.as_ptr().cast::<u8>();
        // Fetch the pointer back from the same byte offset inside the copy.
        let recovered = copied_base.add(4).cast::<*const i32>().read_unaligned();
        // It must still be dereferenceable and yield the original value.
        let loaded = *recovered;
        assert_eq!(loaded, 42);
    }
}