stabilize Strict Provenance and Exposed Provenance

This comes with a big docs rewrite.
This commit is contained in:
Ralf Jung 2024-09-14 13:30:37 +02:00
parent bfab34af4c
commit c3e928d8dd
5 changed files with 323 additions and 398 deletions

View File

@ -361,12 +361,16 @@ fn transmute_immediate(
(Int(..) | Float(_), Int(..) | Float(_)) => bx.bitcast(imm, to_backend_ty), (Int(..) | Float(_), Int(..) | Float(_)) => bx.bitcast(imm, to_backend_ty),
(Pointer(..), Pointer(..)) => bx.pointercast(imm, to_backend_ty), (Pointer(..), Pointer(..)) => bx.pointercast(imm, to_backend_ty),
(Int(..), Pointer(..)) => bx.ptradd(bx.const_null(bx.type_ptr()), imm), (Int(..), Pointer(..)) => bx.ptradd(bx.const_null(bx.type_ptr()), imm),
(Pointer(..), Int(..)) => bx.ptrtoint(imm, to_backend_ty), (Pointer(..), Int(..)) => {
// FIXME: this exposes the provenance, which shouldn't be necessary.
bx.ptrtoint(imm, to_backend_ty)
}
(Float(_), Pointer(..)) => { (Float(_), Pointer(..)) => {
let int_imm = bx.bitcast(imm, bx.cx().type_isize()); let int_imm = bx.bitcast(imm, bx.cx().type_isize());
bx.ptradd(bx.const_null(bx.type_ptr()), int_imm) bx.ptradd(bx.const_null(bx.type_ptr()), int_imm)
} }
(Pointer(..), Float(_)) => { (Pointer(..), Float(_)) => {
// FIXME: this exposes the provenance, which shouldn't be necessary.
let int_imm = bx.ptrtoint(imm, bx.cx().type_isize()); let int_imm = bx.ptrtoint(imm, bx.cx().type_isize());
bx.bitcast(int_imm, to_backend_ty) bx.bitcast(int_imm, to_backend_ty)
} }

View File

@ -137,10 +137,11 @@ pub const fn cast_mut(self) -> *mut T {
/// Gets the "address" portion of the pointer. /// Gets the "address" portion of the pointer.
/// ///
/// This is similar to `self as usize`, which semantically discards *provenance* and /// This is similar to `self as usize`, except that the [provenance][crate::ptr#provenance] of
/// *address-space* information. However, unlike `self as usize`, casting the returned address /// the pointer is discarded and not [exposed][crate::ptr#exposed-provenance]. This means that
/// back to a pointer yields a [pointer without provenance][without_provenance], which is undefined behavior to dereference. To /// casting the returned address back to a pointer yields a [pointer without
/// properly restore the lost information and obtain a dereferenceable pointer, use /// provenance][without_provenance], which is undefined behavior to dereference. To properly
/// restore the lost information and obtain a dereferenceable pointer, use
/// [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr]. /// [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr].
/// ///
/// If using those APIs is not possible because there is no way to preserve a pointer with the /// If using those APIs is not possible because there is no way to preserve a pointer with the
@ -155,90 +156,81 @@ pub const fn cast_mut(self) -> *mut T {
/// perform a change of representation to produce a value containing only the address /// perform a change of representation to produce a value containing only the address
/// portion of the pointer. What that means is up to the platform to define. /// portion of the pointer. What that means is up to the platform to define.
/// ///
/// This API and its claimed semantics are part of the Strict Provenance experiment, and as such /// This is a [Strict Provenance][crate::ptr#strict-provenance] API.
/// might change in the future (including possibly weakening this so it becomes wholly
/// equivalent to `self as usize`). See the [module documentation][crate::ptr] for details.
#[must_use] #[must_use]
#[inline(always)] #[inline(always)]
#[unstable(feature = "strict_provenance", issue = "95228")] #[stable(feature = "strict_provenance", since = "CURRENT_RUSTC_VERSION")]
pub fn addr(self) -> usize { pub fn addr(self) -> usize {
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. // A pointer-to-integer transmute currently has exactly the right semantics: it returns the
// address without exposing the provenance. Note that this is *not* a stable guarantee about
// transmute semantics, it relies on sysroot crates having special status.
// SAFETY: Pointer-to-integer transmutes are valid (if you are okay with losing the // SAFETY: Pointer-to-integer transmutes are valid (if you are okay with losing the
// provenance). // provenance).
unsafe { mem::transmute(self.cast::<()>()) } unsafe { mem::transmute(self.cast::<()>()) }
} }
/// Exposes the "provenance" part of the pointer for future use in /// Exposes the ["provenance"][crate::ptr#provenance] part of the pointer for future use in
/// [`with_exposed_provenance`][] and returns the "address" portion. /// [`with_exposed_provenance`] and returns the "address" portion.
/// ///
/// This is equivalent to `self as usize`, which semantically discards *provenance* and /// This is equivalent to `self as usize`, which semantically discards provenance information.
/// *address-space* information. Furthermore, this (like the `as` cast) has the implicit /// Furthermore, this (like the `as` cast) has the implicit side-effect of marking the
/// side-effect of marking the provenance as 'exposed', so on platforms that support it you can /// provenance as 'exposed', so on platforms that support it you can later call
/// later call [`with_exposed_provenance`][] to reconstitute the original pointer including its /// [`with_exposed_provenance`] to reconstitute the original pointer including its provenance.
/// provenance. (Reconstructing address space information, if required, is your responsibility.)
/// ///
/// Using this method means that code is *not* following [Strict /// Due to its inherent ambiguity, [`with_exposed_provenance`] may not be supported by tools
/// Provenance][super#strict-provenance] rules. Supporting /// that help you to stay conformant with the Rust memory model. It is recommended to use
/// [`with_exposed_provenance`][] complicates specification and reasoning and may not be supported by /// [Strict Provenance][crate::ptr#strict-provenance] APIs such as [`with_addr`][pointer::with_addr]
/// tools that help you to stay conformant with the Rust memory model, so it is recommended to /// wherever possible, in which case [`addr`][pointer::addr] should be used instead of `expose_provenance`.
/// use [`addr`][pointer::addr] wherever possible.
/// ///
/// On most platforms this will produce a value with the same bytes as the original pointer, /// On most platforms this will produce a value with the same bytes as the original pointer,
/// because all the bytes are dedicated to describing the address. Platforms which need to store /// because all the bytes are dedicated to describing the address. Platforms which need to store
/// additional information in the pointer may not support this operation, since the 'expose' /// additional information in the pointer may not support this operation, since the 'expose'
/// side-effect which is required for [`with_exposed_provenance`][] to work is typically not /// side-effect which is required for [`with_exposed_provenance`] to work is typically not
/// available. /// available.
/// ///
/// It is unclear whether this method can be given a satisfying unambiguous specification. This /// This is an [Exposed Provenance][crate::ptr#exposed-provenance] API.
/// API and its claimed semantics are part of [Exposed Provenance][super#exposed-provenance].
/// ///
/// [`with_exposed_provenance`]: with_exposed_provenance /// [`with_exposed_provenance`]: with_exposed_provenance
#[must_use] #[must_use]
#[inline(always)] #[inline(always)]
#[unstable(feature = "exposed_provenance", issue = "95228")] #[stable(feature = "exposed_provenance", since = "CURRENT_RUSTC_VERSION")]
pub fn expose_provenance(self) -> usize { pub fn expose_provenance(self) -> usize {
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
self.cast::<()>() as usize self.cast::<()>() as usize
} }
/// Creates a new pointer with the given address. /// Creates a new pointer with the given address and the [provenance][crate::ptr#provenance] of
/// `self`.
/// ///
/// This performs the same operation as an `addr as ptr` cast, but copies /// This is similar to an `addr as *const T` cast, but copies
/// the *address-space* and *provenance* of `self` to the new pointer. /// the *provenance* of `self` to the new pointer.
/// This allows us to dynamically preserve and propagate this important /// This avoids the inherent ambiguity of the unary cast.
/// information in a way that is otherwise impossible with a unary cast.
/// ///
/// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset /// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset
/// `self` to the given address, and therefore has all the same capabilities and restrictions. /// `self` to the given address, and therefore has all the same capabilities and restrictions.
/// ///
/// This API and its claimed semantics are part of the Strict Provenance experiment, /// This is a [Strict Provenance][crate::ptr#strict-provenance] API.
/// see the [module documentation][crate::ptr] for details.
#[must_use] #[must_use]
#[inline] #[inline]
#[unstable(feature = "strict_provenance", issue = "95228")] #[stable(feature = "strict_provenance", since = "CURRENT_RUSTC_VERSION")]
pub fn with_addr(self, addr: usize) -> Self { pub fn with_addr(self, addr: usize) -> Self {
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. // This should probably be an intrinsic to avoid doing any sort of arithmetic, but
// // meanwhile, we can implement it with `wrapping_offset`, which preserves the pointer's
// In the mean-time, this operation is defined to be "as if" it was // provenance.
// a wrapping_offset, so we can emulate it as such. This should properly
// restore pointer provenance even under today's compiler.
let self_addr = self.addr() as isize; let self_addr = self.addr() as isize;
let dest_addr = addr as isize; let dest_addr = addr as isize;
let offset = dest_addr.wrapping_sub(self_addr); let offset = dest_addr.wrapping_sub(self_addr);
// This is the canonical desugaring of this operation
self.wrapping_byte_offset(offset) self.wrapping_byte_offset(offset)
} }
/// Creates a new pointer by mapping `self`'s address to a new one. /// Creates a new pointer by mapping `self`'s address to a new one, preserving the
/// [provenance][crate::ptr#provenance] of `self`.
/// ///
/// This is a convenience for [`with_addr`][pointer::with_addr], see that method for details. /// This is a convenience for [`with_addr`][pointer::with_addr], see that method for details.
/// ///
/// This API and its claimed semantics are part of the Strict Provenance experiment, /// This is a [Strict Provenance][crate::ptr#strict-provenance] API.
/// see the [module documentation][crate::ptr] for details.
#[must_use] #[must_use]
#[inline] #[inline]
#[unstable(feature = "strict_provenance", issue = "95228")] #[stable(feature = "strict_provenance", since = "CURRENT_RUSTC_VERSION")]
pub fn map_addr(self, f: impl FnOnce(usize) -> usize) -> Self { pub fn map_addr(self, f: impl FnOnce(usize) -> usize) -> Self {
self.with_addr(f(self.addr())) self.with_addr(f(self.addr()))
} }
@ -379,7 +371,7 @@ pub const fn to_raw_parts(self) -> (*const (), <T as super::Pointee>::Metadata)
/// * The offset in bytes, `count * size_of::<T>()`, computed on mathematical integers (without /// * The offset in bytes, `count * size_of::<T>()`, computed on mathematical integers (without
/// "wrapping around"), must fit in an `isize`. /// "wrapping around"), must fit in an `isize`.
/// ///
/// * If the computed offset is non-zero, then `self` must be derived from a pointer to some /// * If the computed offset is non-zero, then `self` must be [derived from][crate::ptr#provenance] a pointer to some
/// [allocated object], and the entire memory range between `self` and the result must be in /// [allocated object], and the entire memory range between `self` and the result must be in
/// bounds of that allocated object. In particular, this range must not "wrap around" the edge /// bounds of that allocated object. In particular, this range must not "wrap around" the edge
/// of the address space. /// of the address space.
@ -611,7 +603,7 @@ pub fn mask(self, mask: usize) -> *const T {
/// * `self` and `origin` must either /// * `self` and `origin` must either
/// ///
/// * point to the same address, or /// * point to the same address, or
/// * both be *derived from* a pointer to the same [allocated object], and the memory range between /// * both be [derived from][crate::ptr#provenance] a pointer to the same [allocated object], and the memory range between
/// the two pointers must be in bounds of that object. (See below for an example.) /// the two pointers must be in bounds of that object. (See below for an example.)
/// ///
/// * The distance between the pointers, in bytes, must be an exact multiple /// * The distance between the pointers, in bytes, must be an exact multiple
@ -871,7 +863,7 @@ pub const fn guaranteed_ne(self, other: *const T) -> Option<bool>
/// * The offset in bytes, `count * size_of::<T>()`, computed on mathematical integers (without /// * The offset in bytes, `count * size_of::<T>()`, computed on mathematical integers (without
/// "wrapping around"), must fit in an `isize`. /// "wrapping around"), must fit in an `isize`.
/// ///
/// * If the computed offset is non-zero, then `self` must be derived from a pointer to some /// * If the computed offset is non-zero, then `self` must be [derived from][crate::ptr#provenance] a pointer to some
/// [allocated object], and the entire memory range between `self` and the result must be in /// [allocated object], and the entire memory range between `self` and the result must be in
/// bounds of that allocated object. In particular, this range must not "wrap around" the edge /// bounds of that allocated object. In particular, this range must not "wrap around" the edge
/// of the address space. /// of the address space.
@ -978,7 +970,7 @@ const fn comptime(_: *const (), _: usize, _: usize) -> bool {
/// * The offset in bytes, `count * size_of::<T>()`, computed on mathematical integers (without /// * The offset in bytes, `count * size_of::<T>()`, computed on mathematical integers (without
/// "wrapping around"), must fit in an `isize`. /// "wrapping around"), must fit in an `isize`.
/// ///
/// * If the computed offset is non-zero, then `self` must be derived from a pointer to some /// * If the computed offset is non-zero, then `self` must be [derived from][crate::ptr#provenance] a pointer to some
/// [allocated object], and the entire memory range between `self` and the result must be in /// [allocated object], and the entire memory range between `self` and the result must be in
/// bounds of that allocated object. In particular, this range must not "wrap around" the edge /// bounds of that allocated object. In particular, this range must not "wrap around" the edge
/// of the address space. /// of the address space.

View File

@ -18,10 +18,11 @@
//! * For operations of [size zero][zst], *every* pointer is valid, including the [null] pointer. //! * For operations of [size zero][zst], *every* pointer is valid, including the [null] pointer.
//! The following points are only concerned with non-zero-sized accesses. //! The following points are only concerned with non-zero-sized accesses.
//! * A [null] pointer is *never* valid. //! * A [null] pointer is *never* valid.
//! * For a pointer to be valid, it is necessary, but not always sufficient, that the pointer //! * For a pointer to be valid, it is necessary, but not always sufficient, that the pointer be
//! be *dereferenceable*: the memory range of the given size starting at the pointer must all be //! *dereferenceable*. The [provenance] of the pointer is used to determine which [allocated
//! within the bounds of a single allocated object. Note that in Rust, //! object] it is derived from; a pointer is dereferenceable if the memory range of the given size
//! every (stack-allocated) variable is considered a separate allocated object. //! starting at the pointer is entirely contained within the bounds of that allocated object. Note
//! that in Rust, every (stack-allocated) variable is considered a separate allocated object.
//! * All accesses performed by functions in this module are *non-atomic* in the sense //! * All accesses performed by functions in this module are *non-atomic* in the sense
//! of [atomic operations] used to synchronize between threads. This means it is //! of [atomic operations] used to synchronize between threads. This means it is
//! undefined behavior to perform two concurrent accesses to the same location from different //! undefined behavior to perform two concurrent accesses to the same location from different
@ -130,123 +131,130 @@
//! //!
//! [`null()`]: null //! [`null()`]: null
//! //!
//! # Strict Provenance //! # Provenance
//!
//! **The following text is non-normative, insufficiently formal, and is an extremely strict
//! interpretation of provenance. It's ok if your code doesn't strictly conform to it.**
//!
//! [Strict Provenance][] is an experimental set of APIs that help tools that try
//! to validate the memory-safety of your program's execution. Notably this includes [Miri][]
//! and [CHERI][], which can detect when you access out of bounds memory or otherwise violate
//! Rust's memory model.
//!
//! Provenance must exist in some form for any programming
//! language compiled for modern computer architectures, but specifying a model for provenance
//! in a way that is useful to both compilers and programmers is an ongoing challenge.
//! The [Strict Provenance][] experiment seeks to explore the question: *what if we just said you
//! couldn't do all the nasty operations that make provenance so messy?*
//!
//! What APIs would have to be removed? What APIs would have to be added? How much would code
//! have to change, and is it worse or better now? Would any patterns become truly inexpressible?
//! Could we carve out special exceptions for those patterns? Should we?
//!
//! A secondary goal of this project is to see if we can disambiguate the many functions of
//! pointer<->integer casts enough for the definition of `usize` to be loosened so that it
//! isn't *pointer*-sized but address-space/offset/allocation-sized (we'll probably continue
//! to conflate these notions). This would potentially make it possible to more efficiently
//! target platforms where pointers are larger than offsets, such as CHERI and maybe some
//! segmented architectures.
//!
//! ## Provenance
//!
//! **This section is *non-normative* and is part of the [Strict Provenance][] experiment.**
//! //!
//! Pointers are not *simply* an "integer" or "address". For instance, it's uncontroversial //! Pointers are not *simply* an "integer" or "address". For instance, it's uncontroversial
//! to say that a Use After Free is clearly Undefined Behaviour, even if you "get lucky" //! to say that a Use After Free is clearly Undefined Behaviour, even if you "get lucky"
//! and the freed memory gets reallocated before your read/write (in fact this is the //! and the freed memory gets reallocated before your read/write (in fact this is the
//! worst-case scenario, UAFs would be much less concerning if this didn't happen!). //! worst-case scenario, UAFs would be much less concerning if this didn't happen!).
//! To rationalize this claim, pointers need to somehow be *more* than just their addresses: //! As another example, consider that [`wrapping_offset`] is documented to "remember"
//! they must have provenance. //! the allocated object that the original pointer points to, even if it is offset far
//! outside the memory range occupied by that allocated object.
//! To rationalize claims like this, pointers need to somehow be *more* than just their addresses:
//! they must have **provenance**.
//! //!
//! When an allocation is created, that allocation has a unique Original Pointer. For alloc //! A pointer value in Rust semantically contains the following information:
//! APIs this is literally the pointer the call returns, and for local variables and statics,
//! this is the name of the variable/static. This is mildly overloading the term "pointer"
//! for the sake of brevity/exposition.
//! //!
//! The Original Pointer for an allocation is guaranteed to have unique access to the entire
//! allocation and *only* that allocation. In this sense, an allocation can be thought of
//! as a "sandbox" that cannot be broken into or out of. *Provenance* is the permission
//! to access an allocation's sandbox and has both a *spatial* and *temporal* component:
//!
//! * Spatial: A range of bytes that the pointer is allowed to access.
//! * Temporal: The lifetime (of the allocation) that access to these bytes is tied to.
//!
//! Spatial provenance makes sure you don't go beyond your sandbox, while temporal provenance
//! makes sure that you can't "get lucky" after your permission to access some memory
//! has been revoked (either through deallocations or borrows expiring).
//!
//! Provenance is implicitly shared with all pointers transitively derived from
//! The Original Pointer through operations like [`offset`], borrowing, and pointer casts.
//! Some operations may *shrink* the derived provenance, limiting how much memory it can
//! access or how long it's valid for (i.e. borrowing a subfield and subslicing).
//!
//! Shrinking provenance cannot be undone: even if you "know" there is a larger allocation, you
//! can't derive a pointer with a larger provenance. Similarly, you cannot "recombine"
//! two contiguous provenances back into one (i.e. with a `fn merge(&[T], &[T]) -> &[T]`).
//!
//! A reference to a value always has provenance over exactly the memory that field occupies.
//! A reference to a slice always has provenance over exactly the range that slice describes.
//!
//! If an allocation is deallocated, all pointers with provenance to that allocation become
//! invalidated, and effectively lose their provenance.
//!
//! The strict provenance experiment is mostly only interested in exploring stricter *spatial*
//! provenance. In this sense it can be thought of as a subset of the more ambitious and
//! formal [Stacked Borrows][] research project, which is what tools like [Miri][] are based on.
//! In particular, Stacked Borrows is necessary to properly describe what borrows are allowed
//! to do and when they become invalidated. This necessarily involves much more complex
//! *temporal* reasoning than simply identifying allocations. Adjusting APIs and code
//! for the strict provenance experiment will also greatly help Stacked Borrows.
//!
//!
//! ## Pointer Vs Addresses
//!
//! **This section is *non-normative* and is part of the [Strict Provenance][] experiment.**
//!
//! One of the largest historical issues with trying to define provenance is that programmers
//! freely convert between pointers and integers. Once you allow for this, it generally becomes
//! impossible to accurately track and preserve provenance information, and you need to appeal
//! to very complex and unreliable heuristics. But of course, converting between pointers and
//! integers is very useful, so what can we do?
//!
//! Also did you know WASM is actually a "Harvard Architecture"? As in function pointers are
//! handled completely differently from data pointers? And we kind of just shipped Rust on WASM
//! without really addressing the fact that we let you freely convert between function pointers
//! and data pointers, because it mostly Just Works? Let's just put that on the "pointer casts
//! are dubious" pile.
//!
//! Strict Provenance attempts to square these circles by decoupling Rust's traditional conflation
//! of pointers and `usize` (and `isize`), and defining a pointer to semantically contain the
//! following information:
//!
//! * The **address-space** it is part of (e.g. "data" vs "code" in WASM).
//! * The **address** it points to, which can be represented by a `usize`. //! * The **address** it points to, which can be represented by a `usize`.
//! * The **provenance** it has, defining the memory it has permission to access. //! * The **provenance** it has, defining the memory it has permission to access. Provenance can be
//! Provenance can be absent, in which case the pointer does not have permission to access any memory. //! absent, in which case the pointer does not have permission to access any memory.
//! //!
//! Under Strict Provenance, a `usize` *cannot* accurately represent a pointer, and converting from //! The exact structure of provenance is not yet specified, but the permissions defined by a
//! a pointer to a `usize` is generally an operation which *only* extracts the address. It is //! pointer's provenance have a *spatial* component, a *temporal* component, and a *mutability*
//! therefore *impossible* to construct a valid pointer from a `usize` because there is no way //! component:
//! to restore the address-space and provenance. In other words, pointer-integer-pointer
//! roundtrips are not possible (in the sense that the resulting pointer is not dereferenceable).
//! //!
//! The key insight to making this model *at all* viable is the [`with_addr`][] method: //! * Spatial: The set of memory addresses that the pointer is allowed to access.
//! * Temporal: The timespan during which the pointer is allowed to access those memory addresses.
//! * Mutability: Whether the pointer may only access the memory for reads, or also access it for
//! writes. Note that this can interact with the other components, e.g. a pointer might permit
//! mutation only for a subset of addresses, or only for a subset of its maximal timespan.
//!
//! When an [allocated object] is created, it has a unique Original Pointer. For alloc
//! APIs this is literally the pointer the call returns, and for local variables and statics,
//! this is the name of the variable/static. (This is mildly overloading the term "pointer"
//! for the sake of brevity/exposition.)
//!
//! The Original Pointer for an allocated object has provenance that constrains the *spatial*
//! permissions of this pointer to the memory range of the allocation, and the *temporal*
//! permissions to the lifetime of the allocation. Provenance is implicitly inherited by all
//! pointers transitively derived from the Original Pointer through operations like [`offset`],
//! borrowing, and pointer casts. Some operations may *shrink* the permissions of the derived
//! provenance, limiting how much memory it can access or how long it's valid for (e.g. borrowing a
//! subfield and subslicing can shrink the spatial component of provenance, and all borrowing can
//! shrink the temporal component of provenance). However, no operation can ever *grow* the
//! permissions of the derived provenance: even if you "know" there is a larger allocation, you
//! can't derive a pointer with a larger provenance. Similarly, you cannot "recombine" two
//! contiguous provenances back into one (e.g. with a `fn merge(&[T], &[T]) -> &[T]`).
//!
//! A reference to a place always has provenance over at least the memory that place occupies.
//! A reference to a slice always has provenance over at least the range that slice describes.
//! Whether and when exactly the provenance of a reference gets "shrunk" to *exactly* fit
//! the memory it points to is not yet determined.
//!
//! A *shared* reference only ever has provenance that permits reading from memory,
//! and never permits writes, except inside [`UnsafeCell`].
//!
//! Provenance can affect whether a program has undefined behavior:
//!
//! * It is undefined behavior to access memory through a pointer that does not have provenance over
//! that memory. Note that a pointer "at the end" of its provenance is not actually outside its
//! provenance, it just has 0 bytes it can load/store. Zero-sized accesses do not require any
//! provenance since they access an empty range of memory.
//!
//! * It is undefined behavior to [`offset`] a pointer across a memory range that is not contained
//! in the allocated object it is derived from, or to [`offset_from`] two pointers not derived
//! from the same allocated object. Provenance is used to say what exactly "derived from" even
//! means: the lineage of a pointer is traced back to the Original Pointer it descends from, and
//! that identifies the relevant allocated object. In particular, it's always UB to offset a
//! pointer derived from something that is now deallocated, except if the offset is 0.
//!
//! But it *is* still sound to:
//!
//! * Create a pointer without provenance from just an address (see [`ptr::dangling`]). Such a
//! pointer cannot be used for memory accesses (except for zero-sized accesses). This can still be
//! useful for sentinel values like `null` *or* to represent a tagged pointer that will never be
//! dereferenceable. In general, it is always sound for an integer to pretend to be a pointer "for
//! fun" as long as you don't use operations on it which require it to be valid (non-zero-sized
//! offset, read, write, etc).
//!
//! * Forge an allocation of size zero at any sufficiently aligned non-null address.
//! i.e. the usual "ZSTs are fake, do what you want" rules apply.
//!
//! * [`wrapping_offset`] a pointer outside its provenance. This includes pointers
//! which have "no" provenance. In particular, this makes it sound to do pointer tagging tricks.
//!
//! * Compare arbitrary pointers by address. Pointer comparison ignores provenance and addresses
//! *are* just integers, so there is always a coherent answer, even if the pointers are dangling
//! or from different provenances. Note that if you get "lucky" and notice that a pointer at the
//! end of one allocated object is the "same" address as the start of another allocated object,
//! anything you do with that fact is *probably* going to be gibberish. The scope of that
//! gibberish is kept under control by the fact that the two pointers *still* aren't allowed to
//! access the other's allocation (bytes), because they still have different provenance.
//!
//! Note that the full definition of provenance in Rust is not decided yet, as this interacts
//! with the as-yet undecided [aliasing] rules.
//!
//! ## Pointers Vs Integers
//!
//! From this discussion, it becomes very clear that a `usize` *cannot* accurately represent a pointer,
//! and converting from a pointer to a `usize` is generally an operation which *only* extracts the
//! address. Converting this address back into a pointer requires somehow answering the question:
//! which provenance should the resulting pointer have?
//!
//! Rust provides two ways of dealing with this situation: *Strict Provenance* and *Exposed Provenance*.
//!
//! Note that a pointer *can* represent a `usize` (via [`without_provenance`]), so the right type to
//! use in situations where a value is "sometimes a pointer and sometimes a bare `usize`" is a
//! pointer type.
//!
//! ## Strict Provenance
//!
//! "Strict Provenance" refers to a set of APIs designed to make working with provenance more
//! explicit. They are intended as substitutes for casting a pointer to an integer and back.
//!
//! Entirely avoiding integer-to-pointer casts successfully side-steps the inherent ambiguity of
//! that operation. This benefits compiler optimizations, and it is pretty much a requirement for
//! using tools like [Miri] and architectures like [CHERI] that aim to detect and diagnose pointer
//! misuse.
//!
//! The key insight to making programming without integer-to-pointer casts *at all* viable is the
//! [`with_addr`] method:
//! //!
//! ```text //! ```text
//! /// Creates a new pointer with the given address. //! /// Creates a new pointer with the given address.
//! /// //! ///
//! /// This performs the same operation as an `addr as ptr` cast, but copies //! /// This performs the same operation as an `addr as ptr` cast, but copies
//! /// the *address-space* and *provenance* of `self` to the new pointer. //! /// the *provenance* of `self` to the new pointer.
//! /// This allows us to dynamically preserve and propagate this important //! /// This allows us to dynamically preserve and propagate this important
//! /// information in a way that is otherwise impossible with a unary cast. //! /// information in a way that is otherwise impossible with a unary cast.
//! /// //! ///
@ -257,23 +265,21 @@
//! //!
//! So you're still able to drop down to the address representation and do whatever //! So you're still able to drop down to the address representation and do whatever
//! clever bit tricks you want *as long as* you're able to keep around a pointer //! clever bit tricks you want *as long as* you're able to keep around a pointer
//! into the allocation you care about that can "reconstitute" the other parts of the pointer. //! into the allocation you care about that can "reconstitute" the provenance.
//! Usually this is very easy, because you only are taking a pointer, messing with the address, //! Usually this is very easy, because you only are taking a pointer, messing with the address,
//! and then immediately converting back to a pointer. To make this use case more ergonomic, //! and then immediately converting back to a pointer. To make this use case more ergonomic,
//! we provide the [`map_addr`][] method. //! we provide the [`map_addr`] method.
//! //!
//! To help make it clear that code is "following" Strict Provenance semantics, we also provide an //! To help make it clear that code is "following" Strict Provenance semantics, we also provide an
//! [`addr`][] method which promises that the returned address is not part of a //! [`addr`] method which promises that the returned address is not part of a
//! pointer-usize-pointer roundtrip. In the future we may provide a lint for pointer<->integer //! pointer-integer-pointer roundtrip. In the future we may provide a lint for pointer<->integer
//! casts to help you audit if your code conforms to strict provenance. //! casts to help you audit if your code conforms to strict provenance.
//! //!
//! //! ### Using Strict Provenance
//! ## Using Strict Provenance
//! //!
//! Most code needs no changes to conform to strict provenance, as the only really concerning //! Most code needs no changes to conform to strict provenance, as the only really concerning
//! operation that *wasn't* obviously already Undefined Behaviour is casts from usize to a //! operation is casts from usize to a pointer. For code which *does* cast a `usize` to a pointer,
//! pointer. For code which *does* cast a `usize` to a pointer, the scope of the change depends //! the scope of the change depends on exactly what you're doing.
//! on exactly what you're doing.
//! //!
//! In general, you just need to make sure that if you want to convert a `usize` address to a //! In general, you just need to make sure that if you want to convert a `usize` address to a
//! pointer and then use that pointer to read/write memory, you need to keep around a pointer //! pointer and then use that pointer to read/write memory, you need to keep around a pointer
@ -314,122 +320,65 @@
//! be using AtomicPtr instead. If that messes up the way you atomically manipulate pointers, //! be using AtomicPtr instead. If that messes up the way you atomically manipulate pointers,
//! we would like to know why, and what needs to be done to fix it.) //! we would like to know why, and what needs to be done to fix it.)
//! //!
//! Something more complicated and just generally *evil* like an XOR-List requires more significant
//! changes like allocating all nodes in a pre-allocated Vec or Arena and using a pointer
//! to the whole allocation to reconstitute the XORed addresses.
//!
//! Situations where a valid pointer *must* be created from just an address, such as baremetal code //! Situations where a valid pointer *must* be created from just an address, such as baremetal code
//! accessing a memory-mapped interface at a fixed address, are an open question on how to support. //! accessing a memory-mapped interface at a fixed address, cannot currently be handled with strict
//! These situations *will* still be allowed, but we might require some kind of "I know what I'm //! provenance APIs and should use [exposed provenance](#exposed-provenance).
//! doing" annotation to explain the situation to the compiler. It's also possible they need no
//! special attention at all, because they're generally accessing memory outside the scope of
//! "the abstract machine", or already using "I know what I'm doing" annotations like "volatile".
//!
//! Under [Strict Provenance] it is Undefined Behaviour to:
//!
//! * Access memory through a pointer that does not have provenance over that memory.
//!
//! * [`offset`] a pointer to or from an address it doesn't have provenance over.
//! This means it's always UB to offset a pointer derived from something deallocated,
//! even if the offset is 0. Note that a pointer "one past the end" of its provenance
//! is not actually outside its provenance, it just has 0 bytes it can load/store.
//!
//! But it *is* still sound to:
//!
//! * Create a pointer without provenance from just an address (see [`ptr::dangling`][]). Such a
//! pointer cannot be used for memory accesses (except for zero-sized accesses). This can still be
//! useful for sentinel values like `null` *or* to represent a tagged pointer that will never be
//! dereferenceable. In general, it is always sound for an integer to pretend to be a pointer "for
//! fun" as long as you don't use operations on it which require it to be valid (non-zero-sized
//! offset, read, write, etc).
//!
//! * Forge an allocation of size zero at any sufficiently aligned non-null address.
//! i.e. the usual "ZSTs are fake, do what you want" rules apply *but* this only applies
//! for actual forgery (integers cast to pointers). If you borrow some struct's field
//! that *happens* to be zero-sized, the resulting pointer will have provenance tied to
//! that allocation, and it will still get invalidated if the allocation gets deallocated.
//! In the future we may introduce an API to make such a forged allocation explicit.
//!
//! * [`wrapping_offset`][] a pointer outside its provenance. This includes pointers
//! which have "no" provenance. Unfortunately there may be practical limits on this for a
//! particular platform, and it's an open question as to how to specify this (if at all).
//! Notably, [CHERI][] relies on a compression scheme that can't handle a
//! pointer getting offset "too far" out of bounds. If this happens, the address
//! returned by `addr` will be the value you expect, but the provenance will get invalidated
//! and using it to read/write will fault. The details of this are architecture-specific
//! and based on alignment, but the buffer on either side of the pointer's range is pretty
//! generous (think kilobytes, not bytes).
//!
//! * Compare arbitrary pointers by address. Addresses *are* just integers and so there is
//! always a coherent answer, even if the pointers are dangling or from different
//! address-spaces/provenances. Of course, comparing addresses from different address-spaces
//! is generally going to be *meaningless*, but so is comparing Kilograms to Meters, and Rust
//! doesn't prevent that either. Similarly, if you get "lucky" and notice that a pointer
//! one-past-the-end is the "same" address as the start of an unrelated allocation, anything
//! you do with that fact is *probably* going to be gibberish. The scope of that gibberish
//! is kept under control by the fact that the two pointers *still* aren't allowed to access
//! the other's allocation (bytes), because they still have different provenance.
//!
//! * Perform pointer tagging tricks. This falls out of [`wrapping_offset`] but is worth
//! mentioning in more detail because of the limitations of [CHERI][]. Low-bit tagging
//! is very robust, and often doesn't even go out of bounds because types ensure
//! size >= align (and over-aligning actually gives CHERI more flexibility). Anything
//! more complex than this rapidly enters "extremely platform-specific" territory as
//! certain things may or may not be allowed based on specific supported operations.
//! For instance, ARM explicitly supports high-bit tagging, and so CHERI on ARM inherits
//! that and should support it.
//! //!
//! ## Exposed Provenance //! ## Exposed Provenance
//! //!
//! **This section is *non-normative* and is an extension to the [Strict Provenance] experiment.** //! As discussed above, integer-to-pointer casts are not possible with Strict Provenance APIs.
//!
//! As discussed above, pointer-usize-pointer roundtrips are not possible under [Strict Provenance].
//! This is by design: the goal of Strict Provenance is to provide a clear specification that we are //! This is by design: the goal of Strict Provenance is to provide a clear specification that we are
//! confident can be formalized unambiguously and can be subject to precise formal reasoning. //! confident can be formalized unambiguously and can be subject to precise formal reasoning.
//! Integer-to-pointer casts do not (currently) have such a clear specification.
//! //!
//! However, there exist situations where pointer-usize-pointer roundtrips cannot be avoided, or //! However, there exist situations where integer-to-pointer casts cannot be avoided, or
//! where avoiding them would require major refactoring. Legacy platform APIs also regularly assume //! where avoiding them would require major refactoring. Legacy platform APIs also regularly assume
//! that `usize` can capture all the information that makes up a pointer. The goal of Strict //! that `usize` can capture all the information that makes up a pointer.
//! Provenance is not to rule out such code; the goal is to put all the *other* pointer-manipulating //! Bare-metal platforms can also require the synthesis of a pointer "out of thin air" without
//! code onto a more solid foundation. Strict Provenance is about improving the situation where //! anywhere to obtain proper provenance from.
//! possible (all the code that can be written with Strict Provenance) without making things worse
//! for situations where Strict Provenance is insufficient.
//! //!
//! For these situations, there is a highly experimental extension to Strict Provenance called //! Rust's model for dealing with integer-to-pointer casts is called *Exposed Provenance*. However,
//! *Exposed Provenance*. This extension permits pointer-usize-pointer roundtrips. However, its //! the semantics of Exposed Provenance are on much less solid footing than Strict Provenance, and
//! semantics are on much less solid footing than Strict Provenance, and at this point it is not yet //! at this point it is not yet clear whether a satisfying unambiguous semantics can be defined for
//! clear where a satisfying unambiguous semantics can be defined for Exposed Provenance. //! Exposed Provenance. (If that sounds bad, be reassured that other popular languages that provide
//! Furthermore, Exposed Provenance will not work (well) with tools like [Miri] and [CHERI]. //! integer-to-pointer casts are not faring any better.) Furthermore, Exposed Provenance will not
//! work (well) with tools like [Miri] and [CHERI].
//! //!
//! Exposed Provenance is provided by the [`expose_provenance`] and [`with_exposed_provenance`] methods, //! Exposed Provenance is provided by the [`expose_provenance`] and [`with_exposed_provenance`] methods,
//! which are meant to replace `as` casts between pointers and integers. [`expose_provenance`] is a lot like //! which are equivalent to `as` casts between pointers and integers.
//! [`addr`], but additionally adds the provenance of the pointer to a global list of 'exposed' //! - [`expose_provenance`] is a lot like [`addr`], but additionally adds the provenance of the
//! provenances. (This list is purely conceptual, it exists for the purpose of specifying Rust but //! pointer to a global list of 'exposed' provenances. (This list is purely conceptual, it exists
//! is not materialized in actual executions, except in tools like [Miri].) [`with_exposed_provenance`] //! for the purpose of specifying Rust but is not materialized in actual executions, except in
//! can be used to construct a pointer with one of these previously 'exposed' provenances. //! tools like [Miri].)
//! [`with_exposed_provenance`] takes only `addr: usize` as arguments, so unlike in [`with_addr`] there is //! Memory which is outside the control of the Rust abstract machine (MMIO registers, for example)
//! no indication of what the correct provenance for the returned pointer is -- and that is exactly //! is always considered to be exposed, so long as this memory is disjoint from memory that will
//! what makes pointer-usize-pointer roundtrips so tricky to rigorously specify! There is no //! be used by the abstract machine such as the stack, heap, and statics.
//! algorithm that decides which provenance will be used. You can think of this as "guessing" the //! - [`with_exposed_provenance`] can be used to construct a pointer with one of these previously
//! right provenance, and the guess will be "maximally in your favor", in the sense that if there is //! 'exposed' provenances. [`with_exposed_provenance`] takes only `addr: usize` as its argument, so
//! any way to avoid undefined behavior, then that is the guess that will be taken. However, if //! unlike in [`with_addr`] there is no indication of what the correct provenance for the returned
//! there is *no* previously 'exposed' provenance that justifies the way the returned pointer will //! pointer is -- and that is exactly what makes integer-to-pointer casts so tricky to rigorously
//! be used, the program has undefined behavior. //! specify! The compiler will do its best to pick the right provenance for you, but currently we
//! cannot provide any guarantees about which provenance the resulting pointer will have. Only one
//! thing is clear: if there is *no* previously 'exposed' provenance that justifies the way the
//! returned pointer will be used, the program has undefined behavior.
//! //!
//! Using [`expose_provenance`] or [`with_exposed_provenance`] (or the `as` casts) means that code is //! If at all possible, we encourage code to be ported to [Strict Provenance] APIs, thus avoiding
//! *not* following Strict Provenance rules. The goal of the Strict Provenance experiment is to //! the need for Exposed Provenance. Maximizing the amount of such code is a major win for avoiding
//! determine how far one can get in Rust without the use of [`expose_provenance`] and //! specification complexity and to facilitate adoption of tools like [CHERI] and [Miri] that can be
//! [`with_exposed_provenance`], and to encourage code to be written with Strict Provenance APIs only. //! a big help in increasing the confidence in (unsafe) Rust code. However, we acknowledge that this
//! Maximizing the amount of such code is a major win for avoiding specification complexity and to //! is not always possible, and offer Exposed Provenance as a way to explicitly "opt out" of the
//! facilitate adoption of tools like [CHERI] and [Miri] that can be a big help in increasing the //! well-defined semantics of Strict Provenance, and "opt in" to the unclear semantics of
//! confidence in (unsafe) Rust code. //! integer-to-pointer casts.
//! //!
//! [aliasing]: ../../nomicon/aliasing.html //! [aliasing]: ../../nomicon/aliasing.html
//! [allocated object]: #allocated-object
//! [provenance]: #provenance
//! [book]: ../../book/ch19-01-unsafe-rust.html#dereferencing-a-raw-pointer //! [book]: ../../book/ch19-01-unsafe-rust.html#dereferencing-a-raw-pointer
//! [ub]: ../../reference/behavior-considered-undefined.html //! [ub]: ../../reference/behavior-considered-undefined.html
//! [zst]: ../../nomicon/exotic-sizes.html#zero-sized-types-zsts //! [zst]: ../../nomicon/exotic-sizes.html#zero-sized-types-zsts
//! [atomic operations]: crate::sync::atomic //! [atomic operations]: crate::sync::atomic
//! [`offset`]: pointer::offset //! [`offset`]: pointer::offset
//! [`offset_from`]: pointer::offset_from
//! [`wrapping_offset`]: pointer::wrapping_offset //! [`wrapping_offset`]: pointer::wrapping_offset
//! [`with_addr`]: pointer::with_addr //! [`with_addr`]: pointer::with_addr
//! [`map_addr`]: pointer::map_addr //! [`map_addr`]: pointer::map_addr
@ -439,8 +388,8 @@
//! [`with_exposed_provenance`]: with_exposed_provenance //! [`with_exposed_provenance`]: with_exposed_provenance
//! [Miri]: https://github.com/rust-lang/miri //! [Miri]: https://github.com/rust-lang/miri
//! [CHERI]: https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/ //! [CHERI]: https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/
//! [Strict Provenance]: https://github.com/rust-lang/rust/issues/95228 //! [Strict Provenance]: #strict-provenance
//! [Stacked Borrows]: https://plv.mpi-sws.org/rustbelt/stacked-borrows/ //! [`UnsafeCell`]: core::cell::UnsafeCell
#![stable(feature = "rust1", since = "1.0.0")] #![stable(feature = "rust1", since = "1.0.0")]
// There are many unsafe functions taking pointers that don't dereference them. // There are many unsafe functions taking pointers that don't dereference them.
@ -629,7 +578,7 @@ pub const fn null_mut<T: ?Sized + Thin>() -> *mut T {
from_raw_parts_mut(without_provenance_mut::<()>(0), ()) from_raw_parts_mut(without_provenance_mut::<()>(0), ())
} }
/// Creates a pointer with the given address and no provenance. /// Creates a pointer with the given address and no [provenance][crate::ptr#provenance].
/// ///
/// This is equivalent to `ptr::null().with_addr(addr)`. /// This is equivalent to `ptr::null().with_addr(addr)`.
/// ///
@ -641,16 +590,15 @@ pub const fn null_mut<T: ?Sized + Thin>() -> *mut T {
/// This is different from `addr as *const T`, which creates a pointer that picks up a previously /// This is different from `addr as *const T`, which creates a pointer that picks up a previously
/// exposed provenance. See [`with_exposed_provenance`] for more details on that operation. /// exposed provenance. See [`with_exposed_provenance`] for more details on that operation.
/// ///
/// This API and its claimed semantics are part of the Strict Provenance experiment, /// This is a [Strict Provenance][crate::ptr#strict-provenance] API.
/// see the [module documentation][crate::ptr] for details.
#[inline(always)] #[inline(always)]
#[must_use] #[must_use]
#[rustc_const_stable(feature = "stable_things_using_strict_provenance", since = "1.61.0")] #[rustc_const_stable(feature = "stable_things_using_strict_provenance", since = "1.61.0")]
#[unstable(feature = "strict_provenance", issue = "95228")] #[stable(feature = "strict_provenance", since = "CURRENT_RUSTC_VERSION")]
pub const fn without_provenance<T>(addr: usize) -> *const T { pub const fn without_provenance<T>(addr: usize) -> *const T {
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. // An int-to-pointer transmute currently has exactly the intended semantics: it creates a
// We use transmute rather than a cast so tools like Miri can tell that this // pointer without provenance. Note that this is *not* a stable guarantee about transmute
// is *not* the same as with_exposed_provenance. // semantics, it relies on sysroot crates having special status.
// SAFETY: every valid integer is also a valid pointer (as long as you don't dereference that // SAFETY: every valid integer is also a valid pointer (as long as you don't dereference that
// pointer). // pointer).
unsafe { mem::transmute(addr) } unsafe { mem::transmute(addr) }
@ -668,12 +616,12 @@ pub const fn without_provenance<T>(addr: usize) -> *const T {
#[inline(always)] #[inline(always)]
#[must_use] #[must_use]
#[rustc_const_stable(feature = "stable_things_using_strict_provenance", since = "1.61.0")] #[rustc_const_stable(feature = "stable_things_using_strict_provenance", since = "1.61.0")]
#[unstable(feature = "strict_provenance", issue = "95228")] #[stable(feature = "strict_provenance", since = "CURRENT_RUSTC_VERSION")]
pub const fn dangling<T>() -> *const T { pub const fn dangling<T>() -> *const T {
without_provenance(mem::align_of::<T>()) without_provenance(mem::align_of::<T>())
} }
/// Creates a pointer with the given address and no provenance. /// Creates a pointer with the given address and no [provenance][crate::ptr#provenance].
/// ///
/// This is equivalent to `ptr::null_mut().with_addr(addr)`. /// This is equivalent to `ptr::null_mut().with_addr(addr)`.
/// ///
@ -685,16 +633,15 @@ pub const fn dangling<T>() -> *const T {
/// This is different from `addr as *mut T`, which creates a pointer that picks up a previously /// This is different from `addr as *mut T`, which creates a pointer that picks up a previously
/// exposed provenance. See [`with_exposed_provenance_mut`] for more details on that operation. /// exposed provenance. See [`with_exposed_provenance_mut`] for more details on that operation.
/// ///
/// This API and its claimed semantics are part of the Strict Provenance experiment, /// This is a [Strict Provenance][crate::ptr#strict-provenance] API.
/// see the [module documentation][crate::ptr] for details.
#[inline(always)] #[inline(always)]
#[must_use] #[must_use]
#[rustc_const_stable(feature = "stable_things_using_strict_provenance", since = "1.61.0")] #[rustc_const_stable(feature = "stable_things_using_strict_provenance", since = "1.61.0")]
#[unstable(feature = "strict_provenance", issue = "95228")] #[stable(feature = "strict_provenance", since = "CURRENT_RUSTC_VERSION")]
pub const fn without_provenance_mut<T>(addr: usize) -> *mut T { pub const fn without_provenance_mut<T>(addr: usize) -> *mut T {
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. // An int-to-pointer transmute currently has exactly the intended semantics: it creates a
// We use transmute rather than a cast so tools like Miri can tell that this // pointer without provenance. Note that this is *not* a stable guarantee about transmute
// is *not* the same as with_exposed_provenance. // semantics, it relies on sysroot crates having special status.
// SAFETY: every valid integer is also a valid pointer (as long as you don't dereference that // SAFETY: every valid integer is also a valid pointer (as long as you don't dereference that
// pointer). // pointer).
unsafe { mem::transmute(addr) } unsafe { mem::transmute(addr) }
@ -712,96 +659,88 @@ pub const fn without_provenance_mut<T>(addr: usize) -> *mut T {
#[inline(always)] #[inline(always)]
#[must_use] #[must_use]
#[rustc_const_stable(feature = "stable_things_using_strict_provenance", since = "1.61.0")] #[rustc_const_stable(feature = "stable_things_using_strict_provenance", since = "1.61.0")]
#[unstable(feature = "strict_provenance", issue = "95228")] #[stable(feature = "strict_provenance", since = "CURRENT_RUSTC_VERSION")]
pub const fn dangling_mut<T>() -> *mut T { pub const fn dangling_mut<T>() -> *mut T {
without_provenance_mut(mem::align_of::<T>()) without_provenance_mut(mem::align_of::<T>())
} }
/// Converts an address back to a pointer, picking up a previously 'exposed' provenance. /// Converts an address back to a pointer, picking up some previously 'exposed'
/// [provenance][crate::ptr#provenance].
/// ///
/// This is a more rigorously specified alternative to `addr as *const T`. The provenance of the /// This is fully equivalent to `addr as *const T`. The provenance of the returned pointer is that
/// returned pointer is that of *any* pointer that was previously exposed by passing it to /// of *some* pointer that was previously exposed by passing it to
/// [`expose_provenance`][pointer::expose_provenance], or a `ptr as usize` cast. In addition, memory which is /// [`expose_provenance`][pointer::expose_provenance], or a `ptr as usize` cast. In addition, memory
/// outside the control of the Rust abstract machine (MMIO registers, for example) is always /// which is outside the control of the Rust abstract machine (MMIO registers, for example) is
/// considered to be exposed, so long as this memory is disjoint from memory that will be used by /// always considered to be accessible with an exposed provenance, so long as this memory is disjoint
/// the abstract machine such as the stack, heap, and statics. /// from memory that will be used by the abstract machine such as the stack, heap, and statics.
/// ///
/// If there is no 'exposed' provenance that justifies the way this pointer will be used, /// The exact provenance that gets picked is not specified. The compiler will do its best to pick
/// the program has undefined behavior. In particular, the aliasing rules still apply: pointers /// the "right" provenance for you (whatever that may be), but currently we cannot provide any
/// and references that have been invalidated due to aliasing accesses cannot be used anymore, /// guarantees about which provenance the resulting pointer will have -- and therefore there
/// even if they have been exposed! /// is no definite specification for which memory the resulting pointer may access.
/// ///
/// Note that there is no algorithm that decides which provenance will be used. You can think of this /// If there is *no* previously 'exposed' provenance that justifies the way the returned pointer
/// as "guessing" the right provenance, and the guess will be "maximally in your favor", in the sense /// will be used, the program has undefined behavior. In particular, the aliasing rules still apply:
/// that if there is any way to avoid undefined behavior (while upholding all aliasing requirements), /// pointers and references that have been invalidated due to aliasing accesses cannot be used
/// then that is the guess that will be taken. /// anymore, even if they have been exposed!
/// ///
/// On platforms with multiple address spaces, it is your responsibility to ensure that the /// Due to its inherent ambiguity, this operation may not be supported by tools that help you to
/// address makes sense in the address space that this pointer will be used with. /// stay conformant with the Rust memory model. It is recommended to use [Strict
/// /// Provenance][self#strict-provenance] APIs such as [`with_addr`][pointer::with_addr] wherever
/// Using this function means that code is *not* following [Strict /// possible.
/// Provenance][self#strict-provenance] rules. "Guessing" a
/// suitable provenance complicates specification and reasoning and may not be supported by
/// tools that help you to stay conformant with the Rust memory model, so it is recommended to
/// use [`with_addr`][pointer::with_addr] wherever possible.
/// ///
/// On most platforms this will produce a value with the same bytes as the address. Platforms /// On most platforms this will produce a value with the same bytes as the address. Platforms
/// which need to store additional information in a pointer may not support this operation, /// which need to store additional information in a pointer may not support this operation,
/// since it is generally not possible to actually *compute* which provenance the returned /// since it is generally not possible to actually *compute* which provenance the returned
/// pointer has to pick up. /// pointer has to pick up.
/// ///
/// It is unclear whether this function can be given a satisfying unambiguous specification. This /// This is an [Exposed Provenance][crate::ptr#exposed-provenance] API.
/// API and its claimed semantics are part of [Exposed Provenance][self#exposed-provenance].
#[must_use] #[must_use]
#[inline(always)] #[inline(always)]
#[unstable(feature = "exposed_provenance", issue = "95228")] #[stable(feature = "exposed_provenance", since = "CURRENT_RUSTC_VERSION")]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
#[allow(fuzzy_provenance_casts)] // this *is* the explicit provenance API one should use instead #[allow(fuzzy_provenance_casts)] // this *is* the explicit provenance API one should use instead
pub fn with_exposed_provenance<T>(addr: usize) -> *const T pub fn with_exposed_provenance<T>(addr: usize) -> *const T {
where
T: Sized,
{
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
addr as *const T addr as *const T
} }
/// Converts an address back to a mutable pointer, picking up a previously 'exposed' provenance. /// Converts an address back to a mutable pointer, picking up some previously 'exposed'
/// [provenance][crate::ptr#provenance].
/// ///
/// This is a more rigorously specified alternative to `addr as *mut T`. The provenance of the /// This is fully equivalent to `addr as *mut T`. The provenance of the returned pointer is that
/// returned pointer is that of *any* pointer that was previously passed to /// of *some* pointer that was previously exposed by passing it to
/// [`expose_provenance`][pointer::expose_provenance] or a `ptr as usize` cast. If there is no previously /// [`expose_provenance`][pointer::expose_provenance], or a `ptr as usize` cast. In addition, memory
/// 'exposed' provenance that justifies the way this pointer will be used, the program has undefined /// which is outside the control of the Rust abstract machine (MMIO registers, for example) is
/// behavior. Note that there is no algorithm that decides which provenance will be used. You can /// always considered to be accessible with an exposed provenance, so long as this memory is disjoint
/// think of this as "guessing" the right provenance, and the guess will be "maximally in your /// from memory that will be used by the abstract machine such as the stack, heap, and statics.
/// favor", in the sense that if there is any way to avoid undefined behavior, then that is the
/// guess that will be taken.
/// ///
/// On platforms with multiple address spaces, it is your responsibility to ensure that the /// The exact provenance that gets picked is not specified. The compiler will do its best to pick
/// address makes sense in the address space that this pointer will be used with. /// the "right" provenance for you (whatever that may be), but currently we cannot provide any
/// guarantees about which provenance the resulting pointer will have -- and therefore there
/// is no definite specification for which memory the resulting pointer may access.
/// ///
/// Using this function means that code is *not* following [Strict /// If there is *no* previously 'exposed' provenance that justifies the way the returned pointer
/// Provenance][self#strict-provenance] rules. "Guessing" a /// will be used, the program has undefined behavior. In particular, the aliasing rules still apply:
/// suitable provenance complicates specification and reasoning and may not be supported by /// pointers and references that have been invalidated due to aliasing accesses cannot be used
/// tools that help you to stay conformant with the Rust memory model, so it is recommended to /// anymore, even if they have been exposed!
/// use [`with_addr`][pointer::with_addr] wherever possible. ///
/// Due to its inherent ambiguity, this operation may not be supported by tools that help you to
/// stay conformant with the Rust memory model. It is recommended to use [Strict
/// Provenance][self#strict-provenance] APIs such as [`with_addr`][pointer::with_addr] wherever
/// possible.
/// ///
/// On most platforms this will produce a value with the same bytes as the address. Platforms /// On most platforms this will produce a value with the same bytes as the address. Platforms
/// which need to store additional information in a pointer may not support this operation, /// which need to store additional information in a pointer may not support this operation,
/// since it is generally not possible to actually *compute* which provenance the returned /// since it is generally not possible to actually *compute* which provenance the returned
/// pointer has to pick up. /// pointer has to pick up.
/// ///
/// It is unclear whether this function can be given a satisfying unambiguous specification. This /// This is an [Exposed Provenance][crate::ptr#exposed-provenance] API.
/// API and its claimed semantics are part of [Exposed Provenance][self#exposed-provenance].
#[must_use] #[must_use]
#[inline(always)] #[inline(always)]
#[unstable(feature = "exposed_provenance", issue = "95228")] #[stable(feature = "exposed_provenance", since = "CURRENT_RUSTC_VERSION")]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
#[allow(fuzzy_provenance_casts)] // this *is* the explicit provenance API one should use instead #[allow(fuzzy_provenance_casts)] // this *is* the explicit provenance API one should use instead
pub fn with_exposed_provenance_mut<T>(addr: usize) -> *mut T pub fn with_exposed_provenance_mut<T>(addr: usize) -> *mut T {
where
T: Sized,
{
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
addr as *mut T addr as *mut T
} }

View File

@ -124,12 +124,12 @@ pub const fn cast_const(self) -> *const T {
/// Gets the "address" portion of the pointer. /// Gets the "address" portion of the pointer.
/// ///
/// This is similar to `self as usize`, which semantically discards *provenance* and /// This is similar to `self as usize`, except that the [provenance][crate::ptr#provenance] of
/// *address-space* information. However, unlike `self as usize`, casting the returned address /// the pointer is discarded and not [exposed][crate::ptr#exposed-provenance]. This means that
/// back to a pointer yields a [pointer without provenance][without_provenance_mut], which is undefined /// casting the returned address back to a pointer yields a [pointer without
/// behavior to dereference. To properly restore the lost information and obtain a /// provenance][without_provenance_mut], which is undefined behavior to dereference. To properly
/// dereferenceable pointer, use [`with_addr`][pointer::with_addr] or /// restore the lost information and obtain a dereferenceable pointer, use
/// [`map_addr`][pointer::map_addr]. /// [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr].
/// ///
/// If using those APIs is not possible because there is no way to preserve a pointer with the /// If using those APIs is not possible because there is no way to preserve a pointer with the
/// required provenance, then Strict Provenance might not be for you. Use pointer-integer casts /// required provenance, then Strict Provenance might not be for you. Use pointer-integer casts
@ -143,89 +143,80 @@ pub const fn cast_const(self) -> *const T {
/// perform a change of representation to produce a value containing only the address /// perform a change of representation to produce a value containing only the address
/// portion of the pointer. What that means is up to the platform to define. /// portion of the pointer. What that means is up to the platform to define.
/// ///
/// This API and its claimed semantics are part of the Strict Provenance experiment, and as such /// This is a [Strict Provenance][crate::ptr#strict-provenance] API.
/// might change in the future (including possibly weakening this so it becomes wholly
/// equivalent to `self as usize`). See the [module documentation][crate::ptr] for details.
#[must_use] #[must_use]
#[inline(always)] #[inline(always)]
#[unstable(feature = "strict_provenance", issue = "95228")] #[stable(feature = "strict_provenance", since = "CURRENT_RUSTC_VERSION")]
pub fn addr(self) -> usize { pub fn addr(self) -> usize {
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. // A pointer-to-integer transmute currently has exactly the right semantics: it returns the
// address without exposing the provenance. Note that this is *not* a stable guarantee about
// transmute semantics; it relies on sysroot crates having special status.
// SAFETY: Pointer-to-integer transmutes are valid (if you are okay with losing the // SAFETY: Pointer-to-integer transmutes are valid (if you are okay with losing the
// provenance). // provenance).
unsafe { mem::transmute(self.cast::<()>()) } unsafe { mem::transmute(self.cast::<()>()) }
} }
/// Exposes the "provenance" part of the pointer for future use in /// Exposes the ["provenance"][crate::ptr#provenance] part of the pointer for future use in
/// [`with_exposed_provenance`][] and returns the "address" portion. /// [`with_exposed_provenance_mut`] and returns the "address" portion.
/// ///
/// This is equivalent to `self as usize`, which semantically discards *provenance* and /// This is equivalent to `self as usize`, which semantically discards provenance information.
/// *address-space* information. Furthermore, this (like the `as` cast) has the implicit /// Furthermore, this (like the `as` cast) has the implicit side-effect of marking the
/// side-effect of marking the provenance as 'exposed', so on platforms that support it you can /// provenance as 'exposed', so on platforms that support it you can later call
/// later call [`with_exposed_provenance_mut`][] to reconstitute the original pointer including its /// [`with_exposed_provenance_mut`] to reconstitute the original pointer including its provenance.
/// provenance. (Reconstructing address space information, if required, is your responsibility.)
/// ///
/// Using this method means that code is *not* following [Strict /// Due to its inherent ambiguity, [`with_exposed_provenance_mut`] may not be supported by tools
/// Provenance][super#strict-provenance] rules. Supporting /// that help you to stay conformant with the Rust memory model. It is recommended to use
/// [`with_exposed_provenance_mut`][] complicates specification and reasoning and may not be supported /// [Strict Provenance][crate::ptr#strict-provenance] APIs such as [`with_addr`][pointer::with_addr]
/// by tools that help you to stay conformant with the Rust memory model, so it is recommended /// wherever possible, in which case [`addr`][pointer::addr] should be used instead of `expose_provenance`.
/// to use [`addr`][pointer::addr] wherever possible.
/// ///
/// On most platforms this will produce a value with the same bytes as the original pointer, /// On most platforms this will produce a value with the same bytes as the original pointer,
/// because all the bytes are dedicated to describing the address. Platforms which need to store /// because all the bytes are dedicated to describing the address. Platforms which need to store
/// additional information in the pointer may not support this operation, since the 'expose' /// additional information in the pointer may not support this operation, since the 'expose'
/// side-effect which is required for [`with_exposed_provenance_mut`][] to work is typically not /// side-effect which is required for [`with_exposed_provenance_mut`] to work is typically not
/// available. /// available.
/// ///
/// It is unclear whether this method can be given a satisfying unambiguous specification. This /// This is an [Exposed Provenance][crate::ptr#exposed-provenance] API.
/// API and its claimed semantics are part of [Exposed Provenance][super#exposed-provenance].
/// ///
/// [`with_exposed_provenance_mut`]: with_exposed_provenance_mut /// [`with_exposed_provenance_mut`]: with_exposed_provenance_mut
#[inline(always)] #[inline(always)]
#[unstable(feature = "exposed_provenance", issue = "95228")] #[stable(feature = "exposed_provenance", since = "CURRENT_RUSTC_VERSION")]
pub fn expose_provenance(self) -> usize { pub fn expose_provenance(self) -> usize {
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
self.cast::<()>() as usize self.cast::<()>() as usize
} }
/// Creates a new pointer with the given address. /// Creates a new pointer with the given address and the [provenance][crate::ptr#provenance] of
/// `self`.
/// ///
/// This performs the same operation as an `addr as ptr` cast, but copies /// This is similar to an `addr as *mut T` cast, but copies
/// the *address-space* and *provenance* of `self` to the new pointer. /// the *provenance* of `self` to the new pointer.
/// This allows us to dynamically preserve and propagate this important /// This avoids the inherent ambiguity of the unary cast.
/// information in a way that is otherwise impossible with a unary cast.
/// ///
/// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset /// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset
/// `self` to the given address, and therefore has all the same capabilities and restrictions. /// `self` to the given address, and therefore has all the same capabilities and restrictions.
/// ///
/// This API and its claimed semantics are an extension to the Strict Provenance experiment, /// This is a [Strict Provenance][crate::ptr#strict-provenance] API.
/// see the [module documentation][crate::ptr] for details.
#[must_use] #[must_use]
#[inline] #[inline]
#[unstable(feature = "strict_provenance", issue = "95228")] #[stable(feature = "strict_provenance", since = "CURRENT_RUSTC_VERSION")]
pub fn with_addr(self, addr: usize) -> Self { pub fn with_addr(self, addr: usize) -> Self {
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. // This should probably be an intrinsic to avoid doing any sort of arithmetic, but
// // meanwhile, we can implement it with `wrapping_offset`, which preserves the pointer's
// In the mean-time, this operation is defined to be "as if" it was // provenance.
// a wrapping_offset, so we can emulate it as such. This should properly
// restore pointer provenance even under today's compiler.
let self_addr = self.addr() as isize; let self_addr = self.addr() as isize;
let dest_addr = addr as isize; let dest_addr = addr as isize;
let offset = dest_addr.wrapping_sub(self_addr); let offset = dest_addr.wrapping_sub(self_addr);
// This is the canonical desugaring of this operation
self.wrapping_byte_offset(offset) self.wrapping_byte_offset(offset)
} }
/// Creates a new pointer by mapping `self`'s address to a new one. /// Creates a new pointer by mapping `self`'s address to a new one, preserving the original
/// pointer's [provenance][crate::ptr#provenance].
/// ///
/// This is a convenience for [`with_addr`][pointer::with_addr], see that method for details. /// This is a convenience for [`with_addr`][pointer::with_addr], see that method for details.
/// ///
/// This API and its claimed semantics are part of the Strict Provenance experiment, /// This is a [Strict Provenance][crate::ptr#strict-provenance] API.
/// see the [module documentation][crate::ptr] for details.
#[must_use] #[must_use]
#[inline] #[inline]
#[unstable(feature = "strict_provenance", issue = "95228")] #[stable(feature = "strict_provenance", since = "CURRENT_RUSTC_VERSION")]
pub fn map_addr(self, f: impl FnOnce(usize) -> usize) -> Self { pub fn map_addr(self, f: impl FnOnce(usize) -> usize) -> Self {
self.with_addr(f(self.addr())) self.with_addr(f(self.addr()))
} }
@ -376,7 +367,7 @@ pub const fn to_raw_parts(self) -> (*mut (), <T as super::Pointee>::Metadata) {
/// * The offset in bytes, `count * size_of::<T>()`, computed on mathematical integers (without /// * The offset in bytes, `count * size_of::<T>()`, computed on mathematical integers (without
/// "wrapping around"), must fit in an `isize`. /// "wrapping around"), must fit in an `isize`.
/// ///
/// * If the computed offset is non-zero, then `self` must be derived from a pointer to some /// * If the computed offset is non-zero, then `self` must be [derived from][crate::ptr#provenance] a pointer to some
/// [allocated object], and the entire memory range between `self` and the result must be in /// [allocated object], and the entire memory range between `self` and the result must be in
/// bounds of that allocated object. In particular, this range must not "wrap around" the edge /// bounds of that allocated object. In particular, this range must not "wrap around" the edge
/// of the address space. /// of the address space.
@ -777,7 +768,7 @@ pub const fn guaranteed_ne(self, other: *mut T) -> Option<bool>
/// * `self` and `origin` must either /// * `self` and `origin` must either
/// ///
/// * point to the same address, or /// * point to the same address, or
/// * both be *derived from* a pointer to the same [allocated object], and the memory range between /// * both be [derived from][crate::ptr#provenance] a pointer to the same [allocated object], and the memory range between
/// the two pointers must be in bounds of that object. (See below for an example.) /// the two pointers must be in bounds of that object. (See below for an example.)
/// ///
/// * The distance between the pointers, in bytes, must be an exact multiple /// * The distance between the pointers, in bytes, must be an exact multiple
@ -954,7 +945,7 @@ pub const fn guaranteed_ne(self, other: *mut T) -> Option<bool>
/// * The offset in bytes, `count * size_of::<T>()`, computed on mathematical integers (without /// * The offset in bytes, `count * size_of::<T>()`, computed on mathematical integers (without
/// "wrapping around"), must fit in an `isize`. /// "wrapping around"), must fit in an `isize`.
/// ///
/// * If the computed offset is non-zero, then `self` must be derived from a pointer to some /// * If the computed offset is non-zero, then `self` must be [derived from][crate::ptr#provenance] a pointer to some
/// [allocated object], and the entire memory range between `self` and the result must be in /// [allocated object], and the entire memory range between `self` and the result must be in
/// bounds of that allocated object. In particular, this range must not "wrap around" the edge /// bounds of that allocated object. In particular, this range must not "wrap around" the edge
/// of the address space. /// of the address space.
@ -1061,7 +1052,7 @@ const fn comptime(_: *const (), _: usize, _: usize) -> bool {
/// * The offset in bytes, `count * size_of::<T>()`, computed on mathematical integers (without /// * The offset in bytes, `count * size_of::<T>()`, computed on mathematical integers (without
/// "wrapping around"), must fit in an `isize`. /// "wrapping around"), must fit in an `isize`.
/// ///
/// * If the computed offset is non-zero, then `self` must be derived from a pointer to some /// * If the computed offset is non-zero, then `self` must be [derived from][crate::ptr#provenance] a pointer to some
/// [allocated object], and the entire memory range between `self` and the result must be in /// [allocated object], and the entire memory range between `self` and the result must be in
/// bounds of that allocated object. In particular, this range must not "wrap around" the edge /// bounds of that allocated object. In particular, this range must not "wrap around" the edge
/// of the address space. /// of the address space.

View File

@ -283,40 +283,39 @@ pub const fn to_raw_parts(self) -> (NonNull<()>, <T as super::Pointee>::Metadata
/// ///
/// For more details see the equivalent method on a raw pointer, [`pointer::addr`]. /// For more details see the equivalent method on a raw pointer, [`pointer::addr`].
/// ///
/// This API and its claimed semantics are part of the Strict Provenance experiment, /// This is a [Strict Provenance][crate::ptr#strict-provenance] API.
/// see the [`ptr` module documentation][crate::ptr].
#[must_use] #[must_use]
#[inline] #[inline]
#[unstable(feature = "strict_provenance", issue = "95228")] #[stable(feature = "strict_provenance", since = "CURRENT_RUSTC_VERSION")]
pub fn addr(self) -> NonZero<usize> { pub fn addr(self) -> NonZero<usize> {
// SAFETY: The pointer is guaranteed by the type to be non-null, // SAFETY: The pointer is guaranteed by the type to be non-null,
// meaning that the address will be non-zero. // meaning that the address will be non-zero.
unsafe { NonZero::new_unchecked(self.pointer.addr()) } unsafe { NonZero::new_unchecked(self.pointer.addr()) }
} }
/// Creates a new pointer with the given address. /// Creates a new pointer with the given address and the [provenance][crate::ptr#provenance] of
/// `self`.
/// ///
/// For more details see the equivalent method on a raw pointer, [`pointer::with_addr`]. /// For more details see the equivalent method on a raw pointer, [`pointer::with_addr`].
/// ///
/// This API and its claimed semantics are part of the Strict Provenance experiment, /// This is a [Strict Provenance][crate::ptr#strict-provenance] API.
/// see the [`ptr` module documentation][crate::ptr].
#[must_use] #[must_use]
#[inline] #[inline]
#[unstable(feature = "strict_provenance", issue = "95228")] #[stable(feature = "strict_provenance", since = "CURRENT_RUSTC_VERSION")]
pub fn with_addr(self, addr: NonZero<usize>) -> Self { pub fn with_addr(self, addr: NonZero<usize>) -> Self {
// SAFETY: The result of `pointer::with_addr` is non-null because `addr` is guaranteed to be non-zero. // SAFETY: The result of `pointer::with_addr` is non-null because `addr` is guaranteed to be non-zero.
unsafe { NonNull::new_unchecked(self.pointer.with_addr(addr.get()) as *mut _) } unsafe { NonNull::new_unchecked(self.pointer.with_addr(addr.get()) as *mut _) }
} }
/// Creates a new pointer by mapping `self`'s address to a new one. /// Creates a new pointer by mapping `self`'s address to a new one, preserving the
/// [provenance][crate::ptr#provenance] of `self`.
/// ///
/// For more details see the equivalent method on a raw pointer, [`pointer::map_addr`]. /// For more details see the equivalent method on a raw pointer, [`pointer::map_addr`].
/// ///
/// This API and its claimed semantics are part of the Strict Provenance experiment, /// This is a [Strict Provenance][crate::ptr#strict-provenance] API.
/// see the [`ptr` module documentation][crate::ptr].
#[must_use] #[must_use]
#[inline] #[inline]
#[unstable(feature = "strict_provenance", issue = "95228")] #[stable(feature = "strict_provenance", since = "CURRENT_RUSTC_VERSION")]
pub fn map_addr(self, f: impl FnOnce(NonZero<usize>) -> NonZero<usize>) -> Self { pub fn map_addr(self, f: impl FnOnce(NonZero<usize>) -> NonZero<usize>) -> Self {
self.with_addr(f(self.addr())) self.with_addr(f(self.addr()))
} }