Rollup merge of #129684 - Strophox:miri-pass-pointer-to-ffi, r=RalfJung

Enable Miri to pass pointers through FFI

Following https://github.com/rust-lang/rust/pull/126787, the purpose of this PR is to now enable Miri to execute native calls that make use of pointers.

> <details>
>
> <summary> Simple example </summary>
>
> ```rust
> extern "C" {
>     fn ptr_printer(ptr: *mut i32);
> }
>
> fn main() {
>     let ptr = &mut 42 as *mut i32;
>     unsafe {
>         ptr_printer(ptr);
>     }
> }
> ```
> ```c
> void ptr_printer(int *ptr) {
>   printf("printing pointer dereference from C: %d\n", *ptr);
> }
> ```
> should now show `printing pointer dereference from C: 42`.
>
> </details>

Note that this PR does not yet implement any logic involved in updating Miri's "analysis" state (byte initialization, provenance) upon such a native call.

r? ``@RalfJung``
This commit is contained in:
Matthias Krüger 2024-08-31 20:36:25 +02:00 committed by GitHub
commit a5fb8b90bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 260 additions and 17 deletions

View File

@ -362,17 +362,7 @@ fn adjust_global_allocation<'b>(
ecx: &InterpCx<'tcx, Self>,
id: AllocId,
alloc: &'b Allocation,
) -> InterpResult<'tcx, Cow<'b, Allocation<Self::Provenance, Self::AllocExtra, Self::Bytes>>>
{
// The default implementation does a copy; CTFE machines have a more efficient implementation
// based on their particular choice for `Provenance`, `AllocExtra`, and `Bytes`.
let kind = Self::GLOBAL_KIND
.expect("if GLOBAL_KIND is None, adjust_global_allocation must be overwritten");
let alloc = alloc.adjust_from_tcx(&ecx.tcx, |ptr| ecx.global_root_pointer(ptr))?;
let extra =
Self::init_alloc_extra(ecx, id, MemoryKind::Machine(kind), alloc.size(), alloc.align)?;
Ok(Cow::Owned(alloc.with_extra(extra)))
}
) -> InterpResult<'tcx, Cow<'b, Allocation<Self::Provenance, Self::AllocExtra, Self::Bytes>>>;
/// Initialize the extra state of an allocation.
///

View File

@ -358,10 +358,11 @@ impl Allocation {
pub fn adjust_from_tcx<Prov: Provenance, Bytes: AllocBytes, Err>(
&self,
cx: &impl HasDataLayout,
mut alloc_bytes: impl FnMut(&[u8], Align) -> Result<Bytes, Err>,
mut adjust_ptr: impl FnMut(Pointer<CtfeProvenance>) -> Result<Pointer<Prov>, Err>,
) -> Result<Allocation<Prov, (), Bytes>, Err> {
// Copy the data.
let mut bytes = Bytes::from_bytes(Cow::Borrowed(&*self.bytes), self.align);
let mut bytes = alloc_bytes(&*self.bytes, self.align)?;
// Adjust provenance of pointers stored in this allocation.
let mut new_provenance = Vec::with_capacity(self.provenance.ptrs().len());
let ptr_size = cx.data_layout().pointer_size.bytes_usize();

View File

@ -42,6 +42,11 @@ pub struct GlobalStateInner {
/// they do not have an `AllocExtra`.
/// This is the inverse of `int_to_ptr_map`.
base_addr: FxHashMap<AllocId, u64>,
/// Temporarily store prepared memory space for global allocations the first time their memory
/// address is required. This is used to ensure that the memory is allocated before Miri assigns
/// it an internal address, which is important for matching the internal address to the machine
/// address so FFI can read from pointers.
prepared_alloc_bytes: FxHashMap<AllocId, MiriAllocBytes>,
/// A pool of addresses we can reuse for future allocations.
reuse: ReusePool,
/// Whether an allocation has been exposed or not. This cannot be put
@ -59,6 +64,7 @@ fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
let GlobalStateInner {
int_to_ptr_map: _,
base_addr: _,
prepared_alloc_bytes: _,
reuse: _,
exposed: _,
next_base_addr: _,
@ -78,6 +84,7 @@ pub fn new(config: &MiriConfig, stack_addr: u64) -> Self {
GlobalStateInner {
int_to_ptr_map: Vec::default(),
base_addr: FxHashMap::default(),
prepared_alloc_bytes: FxHashMap::default(),
reuse: ReusePool::new(config),
exposed: FxHashSet::default(),
next_base_addr: stack_addr,
@ -166,7 +173,39 @@ fn addr_from_alloc_id(
assert!(!matches!(kind, AllocKind::Dead));
// This allocation does not have a base address yet, pick or reuse one.
let base_addr = if let Some((reuse_addr, clock)) = global_state.reuse.take_addr(
let base_addr = if ecx.machine.native_lib.is_some() {
// In native lib mode, we use the "real" address of the bytes for this allocation.
// This ensures the interpreted program and native code have the same view of memory.
match kind {
AllocKind::LiveData => {
let ptr = if ecx.tcx.try_get_global_alloc(alloc_id).is_some() {
// For new global allocations, we always pre-allocate the memory to be able use the machine address directly.
let prepared_bytes = MiriAllocBytes::zeroed(size, align)
.unwrap_or_else(|| {
panic!("Miri ran out of memory: cannot create allocation of {size:?} bytes")
});
let ptr = prepared_bytes.as_ptr();
// Store prepared allocation space to be picked up for use later.
global_state.prepared_alloc_bytes.try_insert(alloc_id, prepared_bytes).unwrap();
ptr
} else {
ecx.get_alloc_bytes_unchecked_raw(alloc_id)?
};
// Ensure this pointer's provenance is exposed, so that it can be used by FFI code.
ptr.expose_provenance().try_into().unwrap()
}
AllocKind::Function | AllocKind::VTable => {
// Allocate some dummy memory to get a unique address for this function/vtable.
let alloc_bytes = MiriAllocBytes::from_bytes(&[0u8; 1], Align::from_bytes(1).unwrap());
// We don't need to expose these bytes as nobody is allowed to access them.
let addr = alloc_bytes.as_ptr().addr().try_into().unwrap();
// Leak the underlying memory to ensure it remains unique.
std::mem::forget(alloc_bytes);
addr
}
AllocKind::Dead => unreachable!()
}
} else if let Some((reuse_addr, clock)) = global_state.reuse.take_addr(
&mut *rng,
size,
align,
@ -318,6 +357,33 @@ fn adjust_alloc_root_pointer(
Ok(base_ptr.wrapping_offset(offset, ecx))
}
// This returns some prepared `MiriAllocBytes`, either because `addr_from_alloc_id` reserved
// memory space in the past, or by doing the pre-allocation right upon being called.
fn get_global_alloc_bytes(&self, id: AllocId, kind: MemoryKind, bytes: &[u8], align: Align) -> InterpResult<'tcx, MiriAllocBytes> {
let ecx = self.eval_context_ref();
Ok(if ecx.machine.native_lib.is_some() {
// In native lib mode, MiriAllocBytes for global allocations are handled via `prepared_alloc_bytes`.
// This additional call ensures that some `MiriAllocBytes` are always prepared.
ecx.addr_from_alloc_id(id, kind)?;
let mut global_state = ecx.machine.alloc_addresses.borrow_mut();
// The memory we need here will have already been allocated during an earlier call to
// `addr_from_alloc_id` for this allocation. So don't create a new `MiriAllocBytes` here, instead
// fetch the previously prepared bytes from `prepared_alloc_bytes`.
let mut prepared_alloc_bytes = global_state
.prepared_alloc_bytes
.remove(&id)
.unwrap_or_else(|| panic!("alloc bytes for {id:?} have not been prepared"));
// Sanity-check that the prepared allocation has the right size and alignment.
assert!(prepared_alloc_bytes.as_ptr().is_aligned_to(align.bytes_usize()));
assert_eq!(prepared_alloc_bytes.len(), bytes.len());
// Copy allocation contents into prepared memory.
prepared_alloc_bytes.copy_from_slice(bytes);
prepared_alloc_bytes
} else {
MiriAllocBytes::from_bytes(std::borrow::Cow::Borrowed(&*bytes), align)
})
}
/// When a pointer is used for a memory access, this computes where in which allocation the
/// access is going.
fn ptr_get_alloc(

View File

@ -887,7 +887,7 @@ fn get_or_create_thread_local_alloc(
let alloc = this.ctfe_query(|tcx| tcx.eval_static_initializer(def_id))?;
// We make a full copy of this allocation.
let mut alloc =
alloc.inner().adjust_from_tcx(&this.tcx, |ptr| this.global_root_pointer(ptr))?;
alloc.inner().adjust_from_tcx(&this.tcx, |bytes, align| Ok(MiriAllocBytes::from_bytes(std::borrow::Cow::Borrowed(bytes), align)), |ptr| this.global_root_pointer(ptr))?;
// This allocation will be deallocated when the thread dies, so it is not in read-only memory.
alloc.mutability = Mutability::Mut;
// Create a fresh allocation with this content.

View File

@ -12,6 +12,9 @@
#![feature(let_chains)]
#![feature(trait_upcasting)]
#![feature(strict_overflow_ops)]
#![feature(strict_provenance)]
#![feature(exposed_provenance)]
#![feature(pointer_is_aligned_to)]
// Configure clippy and other lints
#![allow(
clippy::collapsible_else_if,

View File

@ -1,6 +1,7 @@
//! Global machine state as well as implementation of the interpreter engine
//! `Machine` trait.
use std::borrow::Cow;
use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::fmt;
@ -1261,6 +1262,30 @@ fn ptr_get_alloc(
})
}
/// Called to adjust global allocations to the Provenance and AllocExtra of this machine.
///
/// If `alloc` contains pointers, then they are all pointing to globals.
///
/// This should avoid copying if no work has to be done! If this returns an owned
/// allocation (because a copy had to be done to adjust things), machine memory will
/// cache the result. (This relies on `AllocMap::get_or` being able to add the
/// owned allocation to the map even when the map is shared.)
fn adjust_global_allocation<'b>(
ecx: &InterpCx<'tcx, Self>,
id: AllocId,
alloc: &'b Allocation,
) -> InterpResult<'tcx, Cow<'b, Allocation<Self::Provenance, Self::AllocExtra, Self::Bytes>>>
{
let kind = Self::GLOBAL_KIND.unwrap().into();
let alloc = alloc.adjust_from_tcx(&ecx.tcx,
|bytes, align| ecx.get_global_alloc_bytes(id, kind, bytes, align),
|ptr| ecx.global_root_pointer(ptr),
)?;
let extra =
Self::init_alloc_extra(ecx, id, kind, alloc.size(), alloc.align)?;
Ok(Cow::Owned(alloc.with_extra(extra)))
}
#[inline(always)]
fn before_memory_read(
_tcx: TyCtxtAt<'tcx>,

View File

@ -194,6 +194,8 @@ enum CArg {
UInt64(u64),
/// usize.
USize(usize),
/// Raw pointer, stored as C's `void*`.
RawPtr(*mut std::ffi::c_void),
}
impl<'a> CArg {
@ -210,6 +212,7 @@ fn arg_downcast(&'a self) -> libffi::high::Arg<'a> {
CArg::UInt32(i) => ffi::arg(i),
CArg::UInt64(i) => ffi::arg(i),
CArg::USize(i) => ffi::arg(i),
CArg::RawPtr(i) => ffi::arg(i),
}
}
}
@ -234,6 +237,16 @@ fn imm_to_carg<'tcx>(v: ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'t
ty::Uint(UintTy::U64) => CArg::UInt64(v.to_scalar().to_u64()?),
ty::Uint(UintTy::Usize) =>
CArg::USize(v.to_scalar().to_target_usize(cx)?.try_into().unwrap()),
ty::RawPtr(_, mutability) => {
// Arbitrary mutable pointer accesses are not currently supported in Miri.
if mutability.is_mut() {
throw_unsup_format!("unsupported mutable pointer type for native call: {}", v.layout.ty);
} else {
let s = v.to_scalar().to_pointer(cx)?.addr();
// This relies on the `expose_provenance` in `addr_from_alloc_id`.
CArg::RawPtr(std::ptr::with_exposed_provenance_mut(s.bytes_usize()))
}
},
_ => throw_unsup_format!("unsupported argument type for native call: {}", v.layout.ty),
})
}

View File

@ -1,12 +1,20 @@
CODEABI_1.0 {
# Define which symbols to export.
global:
# scalar_arguments.c
add_one_int;
printer;
test_stack_spill;
get_unsigned_int;
add_int16;
add_short_to_long;
# ptr_read_access.c
print_pointer;
access_simple;
access_nested;
access_static;
# The rest remains private.
local: *;
};

View File

@ -0,0 +1,82 @@
//@only-target-linux
//@only-on-host
fn main() {
test_pointer();
test_simple();
test_nested();
test_static();
}
// Test void function that dereferences a pointer and prints its contents from C.
fn test_pointer() {
extern "C" {
fn print_pointer(ptr: *const i32);
}
let x = 42;
unsafe { print_pointer(&x) };
}
// Test function that dereferences a simple struct pointer and accesses a field.
fn test_simple() {
#[repr(C)]
struct Simple {
field: i32
}
extern "C" {
fn access_simple(s_ptr: *const Simple) -> i32;
}
let simple = Simple { field: -42 };
assert_eq!(unsafe { access_simple(&simple) }, -42);
}
// Test function that dereferences nested struct pointers and accesses fields.
fn test_nested() {
use std::ptr::NonNull;
#[derive(Debug, PartialEq, Eq)]
#[repr(C)]
struct Nested {
value: i32,
next: Option<NonNull<Nested>>,
}
extern "C" {
fn access_nested(n_ptr: *const Nested) -> i32;
}
let mut nested_0 = Nested { value: 97, next: None };
let mut nested_1 = Nested { value: 98, next: NonNull::new(&mut nested_0) };
let nested_2 = Nested { value: 99, next: NonNull::new(&mut nested_1) };
assert_eq!(unsafe { access_nested(&nested_2) }, 97);
}
// Test function that dereferences static struct pointers and accesses fields.
fn test_static() {
#[repr(C)]
struct Static {
value: i32,
recurse: &'static Static,
}
extern "C" {
fn access_static(n_ptr: *const Static) -> i32;
}
static STATIC: Static = Static {
value: 9001,
recurse: &STATIC,
};
assert_eq!(unsafe { access_static(&STATIC) }, 9001);
}

View File

@ -0,0 +1 @@
printing pointer dereference from C: 42

View File

@ -0,0 +1,47 @@
#include <stdio.h>
/* Test: test_pointer */
void print_pointer(const int *ptr) {
printf("printing pointer dereference from C: %d\n", *ptr);
}
/* Test: test_simple */
typedef struct Simple {
int field;
} Simple;
int access_simple(const Simple *s_ptr) {
return s_ptr->field;
}
/* Test: test_nested */
typedef struct Nested {
int value;
struct Nested *next;
} Nested;
// Returns the innermost/last value of a Nested pointer chain.
int access_nested(const Nested *n_ptr) {
// Edge case: `n_ptr == NULL` (i.e. first Nested is None).
if (!n_ptr) { return 0; }
while (n_ptr->next) {
n_ptr = n_ptr->next;
}
return n_ptr->value;
}
/* Test: test_static */
typedef struct Static {
int value;
struct Static *recurse;
} Static;
int access_static(const Static *s_ptr) {
return s_ptr->recurse->recurse->value;
}

View File

@ -36,18 +36,25 @@ fn build_native_lib() -> PathBuf {
// Create the directory if it does not already exist.
std::fs::create_dir_all(&so_target_dir)
.expect("Failed to create directory for shared object file");
let so_file_path = so_target_dir.join("libtestlib.so");
let so_file_path = so_target_dir.join("native-lib.so");
let cc_output = Command::new(cc)
.args([
"-shared",
"-o",
so_file_path.to_str().unwrap(),
"tests/native-lib/test.c",
// FIXME: Automate gathering of all relevant C source files in the directory.
"tests/native-lib/scalar_arguments.c",
"tests/native-lib/ptr_read_access.c",
// Only add the functions specified in libcode.version to the shared object file.
// This is to avoid automatically adding `malloc`, etc.
// Source: https://anadoxin.org/blog/control-over-symbol-exports-in-gcc.html/
"-fPIC",
"-Wl,--version-script=tests/native-lib/libtest.map",
"-Wl,--version-script=tests/native-lib/native-lib.map",
// Ensure we notice serious problems in the C code.
"-Wall",
"-Wextra",
"-Wpedantic",
"-Werror",
])
.output()
.expect("failed to generate shared object file for testing native function calls");