Miri: run .CRT$XLB linker section on thread-end

This commit is contained in:
Ralf Jung 2024-04-14 18:56:31 +02:00
parent 3305e71095
commit 5934aaaa97
7 changed files with 182 additions and 210 deletions

View File

@ -276,6 +276,7 @@ unsafe fn register_dtor(key: &'static StaticKey) {
// the address of the symbol to ensure it sticks around.
#[link_section = ".CRT$XLB"]
#[cfg_attr(miri, used)] // Miri only considers explicitly `#[used]` statics for `lookup_link_section`
pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) =
on_tls_callback;

View File

@ -137,8 +137,7 @@ fn config(&mut self, config: &mut Config) {
config.override_queries = Some(|_, local_providers| {
// `exported_symbols` and `reachable_non_generics` provided by rustc always returns
// an empty result if `tcx.sess.opts.output_types.should_codegen()` is false.
// In addition we need to add #[used] symbols to exported_symbols for .init_array
// handling.
// In addition we need to add #[used] symbols to exported_symbols for `lookup_link_section`.
local_providers.exported_symbols = |tcx, LocalCrate| {
let reachable_set = tcx.with_stable_hashing_context(|hcx| {
tcx.reachable_set(()).to_sorted(&hcx, true)

View File

@ -158,7 +158,7 @@ pub struct Thread<'mir, 'tcx> {
}
pub type StackEmptyCallback<'mir, 'tcx> =
Box<dyn FnMut(&mut MiriInterpCx<'mir, 'tcx>) -> InterpResult<'tcx, Poll<()>>>;
Box<dyn FnMut(&mut MiriInterpCx<'mir, 'tcx>) -> InterpResult<'tcx, Poll<()>> + 'tcx>;
impl<'mir, 'tcx> Thread<'mir, 'tcx> {
/// Get the name of the current thread if it was set.

View File

@ -192,18 +192,18 @@ fn default() -> MiriConfig {
/// The state of the main thread. Implementation detail of `on_main_stack_empty`.
#[derive(Default, Debug)]
enum MainThreadState {
enum MainThreadState<'tcx> {
#[default]
Running,
TlsDtors(tls::TlsDtorsState),
TlsDtors(tls::TlsDtorsState<'tcx>),
Yield {
remaining: u32,
},
Done,
}
impl MainThreadState {
fn on_main_stack_empty<'tcx>(
impl<'tcx> MainThreadState<'tcx> {
fn on_main_stack_empty(
&mut self,
this: &mut MiriInterpCx<'_, 'tcx>,
) -> InterpResult<'tcx, Poll<()>> {

View File

@ -9,16 +9,21 @@
use rustc_apfloat::ieee::{Double, Single};
use rustc_apfloat::Float;
use rustc_hir::def::{DefKind, Namespace};
use rustc_hir::def_id::{DefId, CRATE_DEF_INDEX};
use rustc_hir::{
def::{DefKind, Namespace},
def_id::{CrateNum, DefId, CRATE_DEF_INDEX, LOCAL_CRATE},
};
use rustc_index::IndexVec;
use rustc_middle::middle::dependency_format::Linkage;
use rustc_middle::middle::exported_symbols::ExportedSymbol;
use rustc_middle::mir;
use rustc_middle::ty::{
self,
layout::{LayoutOf, TyAndLayout},
FloatTy, IntTy, Ty, TyCtxt, UintTy,
};
use rustc_span::{def_id::CrateNum, sym, Span, Symbol};
use rustc_session::config::CrateType;
use rustc_span::{sym, Span, Symbol};
use rustc_target::abi::{Align, FieldIdx, FieldsShape, Size, Variants};
use rustc_target::spec::abi::Abi;
@ -142,6 +147,38 @@ fn find_children<'tcx: 'a, 'a>(
None
}
/// Call `f` for each exported symbol.
pub fn iter_exported_symbols<'tcx>(
tcx: TyCtxt<'tcx>,
mut f: impl FnMut(CrateNum, DefId) -> InterpResult<'tcx>,
) -> InterpResult<'tcx> {
// `dependency_formats` includes all the transitive informations needed to link a crate,
// which is what we need here since we need to dig out `exported_symbols` from all transitive
// dependencies.
let dependency_formats = tcx.dependency_formats(());
let dependency_format = dependency_formats
.iter()
.find(|(crate_type, _)| *crate_type == CrateType::Executable)
.expect("interpreting a non-executable crate");
for cnum in iter::once(LOCAL_CRATE).chain(dependency_format.1.iter().enumerate().filter_map(
|(num, &linkage)| {
// We add 1 to the number because that's what rustc also does everywhere it
// calls `CrateNum::new`...
#[allow(clippy::arithmetic_side_effects)]
(linkage != Linkage::NotLinked).then_some(CrateNum::new(num + 1))
},
)) {
// We can ignore `_export_info` here: we are a Rust crate, and everything is exported
// from a Rust crate.
for &(symbol, _export_info) in tcx.exported_symbols(cnum) {
if let ExportedSymbol::NonGeneric(def_id) = symbol {
f(cnum, def_id)?;
}
}
}
Ok(())
}
/// Convert a softfloat type to its corresponding hostfloat type.
pub trait ToHost {
type HostFloat;
@ -1180,6 +1217,37 @@ fn expect_target_feature_for_intrinsic(
}
Ok(())
}
/// Lookup an array of immediates stored as a linker section of name `name`.
fn lookup_link_section(
&mut self,
name: &str,
) -> InterpResult<'tcx, Vec<ImmTy<'tcx, Provenance>>> {
let this = self.eval_context_mut();
let tcx = this.tcx.tcx;
let mut array = vec![];
iter_exported_symbols(tcx, |_cnum, def_id| {
let attrs = tcx.codegen_fn_attrs(def_id);
let Some(link_section) = attrs.link_section else {
return Ok(());
};
if link_section.as_str() == name {
let instance = ty::Instance::mono(tcx, def_id);
let const_val = this.eval_global(instance).unwrap_or_else(|err| {
panic!(
"failed to evaluate static in required link_section: {def_id:?}\n{err:?}"
)
});
let val = this.read_immediate(&const_val)?;
array.push(val);
}
Ok(())
})?;
Ok(array)
}
}
impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {

View File

@ -2,17 +2,10 @@
use rustc_apfloat::Float;
use rustc_ast::expand::allocator::AllocatorKind;
use rustc_hir::{
def::DefKind,
def_id::{CrateNum, LOCAL_CRATE},
};
use rustc_middle::middle::{
codegen_fn_attrs::CodegenFnAttrFlags, dependency_format::Linkage,
exported_symbols::ExportedSymbol,
};
use rustc_hir::{def::DefKind, def_id::CrateNum};
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc_middle::mir;
use rustc_middle::ty;
use rustc_session::config::CrateType;
use rustc_span::Symbol;
use rustc_target::{
abi::{Align, Size},
@ -158,81 +151,6 @@ fn emulate_dyn_sym(
Ok(())
}
fn lookup_init_array(&mut self) -> InterpResult<'tcx, Vec<ty::Instance<'tcx>>> {
let this = self.eval_context_mut();
let tcx = this.tcx.tcx;
let mut init_arrays = vec![];
let dependency_formats = tcx.dependency_formats(());
let dependency_format = dependency_formats
.iter()
.find(|(crate_type, _)| *crate_type == CrateType::Executable)
.expect("interpreting a non-executable crate");
for cnum in iter::once(LOCAL_CRATE).chain(
dependency_format.1.iter().enumerate().filter_map(|(num, &linkage)| {
// We add 1 to the number because that's what rustc also does everywhere it
// calls `CrateNum::new`...
#[allow(clippy::arithmetic_side_effects)]
(linkage != Linkage::NotLinked).then_some(CrateNum::new(num + 1))
}),
) {
for &(symbol, _export_info) in tcx.exported_symbols(cnum) {
if let ExportedSymbol::NonGeneric(def_id) = symbol {
let attrs = tcx.codegen_fn_attrs(def_id);
let link_section = if let Some(link_section) = attrs.link_section {
if !link_section.as_str().starts_with(".init_array") {
continue;
}
link_section
} else {
continue;
};
init_arrays.push((link_section, def_id));
}
}
}
init_arrays.sort_by(|(a, _), (b, _)| a.as_str().cmp(b.as_str()));
let endianness = tcx.data_layout.endian;
let ptr_size = tcx.data_layout.pointer_size;
let mut init_array = vec![];
for (_, def_id) in init_arrays {
let alloc = tcx.eval_static_initializer(def_id)?.inner();
let mut expected_offset = Size::ZERO;
for &(offset, prov) in alloc.provenance().ptrs().iter() {
if offset != expected_offset {
throw_ub_format!(".init_array.* may not contain any non-function pointer data");
}
expected_offset += ptr_size;
let alloc_id = prov.alloc_id();
let reloc_target_alloc = tcx.global_alloc(alloc_id);
match reloc_target_alloc {
GlobalAlloc::Function(instance) => {
let addend = {
let offset = offset.bytes() as usize;
let bytes = &alloc.inspect_with_uninit_and_ptr_outside_interpreter(
offset..offset + ptr_size.bytes() as usize,
);
read_target_uint(endianness, bytes).unwrap()
};
assert_eq!(addend, 0);
init_array.push(instance);
}
_ => throw_ub_format!(".init_array.* member is not a function pointer"),
}
}
}
Ok(init_array)
}
/// Lookup the body of a function that has `link_name` as the symbol name.
fn lookup_exported_symbol(
&mut self,
@ -249,74 +167,48 @@ fn lookup_exported_symbol(
Entry::Vacant(e) => {
// Find it if it was not cached.
let mut instance_and_crate: Option<(ty::Instance<'_>, CrateNum)> = None;
// `dependency_formats` includes all the transitive informations needed to link a crate,
// which is what we need here since we need to dig out `exported_symbols` from all transitive
// dependencies.
let dependency_formats = tcx.dependency_formats(());
let dependency_format = dependency_formats
.iter()
.find(|(crate_type, _)| *crate_type == CrateType::Executable)
.expect("interpreting a non-executable crate");
for cnum in iter::once(LOCAL_CRATE).chain(
dependency_format.1.iter().enumerate().filter_map(|(num, &linkage)| {
// We add 1 to the number because that's what rustc also does everywhere it
// calls `CrateNum::new`...
#[allow(clippy::arithmetic_side_effects)]
(linkage != Linkage::NotLinked).then_some(CrateNum::new(num + 1))
}),
) {
// We can ignore `_export_info` here: we are a Rust crate, and everything is exported
// from a Rust crate.
for &(symbol, _export_info) in tcx.exported_symbols(cnum) {
if let ExportedSymbol::NonGeneric(def_id) = symbol {
let attrs = tcx.codegen_fn_attrs(def_id);
let symbol_name = if let Some(export_name) = attrs.export_name {
export_name
} else if attrs.flags.contains(CodegenFnAttrFlags::NO_MANGLE) {
tcx.item_name(def_id)
helpers::iter_exported_symbols(tcx, |cnum, def_id| {
let attrs = tcx.codegen_fn_attrs(def_id);
let symbol_name = if let Some(export_name) = attrs.export_name {
export_name
} else if attrs.flags.contains(CodegenFnAttrFlags::NO_MANGLE) {
tcx.item_name(def_id)
} else {
// Skip over items without an explicitly defined symbol name.
return Ok(());
};
if symbol_name == link_name {
if let Some((original_instance, original_cnum)) = instance_and_crate {
// Make sure we are consistent wrt what is 'first' and 'second'.
let original_span = tcx.def_span(original_instance.def_id()).data();
let span = tcx.def_span(def_id).data();
if original_span < span {
throw_machine_stop!(TerminationInfo::MultipleSymbolDefinitions {
link_name,
first: original_span,
first_crate: tcx.crate_name(original_cnum),
second: span,
second_crate: tcx.crate_name(cnum),
});
} else {
// Skip over items without an explicitly defined symbol name.
continue;
};
if symbol_name == link_name {
if let Some((original_instance, original_cnum)) = instance_and_crate
{
// Make sure we are consistent wrt what is 'first' and 'second'.
let original_span =
tcx.def_span(original_instance.def_id()).data();
let span = tcx.def_span(def_id).data();
if original_span < span {
throw_machine_stop!(
TerminationInfo::MultipleSymbolDefinitions {
link_name,
first: original_span,
first_crate: tcx.crate_name(original_cnum),
second: span,
second_crate: tcx.crate_name(cnum),
}
);
} else {
throw_machine_stop!(
TerminationInfo::MultipleSymbolDefinitions {
link_name,
first: span,
first_crate: tcx.crate_name(cnum),
second: original_span,
second_crate: tcx.crate_name(original_cnum),
}
);
}
}
if !matches!(tcx.def_kind(def_id), DefKind::Fn | DefKind::AssocFn) {
throw_ub_format!(
"attempt to call an exported symbol that is not defined as a function"
);
}
instance_and_crate = Some((ty::Instance::mono(tcx, def_id), cnum));
throw_machine_stop!(TerminationInfo::MultipleSymbolDefinitions {
link_name,
first: span,
first_crate: tcx.crate_name(cnum),
second: original_span,
second_crate: tcx.crate_name(original_cnum),
});
}
}
if !matches!(tcx.def_kind(def_id), DefKind::Fn | DefKind::AssocFn) {
throw_ub_format!(
"attempt to call an exported symbol that is not defined as a function"
);
}
instance_and_crate = Some((ty::Instance::mono(tcx, def_id), cnum));
}
}
Ok(())
})?;
e.insert(instance_and_crate.map(|ic| ic.0))
}

View File

@ -219,61 +219,76 @@ fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
}
#[derive(Debug, Default)]
pub struct TlsDtorsState(TlsDtorsStatePriv);
pub struct TlsDtorsState<'tcx>(TlsDtorsStatePriv<'tcx>);
#[derive(Debug, Default)]
enum TlsDtorsStatePriv {
enum TlsDtorsStatePriv<'tcx> {
#[default]
Init,
PthreadDtors(RunningDtorState),
/// For Windows Dtors, we store the list of functions that we still have to call.
/// These are functions from the magic `.CRT$XLB` linker section.
WindowsDtors(Vec<ImmTy<'tcx, Provenance>>),
Done,
}
impl TlsDtorsState {
pub fn on_stack_empty<'tcx>(
impl<'tcx> TlsDtorsState<'tcx> {
pub fn on_stack_empty(
&mut self,
this: &mut MiriInterpCx<'_, 'tcx>,
) -> InterpResult<'tcx, Poll<()>> {
use TlsDtorsStatePriv::*;
match &mut self.0 {
Init => {
match this.tcx.sess.target.os.as_ref() {
"linux" | "freebsd" | "android" => {
// Run the pthread dtors.
self.0 = PthreadDtors(Default::default());
}
"macos" => {
// The macOS thread wide destructor runs "before any TLS slots get
// freed", so do that first.
this.schedule_macos_tls_dtor()?;
// When the stack is empty again, go on with the pthread dtors.
self.0 = PthreadDtors(Default::default());
}
"windows" => {
// Run the special magic hook.
this.schedule_windows_tls_dtors()?;
// And move to the final state.
self.0 = Done;
}
_ => {
// No TLS dtor support.
// FIXME: should we do something on wasi?
self.0 = Done;
let new_state = 'new_state: {
match &mut self.0 {
Init => {
match this.tcx.sess.target.os.as_ref() {
"linux" | "freebsd" | "android" => {
// Run the pthread dtors.
break 'new_state PthreadDtors(Default::default());
}
"macos" => {
// The macOS thread wide destructor runs "before any TLS slots get
// freed", so do that first.
this.schedule_macos_tls_dtor()?;
// When the stack is empty again, go on with the pthread dtors.
break 'new_state PthreadDtors(Default::default());
}
"windows" => {
// Determine which destructors to run.
let dtors = this.lookup_windows_tls_dtors()?;
// And move to the final state.
break 'new_state WindowsDtors(dtors);
}
_ => {
// No TLS dtor support.
// FIXME: should we do something on wasi?
break 'new_state Done;
}
}
}
}
PthreadDtors(state) => {
match this.schedule_next_pthread_tls_dtor(state)? {
Poll::Pending => {} // just keep going
Poll::Ready(()) => self.0 = Done,
PthreadDtors(state) => {
match this.schedule_next_pthread_tls_dtor(state)? {
Poll::Pending => return Ok(Poll::Pending), // just keep going
Poll::Ready(()) => break 'new_state Done,
}
}
WindowsDtors(dtors) => {
if let Some(dtor) = dtors.pop() {
this.schedule_windows_tls_dtor(dtor)?;
return Ok(Poll::Pending); // we stay in this state (but `dtors` got shorter)
} else {
// No more destructors to run.
break 'new_state Done;
}
}
Done => {
this.machine.tls.delete_all_thread_tls(this.get_active_thread());
return Ok(Poll::Ready(()));
}
}
Done => {
this.machine.tls.delete_all_thread_tls(this.get_active_thread());
return Ok(Poll::Ready(()));
}
}
};
self.0 = new_state;
Ok(Poll::Pending)
}
}
@ -282,22 +297,19 @@ impl<'mir, 'tcx: 'mir> EvalContextPrivExt<'mir, 'tcx> for crate::MiriInterpCx<'m
trait EvalContextPrivExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
/// Schedule TLS destructors for Windows.
/// On windows, TLS destructors are managed by std.
fn schedule_windows_tls_dtors(&mut self) -> InterpResult<'tcx> {
fn lookup_windows_tls_dtors(&mut self) -> InterpResult<'tcx, Vec<ImmTy<'tcx, Provenance>>> {
let this = self.eval_context_mut();
// Windows has a special magic linker section that is run on certain events.
// Instead of searching for that section and supporting arbitrary hooks in there
// (that would be basically https://github.com/rust-lang/miri/issues/450),
// we specifically look up the static in libstd that we know is placed
// in that section.
if !this.have_module(&["std"]) {
// Looks like we are running in a `no_std` crate.
// That also means no TLS dtors callback to call.
return Ok(());
}
let thread_callback =
this.eval_windows("thread_local_key", "p_thread_callback").to_pointer(this)?;
let thread_callback = this.get_ptr_fn(thread_callback)?.as_instance()?;
// We don't support most of that, but just enough to make thread-local dtors in `std` work.
Ok(this.lookup_link_section(".CRT$XLB")?)
}
fn schedule_windows_tls_dtor(&mut self, dtor: ImmTy<'tcx, Provenance>) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
let dtor = dtor.to_scalar().to_pointer(this)?;
let thread_callback = this.get_ptr_fn(dtor)?.as_instance()?;
// FIXME: Technically, the reason should be `DLL_PROCESS_DETACH` when the main thread exits
// but std treats both the same.
@ -305,7 +317,7 @@ fn schedule_windows_tls_dtors(&mut self) -> InterpResult<'tcx> {
// The signature of this function is `unsafe extern "system" fn(h: c::LPVOID, dwReason: c::DWORD, pv: c::LPVOID)`.
// FIXME: `h` should be a handle to the current module and what `pv` should be is unknown
// but both are ignored by std
// but both are ignored by std.
this.call_function(
thread_callback,
Abi::System { unwind: false },