rust/src/eval.rs

454 lines
17 KiB
Rust
Raw Normal View History

2019-06-29 07:15:05 -05:00
//! Main evaluator loop and setting up the initial stack frame.
use std::convert::TryFrom;
2020-04-05 16:03:44 -05:00
use std::ffi::OsStr;
use std::iter;
use log::info;
2020-03-01 03:26:24 -06:00
use rustc_hir::def_id::DefId;
use rustc_middle::ty::{
self,
layout::{LayoutCx, LayoutOf},
TyCtxt,
};
use rustc_target::spec::abi::Abi;
2021-09-02 17:41:10 -05:00
use rustc_session::config::EntryFnType;
use crate::*;
/// The available alignment-checking modes.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum AlignmentCheck {
    /// Alignment is not checked at all.
    None,
    /// Alignment is checked "symbolically": only the alignment that was
    /// requested for an allocation is used, not its real base address.
    Symbolic,
    /// Alignment is checked on the actual physical integer address.
    Int,
}
/// How a rejected isolated op is handled.
///
/// Unless the choice is [`RejectOpWith::Abort`], miri returns an error for the
/// isolated op; the remaining variants only determine whether the user is
/// warned about that error.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum RejectOpWith {
    /// The isolated op is rejected with an abort of the machine.
    Abort,
    /// No warning is printed about the rejected isolated op.
    NoWarning,
    /// A warning about the rejected isolated op is printed, with backtrace.
    Warning,
    /// A warning about the rejected isolated op is printed, without backtrace.
    WarningWithoutBacktrace,
}
/// The action taken for an op that requires communication with the host.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum IsolatedOp {
    /// The op is rejected. By default, miri rejects it with an abort;
    /// otherwise it returns an error code and prints a warning about it.
    /// The warning level is controlled by the `RejectOpWith` payload.
    Reject(RejectOpWith),
    /// The op is executed, i.e. isolation is disabled.
    Allow,
}
/// Configuration needed to spawn a Miri instance.
#[derive(Clone)]
pub struct MiriConfig {
/// Determine if validity checking is enabled.
pub validate: bool,
/// Determines if Stacked Borrows is enabled.
pub stacked_borrows: bool,
/// Controls alignment checking.
pub check_alignment: AlignmentCheck,
/// Controls integer and float validity (e.g., initialization) checking.
pub check_number_validity: bool,
2021-05-29 12:36:06 -05:00
/// Controls function [ABI](Abi) checking.
pub check_abi: bool,
/// Action for an op requiring communication with the host.
pub isolated_op: IsolatedOp,
2019-12-07 06:44:48 -06:00
/// Determines if memory leaks should be ignored.
pub ignore_leaks: bool,
2019-08-28 17:20:50 -05:00
/// Environment variables that should always be isolated from the host.
pub excluded_env_vars: Vec<String>,
/// Command-line arguments passed to the interpreted program.
pub args: Vec<String>,
/// The seed to use when non-determinism or randomness are required (e.g. ptr-to-int cast, `getrandom()`).
2019-08-06 10:28:50 -05:00
pub seed: Option<u64>,
2020-07-02 02:50:52 -05:00
/// The stacked borrows pointer id to report about
2019-11-30 17:02:58 -06:00
pub tracked_pointer_tag: Option<PtrId>,
2020-07-02 02:50:52 -05:00
/// The stacked borrows call ID to report about
pub tracked_call_id: Option<CallId>,
/// The allocation id to report about.
pub tracked_alloc_id: Option<AllocId>,
/// Whether to track raw pointers in stacked borrows.
pub track_raw: bool,
/// Determine if data race detection should be enabled
pub data_race_detector: bool,
/// Rate of spurious failures for compare_exchange_weak atomic operations,
/// between 0.0 and 1.0, defaulting to 0.8 (80% chance of failure).
pub cmpxchg_weak_failure_rate: f64,
2021-05-29 17:09:46 -05:00
/// If `Some`, enable the `measureme` profiler, writing results to a file
/// with the specified prefix.
pub measureme_out: Option<String>,
/// Panic when unsupported functionality is encountered
pub panic_on_unsupported: bool,
}
impl Default for MiriConfig {
    /// Builds the baseline configuration.
    ///
    /// Validity checking, Stacked Borrows, ABI checking and the data race
    /// detector are on; alignment is checked on integer addresses; isolated
    /// ops are rejected with an abort; leaks are not ignored; nothing is
    /// tracked; no seed, arguments or excluded environment variables are set.
    fn default() -> Self {
        MiriConfig {
            validate: true,
            stacked_borrows: true,
            check_alignment: AlignmentCheck::Int,
            check_number_validity: false,
            check_abi: true,
            isolated_op: IsolatedOp::Reject(RejectOpWith::Abort),
            ignore_leaks: false,
            excluded_env_vars: Vec::new(),
            args: Vec::new(),
            seed: None,
            tracked_pointer_tag: None,
            tracked_call_id: None,
            tracked_alloc_id: None,
            track_raw: false,
            data_race_detector: true,
            // Keep in sync with the default documented on the field.
            cmpxchg_weak_failure_rate: 0.8,
            measureme_out: None,
            panic_on_unsupported: false,
        }
    }
}
/// Returns a freshly created `InterpCx`, along with an `MPlaceTy` representing
2021-09-02 17:41:10 -05:00
/// the location where the return value of the `start` function will be
/// written to.
/// Public because this is also used by `priroda`.
pub fn create_ecx<'mir, 'tcx: 'mir>(
tcx: TyCtxt<'tcx>,
2021-09-02 17:41:10 -05:00
entry_id: DefId,
entry_type: EntryFnType,
config: MiriConfig,
) -> InterpResult<'tcx, (InterpCx<'mir, 'tcx, Evaluator<'mir, 'tcx>>, MPlaceTy<'tcx, Tag>)> {
2020-04-05 16:03:44 -05:00
let param_env = ty::ParamEnv::reveal_all();
let layout_cx = LayoutCx { tcx, param_env };
let mut ecx = InterpCx::new(
2020-06-15 10:38:27 -05:00
tcx,
rustc_span::source_map::DUMMY_SP,
2020-04-05 16:03:44 -05:00
param_env,
Evaluator::new(&config, layout_cx),
MemoryExtra::new(&config),
);
2019-08-14 10:24:35 -05:00
// Complete initialization.
2020-03-07 10:35:00 -06:00
EnvVars::init(&mut ecx, config.excluded_env_vars)?;
2020-03-07 14:33:27 -06:00
MemoryExtra::init_extern_statics(&mut ecx)?;
// Make sure we have MIR. We check MIR for some stable monomorphic function in libcore.
let sentinel = ecx.resolve_path(&["core", "ascii", "escape_default"]);
if !tcx.is_mir_available(sentinel.def.def_id()) {
tcx.sess.fatal("the current sysroot was built without `-Zalways-encode-mir`. Use `cargo miri setup` to prepare a sysroot that is suitable for Miri.");
}
2019-08-14 10:24:35 -05:00
// Setup first stack-frame
2021-09-02 17:41:10 -05:00
let entry_instance = ty::Instance::mono(tcx, entry_id);
2021-09-02 17:41:10 -05:00
// First argument is constructed later, because its skipped if the entry function uses #[start]
2019-11-29 04:08:27 -06:00
// Second argument (argc): length of `config.args`.
let argc = Scalar::from_machine_usize(u64::try_from(config.args.len()).unwrap(), &ecx);
// Third argument (`argv`): created from `config.args`.
let argv = {
// Put each argument in memory, collect pointers.
2021-07-15 13:33:08 -05:00
let mut argvs = Vec::<Immediate<Tag>>::new();
for arg in config.args.iter() {
// Make space for `0` terminator.
let size = u64::try_from(arg.len()).unwrap().checked_add(1).unwrap();
let arg_type = tcx.mk_array(tcx.types.u8, size);
2021-07-04 08:59:55 -05:00
let arg_place =
ecx.allocate(ecx.layout_of(arg_type)?, MiriMemoryKind::Machine.into())?;
2019-12-04 03:43:36 -06:00
ecx.write_os_str_to_c_str(OsStr::new(arg), arg_place.ptr, size)?;
2021-07-15 13:33:08 -05:00
ecx.mark_immutable(&*arg_place);
argvs.push(arg_place.to_ref(&ecx));
}
// Make an array with all these pointers, in the Miri memory.
2021-05-16 04:28:01 -05:00
let argvs_layout = ecx.layout_of(
tcx.mk_array(tcx.mk_imm_ptr(tcx.types.u8), u64::try_from(argvs.len()).unwrap()),
)?;
2021-07-04 08:59:55 -05:00
let argvs_place = ecx.allocate(argvs_layout, MiriMemoryKind::Machine.into())?;
for (idx, arg) in argvs.into_iter().enumerate() {
let place = ecx.mplace_field(&argvs_place, idx)?;
2021-07-15 13:33:08 -05:00
ecx.write_immediate(arg, &place.into())?;
}
2021-07-15 13:33:08 -05:00
ecx.mark_immutable(&*argvs_place);
// A pointer to that place is the 3rd argument for main.
2021-07-15 13:33:08 -05:00
let argv = argvs_place.to_ref(&ecx);
// Store `argc` and `argv` for macOS `_NSGetArg{c,v}`.
{
2019-12-23 05:56:23 -06:00
let argc_place =
2021-07-04 08:59:55 -05:00
ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
ecx.write_scalar(argc, &argc_place.into())?;
2021-07-15 13:33:08 -05:00
ecx.mark_immutable(&*argc_place);
ecx.machine.argc = Some(*argc_place);
let argv_place = ecx.allocate(
ecx.layout_of(tcx.mk_imm_ptr(tcx.types.unit))?,
2020-02-23 14:55:02 -06:00
MiriMemoryKind::Machine.into(),
2021-07-04 08:59:55 -05:00
)?;
2021-07-15 13:33:08 -05:00
ecx.write_immediate(argv, &argv_place.into())?;
ecx.mark_immutable(&*argv_place);
ecx.machine.argv = Some(*argv_place);
}
// Store command line as UTF-16 for Windows `GetCommandLineW`.
{
// Construct a command string with all the aguments.
let cmd_utf16: Vec<u16> = args_to_utf16_command_string(config.args.iter());
let cmd_type = tcx.mk_array(tcx.types.u16, u64::try_from(cmd_utf16.len()).unwrap());
2021-07-04 08:59:55 -05:00
let cmd_place =
ecx.allocate(ecx.layout_of(cmd_type)?, MiriMemoryKind::Machine.into())?;
2021-07-15 13:33:08 -05:00
ecx.machine.cmd_line = Some(*cmd_place);
// Store the UTF-16 string. We just allocated so we know the bounds are fine.
for (idx, &c) in cmd_utf16.iter().enumerate() {
let place = ecx.mplace_field(&cmd_place, idx)?;
ecx.write_scalar(Scalar::from_u16(c), &place.into())?;
}
2021-07-15 13:33:08 -05:00
ecx.mark_immutable(&*cmd_place);
}
argv
};
2019-08-06 10:28:50 -05:00
// Return place (in static memory so that it does not count as leak).
2021-07-04 08:59:55 -05:00
let ret_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
// Call start function.
2021-09-02 17:41:10 -05:00
match entry_type {
EntryFnType::Main => {
let start_id = tcx.lang_items().start_fn().unwrap();
let main_ret_ty = tcx.fn_sig(entry_id).output();
let main_ret_ty = main_ret_ty.no_bound_vars().unwrap();
let start_instance = ty::Instance::resolve(
tcx,
ty::ParamEnv::reveal_all(),
start_id,
tcx.mk_substs(::std::iter::once(ty::subst::GenericArg::from(main_ret_ty))),
)
.unwrap()
.unwrap();
let main_ptr = ecx.memory.create_fn_alloc(FnVal::Instance(entry_instance));
ecx.call_function(
start_instance,
Abi::Rust,
&[Scalar::from_pointer(main_ptr, &ecx).into(), argc.into(), argv],
Some(&ret_place.into()),
StackPopCleanup::None { cleanup: true },
)?;
}
EntryFnType::Start => {
ecx.call_function(
entry_instance,
Abi::Rust,
&[argc.into(), argv],
Some(&ret_place.into()),
StackPopCleanup::None { cleanup: true },
)?;
}
}
Ok((ecx, ret_place))
}
2021-09-02 17:41:10 -05:00
/// Evaluates the entry function specified by `entry_id`.
/// Returns `Some(return_code)` if program executed completed.
/// Returns `None` if an evaluation error occured.
2021-09-02 17:41:10 -05:00
pub fn eval_entry<'tcx>(
tcx: TyCtxt<'tcx>,
entry_id: DefId,
entry_type: EntryFnType,
config: MiriConfig,
) -> Option<i64> {
// Copy setting before we move `config`.
let ignore_leaks = config.ignore_leaks;
2019-12-07 06:44:48 -06:00
2021-09-02 17:41:10 -05:00
let (mut ecx, ret_place) = match create_ecx(tcx, entry_id, entry_type, config) {
Ok(v) => v,
2020-05-03 05:10:24 -05:00
Err(err) => {
err.print_backtrace();
2021-02-18 03:34:32 -06:00
panic!("Miri initialization error: {}", err.kind())
}
};
// Perform the main execution.
let res: InterpResult<'_, i64> = (|| {
// Main loop.
2020-04-16 21:40:02 -05:00
loop {
let info = ecx.preprocess_diagnostics();
2020-04-16 21:40:02 -05:00
match ecx.schedule()? {
SchedulingAction::ExecuteStep => {
assert!(ecx.step()?, "a terminated thread was scheduled for execution");
}
2020-04-30 16:07:07 -05:00
SchedulingAction::ExecuteTimeoutCallback => {
2021-05-16 04:28:01 -05:00
assert!(
ecx.machine.communicate(),
"scheduler callbacks require disabled isolation, but the code \
2021-05-16 04:28:01 -05:00
that created the callback did not check it"
);
2020-04-30 16:07:07 -05:00
ecx.run_timeout_callback()?;
}
2020-04-16 21:40:02 -05:00
SchedulingAction::ExecuteDtors => {
2020-04-29 15:16:22 -05:00
// This will either enable the thread again (so we go back
// to `ExecuteStep`), or determine that this thread is done
// for good.
2020-04-26 22:49:53 -05:00
ecx.schedule_next_tls_dtor_for_active_thread()?;
2020-04-16 21:40:02 -05:00
}
SchedulingAction::Stop => {
break;
}
}
ecx.process_diagnostics(info);
2020-01-08 05:49:46 -06:00
}
2021-07-15 13:33:08 -05:00
let return_code = ecx.read_scalar(&ret_place.into())?.to_machine_isize(&ecx)?;
Ok(return_code)
})();
// Machine cleanup.
EnvVars::cleanup(&mut ecx).unwrap();
// Process the result.
match res {
Ok(return_code) => {
2019-11-27 02:13:37 -06:00
if !ignore_leaks {
// Check for thread leaks.
if !ecx.have_all_terminated() {
tcx.sess.err(
"the main thread terminated without waiting for all remaining threads",
);
tcx.sess.note_without_error("pass `-Zmiri-ignore-leaks` to disable this check");
return None;
}
// Check for memory leaks.
info!("Additonal static roots: {:?}", ecx.machine.static_roots);
let leaks = ecx.memory.leak_report(&ecx.machine.static_roots);
2019-11-27 02:13:37 -06:00
if leaks != 0 {
tcx.sess.err("the evaluated program leaked memory");
tcx.sess.note_without_error("pass `-Zmiri-ignore-leaks` to disable this check");
2019-11-27 02:13:37 -06:00
// Ignore the provided return code - let the reported error
// determine the return code.
return None;
}
}
2019-12-23 17:08:47 -06:00
Some(return_code)
}
Err(e) => report_error(&ecx, e),
}
}
/// Turns an array of arguments into a Windows command line string.
///
/// The string will be UTF-16 encoded and NUL terminated. An empty `args`
/// iterator yields just the NUL terminator.
///
/// `argv[0]` is always wrapped in double quotes and is otherwise copied
/// verbatim. Every further argument is separated by a single space and is
/// quoted/escaped only if it is empty or contains a quote, tab or space.
///
/// # Panics
///
/// * If the zeroth argument contains the `"` character, because doublequotes
///   in argv[0] cannot be encoded using the standard command line parsing rules.
/// * If the resulting command line contains an interior NUL character.
///
/// Further reading:
/// * [Parsing C++ command-line arguments](https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-160#parsing-c-command-line-arguments)
/// * [The C/C++ Parameter Parsing Rules](https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES)
fn args_to_utf16_command_string<I, T>(mut args: I) -> Vec<u16>
where
    I: Iterator<Item = T>,
    T: AsRef<str>,
{
    // Parse argv[0]. Slashes aren't escaped. Literal double quotes are not allowed.
    let arg0 = match args.next() {
        Some(arg0) => arg0,
        // No arguments at all: the command line is the empty, NUL-terminated string.
        None => return vec![0],
    };
    let arg0 = arg0.as_ref();
    if arg0.contains('"') {
        panic!("argv[0] cannot contain a doublequote (\") character");
    }
    // Always surround argv[0] with quotes.
    let mut cmd = String::with_capacity(arg0.len() + 2);
    cmd.push('"');
    cmd.push_str(arg0);
    cmd.push('"');

    // Build the other arguments.
    for arg in args {
        let arg = arg.as_ref();
        cmd.push(' ');
        if arg.is_empty() {
            // An empty argument must be spelled out, or it would vanish.
            cmd.push_str("\"\"");
            continue;
        }
        if !arg.bytes().any(|c| matches!(c, b'"' | b'\t' | b' ')) {
            // No quote, tab, or space -- no escaping required.
            cmd.push_str(arg);
            continue;
        }

        // Spaces and tabs are escaped by surrounding them in quotes.
        // Quotes are themselves escaped by using backslashes when in a
        // quoted block.
        // Backslashes only need to be escaped when one or more are directly
        // followed by a quote. Otherwise they are taken literally.
        cmd.push('"');
        // Backslashes seen but not yet emitted: how they are written depends
        // on the character that ends the run.
        let mut nslashes: usize = 0;
        for c in arg.chars() {
            match c {
                '\\' => nslashes += 1,
                '"' => {
                    // Double the run, then add one more backslash to escape the quote.
                    cmd.extend(iter::repeat('\\').take(nslashes * 2 + 1));
                    cmd.push('"');
                    nslashes = 0;
                }
                _ => {
                    // The run is not followed by a quote: taken literally.
                    cmd.extend(iter::repeat('\\').take(nslashes));
                    cmd.push(c);
                    nslashes = 0;
                }
            }
        }
        // A trailing run precedes the closing quote, so it must be doubled.
        cmd.extend(iter::repeat('\\').take(nslashes * 2));
        cmd.push('"');
    }

    if cmd.contains('\0') {
        panic!("interior null in command line arguments");
    }
    cmd.encode_utf16().chain(iter::once(0)).collect()
}
2021-09-22 14:46:20 -05:00
#[cfg(test)]
mod tests {
    use super::*;

    /// A doublequote in argv[0] is rejected with a panic.
    #[test]
    #[should_panic(expected = "argv[0] cannot contain a doublequote (\") character")]
    fn windows_argv0_panic_on_quote() {
        args_to_utf16_command_string(["\""].iter());
    }

    /// argv[0] is quoted verbatim, while later arguments are quoted and
    /// escaped only as needed.
    #[test]
    fn windows_argv0_no_escape() {
        // Ensure that a trailing backslash in argv[0] is not escaped.
        let cmd = String::from_utf16_lossy(&args_to_utf16_command_string(
            [r"C:\Program Files\", "arg1", "arg 2", "arg \" 3"].iter(),
        ));
        // Strip the NUL terminator before comparing.
        assert_eq!(cmd.trim_end_matches("\0"), r#""C:\Program Files\" arg1 "arg 2" "arg \" 3""#);
    }
}