Auto merge of #51702 - ecstatic-morse:infinite-loop-detection, r=oli-obk

Infinite loop detection for const evaluation

Resolves #50637.

An `EvalContext` stores the transient state (stack, heap, etc.) of the MIRI virtual machine while it executing code. As long as MIRI only executes pure functions, we can detect if a program is in a state where it will never terminate by periodically taking a "snapshot" of this transient state and comparing it to previous ones. If any two states are exactly equal, the machine must be in an infinite loop.

Instead of fully cloning a snapshot every time the detector is run, we store a snapshot's hash. Only when a hash collision occurs do we fully clone the interpreter state. Future snapshots which cause a collision will be compared against this clone, causing the interpreter to abort if they are equal.

At the moment, snapshots are not taken until MIRI has progressed a certain amount. After this threshold, snapshots are taken every `DETECTOR_SNAPSHOT_PERIOD` steps. This means that an infinite loop with period `P` will be detected after a maximum of `2 * P * DETECTOR_SNAPSHOT_PERIOD` interpreter steps. The factor of 2 arises because we only clone a snapshot after it causes a hash collision.
This commit is contained in:
bors 2018-07-11 16:24:46 +00:00
commit d573fe1778
13 changed files with 302 additions and 27 deletions

View File

@ -549,7 +549,8 @@ for ::mir::interpret::EvalErrorKind<'gcx, O> {
RemainderByZero |
DivisionByZero |
GeneratorResumedAfterReturn |
GeneratorResumedAfterPanic => {}
GeneratorResumedAfterPanic |
InfiniteLoop => {}
ReferencedConstant(ref err) => err.hash_stable(hcx, hasher),
MachineError(ref err) => err.hash_stable(hcx, hasher),
FunctionPointerTyMismatch(a, b) => {

View File

@ -264,6 +264,7 @@ pub enum EvalErrorKind<'tcx, O> {
ReferencedConstant(Lrc<ConstEvalErr<'tcx>>),
GeneratorResumedAfterReturn,
GeneratorResumedAfterPanic,
InfiniteLoop,
}
pub type EvalResult<'tcx, T = ()> = Result<T, EvalError<'tcx>>;
@ -398,6 +399,8 @@ impl<'tcx, O> EvalErrorKind<'tcx, O> {
RemainderByZero => "attempt to calculate the remainder with a divisor of zero",
GeneratorResumedAfterReturn => "generator resumed after completion",
GeneratorResumedAfterPanic => "generator resumed after panicking",
InfiniteLoop =>
"duplicate interpreter state observed here, const evaluation will never terminate",
}
}
}

View File

@ -36,7 +36,7 @@ use ty::codec::TyDecoder;
use std::sync::atomic::{AtomicU32, Ordering};
use std::num::NonZeroU32;
#[derive(Clone, Debug, PartialEq, RustcEncodable, RustcDecodable)]
#[derive(Clone, Debug, PartialEq, Eq, Hash, RustcEncodable, RustcDecodable)]
pub enum Lock {
NoLock,
WriteLock(DynamicLifetime),

View File

@ -1636,7 +1636,7 @@ impl Debug for ValidationOp {
}
// This is generic so that it can be reused by miri
#[derive(Clone, RustcEncodable, RustcDecodable)]
#[derive(Clone, Hash, PartialEq, Eq, RustcEncodable, RustcDecodable)]
pub struct ValidationOperand<'tcx, T> {
pub place: T,
pub ty: Ty<'tcx>,

View File

@ -587,6 +587,7 @@ impl<'a, 'tcx, O: Lift<'tcx>> Lift<'tcx> for interpret::EvalErrorKind<'a, O> {
RemainderByZero => RemainderByZero,
GeneratorResumedAfterReturn => GeneratorResumedAfterReturn,
GeneratorResumedAfterPanic => GeneratorResumedAfterPanic,
InfiniteLoop => InfiniteLoop,
})
}
}

View File

@ -185,6 +185,7 @@ fn eval_body_using_ecx<'a, 'mir, 'tcx>(
Ok((value, ptr, layout.ty))
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct CompileTimeEvaluator;
impl<'tcx> Into<EvalError<'tcx>> for ConstEvalError {

View File

@ -1,4 +1,5 @@
use std::fmt::Write;
use std::hash::{Hash, Hasher};
use std::mem;
use rustc::hir::def_id::DefId;
@ -9,6 +10,7 @@ use rustc::ty::layout::{self, Size, Align, HasDataLayout, IntegerExt, LayoutOf,
use rustc::ty::subst::{Subst, Substs};
use rustc::ty::{self, Ty, TyCtxt, TypeAndMut};
use rustc::ty::query::TyCtxtAt;
use rustc_data_structures::fx::{FxHashSet, FxHasher};
use rustc_data_structures::indexed_vec::{IndexVec, Idx};
use rustc::mir::interpret::{
FrameInfo, GlobalId, Value, Scalar,
@ -41,13 +43,17 @@ pub struct EvalContext<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
/// The maximum number of stack frames allowed
pub(crate) stack_limit: usize,
/// The maximum number of terminators that may be evaluated.
/// This prevents infinite loops and huge computations from freezing up const eval.
/// Remove once halting problem is solved.
pub(crate) terminators_remaining: usize,
/// When this value is negative, it indicates the number of interpreter
/// steps *until* the loop detector is enabled. When it is positive, it is
/// the number of steps after the detector has been enabled modulo the loop
/// detector period.
pub(crate) steps_since_detector_enabled: isize,
pub(crate) loop_detector: InfiniteLoopDetector<'a, 'mir, 'tcx, M>,
}
/// A stack frame.
#[derive(Clone)]
pub struct Frame<'mir, 'tcx: 'mir> {
////////////////////////////////////////////////////////////////////////////////
// Function and callsite information
@ -89,6 +95,121 @@ pub struct Frame<'mir, 'tcx: 'mir> {
pub stmt: usize,
}
impl<'mir, 'tcx: 'mir> Eq for Frame<'mir, 'tcx> {}
impl<'mir, 'tcx: 'mir> PartialEq for Frame<'mir, 'tcx> {
fn eq(&self, other: &Self) -> bool {
let Frame {
mir: _,
instance,
span: _,
return_to_block,
return_place,
locals,
block,
stmt,
} = self;
// Some of these are constant during evaluation, but are included
// anyways for correctness.
*instance == other.instance
&& *return_to_block == other.return_to_block
&& *return_place == other.return_place
&& *locals == other.locals
&& *block == other.block
&& *stmt == other.stmt
}
}
impl<'mir, 'tcx: 'mir> Hash for Frame<'mir, 'tcx> {
fn hash<H: Hasher>(&self, state: &mut H) {
let Frame {
mir: _,
instance,
span: _,
return_to_block,
return_place,
locals,
block,
stmt,
} = self;
instance.hash(state);
return_to_block.hash(state);
return_place.hash(state);
locals.hash(state);
block.hash(state);
stmt.hash(state);
}
}
/// The virtual machine state during const-evaluation at a given point in time.
type EvalSnapshot<'a, 'mir, 'tcx, M>
= (M, Vec<Frame<'mir, 'tcx>>, Memory<'a, 'mir, 'tcx, M>);
pub(crate) struct InfiniteLoopDetector<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
/// The set of all `EvalSnapshot` *hashes* observed by this detector.
///
/// When a collision occurs in this table, we store the full snapshot in
/// `snapshots`.
hashes: FxHashSet<u64>,
/// The set of all `EvalSnapshot`s observed by this detector.
///
/// An `EvalSnapshot` will only be fully cloned once it has caused a
/// collision in `hashes`. As a result, the detector must observe at least
/// *two* full cycles of an infinite loop before it triggers.
snapshots: FxHashSet<EvalSnapshot<'a, 'mir, 'tcx, M>>,
}
impl<'a, 'mir, 'tcx, M> Default for InfiniteLoopDetector<'a, 'mir, 'tcx, M>
where M: Machine<'mir, 'tcx>,
'tcx: 'a + 'mir,
{
fn default() -> Self {
InfiniteLoopDetector {
hashes: FxHashSet::default(),
snapshots: FxHashSet::default(),
}
}
}
impl<'a, 'mir, 'tcx, M> InfiniteLoopDetector<'a, 'mir, 'tcx, M>
where M: Machine<'mir, 'tcx>,
'tcx: 'a + 'mir,
{
/// Returns `true` if the loop detector has not yet observed a snapshot.
pub fn is_empty(&self) -> bool {
self.hashes.is_empty()
}
pub fn observe_and_analyze(
&mut self,
machine: &M,
stack: &Vec<Frame<'mir, 'tcx>>,
memory: &Memory<'a, 'mir, 'tcx, M>,
) -> EvalResult<'tcx, ()> {
let snapshot = (machine, stack, memory);
let mut fx = FxHasher::default();
snapshot.hash(&mut fx);
let hash = fx.finish();
if self.hashes.insert(hash) {
// No collision
return Ok(())
}
if self.snapshots.insert((machine.clone(), stack.clone(), memory.clone())) {
// Spurious collision or first cycle
return Ok(())
}
// Second cycle
Err(EvalErrorKind::InfiniteLoop.into())
}
}
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub enum StackPopCleanup {
/// The stackframe existed to compute the initial value of a static/constant, make sure it
@ -173,7 +294,7 @@ impl<'c, 'b, 'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> LayoutOf
}
}
const MAX_TERMINATORS: usize = 1_000_000;
const STEPS_UNTIL_DETECTOR_ENABLED: isize = 1_000_000;
impl<'a, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M> {
pub fn new(
@ -189,16 +310,17 @@ impl<'a, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M
memory: Memory::new(tcx, memory_data),
stack: Vec::new(),
stack_limit: tcx.sess.const_eval_stack_frame_limit,
terminators_remaining: MAX_TERMINATORS,
loop_detector: Default::default(),
steps_since_detector_enabled: -STEPS_UNTIL_DETECTOR_ENABLED,
}
}
pub(crate) fn with_fresh_body<F: FnOnce(&mut Self) -> R, R>(&mut self, f: F) -> R {
let stack = mem::replace(&mut self.stack, Vec::new());
let terminators_remaining = mem::replace(&mut self.terminators_remaining, MAX_TERMINATORS);
let steps = mem::replace(&mut self.steps_since_detector_enabled, -STEPS_UNTIL_DETECTOR_ENABLED);
let r = f(self);
self.stack = stack;
self.terminators_remaining = terminators_remaining;
self.steps_since_detector_enabled = steps;
r
}
@ -538,8 +660,6 @@ impl<'a, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M
}
Aggregate(ref kind, ref operands) => {
self.inc_step_counter_and_check_limit(operands.len());
let (dest, active_field_index) = match **kind {
mir::AggregateKind::Adt(adt_def, variant_index, _, active_field_index) => {
self.write_discriminant_value(dest_ty, dest, variant_index)?;

View File

@ -2,6 +2,8 @@
//! This separation exists to ensure that no fancy miri features like
//! interpreting common C functions leak into CTFE.
use std::hash::Hash;
use rustc::mir::interpret::{AllocId, EvalResult, Scalar, Pointer, AccessKind, GlobalId};
use super::{EvalContext, Place, ValTy, Memory};
@ -13,9 +15,9 @@ use syntax::ast::Mutability;
/// Methods of this trait signifies a point where CTFE evaluation would fail
/// and some use case dependent behaviour can instead be applied
pub trait Machine<'mir, 'tcx>: Sized {
pub trait Machine<'mir, 'tcx>: Clone + Eq + Hash {
/// Additional data that can be accessed via the Memory
type MemoryData;
type MemoryData: Clone + Eq + Hash;
/// Additional memory kinds a machine wishes to distinguish from the builtin ones
type MemoryKinds: ::std::fmt::Debug + PartialEq + Copy + Clone;

View File

@ -1,4 +1,5 @@
use std::collections::VecDeque;
use std::hash::{Hash, Hasher};
use std::ptr;
use rustc::hir::def_id::DefId;
@ -9,7 +10,7 @@ use rustc::ty::layout::{self, Align, TargetDataLayout, Size};
use rustc::mir::interpret::{Pointer, AllocId, Allocation, AccessKind, Value,
EvalResult, Scalar, EvalErrorKind, GlobalId, AllocType};
pub use rustc::mir::interpret::{write_target_uint, write_target_int, read_target_uint};
use rustc_data_structures::fx::{FxHashSet, FxHashMap};
use rustc_data_structures::fx::{FxHashSet, FxHashMap, FxHasher};
use syntax::ast::Mutability;
@ -19,7 +20,7 @@ use super::{EvalContext, Machine};
// Allocations and pointers
////////////////////////////////////////////////////////////////////////////////
#[derive(Debug, PartialEq, Copy, Clone)]
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum MemoryKind<T> {
/// Error if deallocated except during a stack pop
Stack,
@ -31,6 +32,7 @@ pub enum MemoryKind<T> {
// Top-level interpreter memory
////////////////////////////////////////////////////////////////////////////////
#[derive(Clone)]
pub struct Memory<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
/// Additional data required by the Machine
pub data: M::MemoryData,
@ -47,6 +49,64 @@ pub struct Memory<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
pub tcx: TyCtxtAt<'a, 'tcx, 'tcx>,
}
impl<'a, 'mir, 'tcx, M> Eq for Memory<'a, 'mir, 'tcx, M>
where M: Machine<'mir, 'tcx>,
'tcx: 'a + 'mir,
{}
impl<'a, 'mir, 'tcx, M> PartialEq for Memory<'a, 'mir, 'tcx, M>
where M: Machine<'mir, 'tcx>,
'tcx: 'a + 'mir,
{
fn eq(&self, other: &Self) -> bool {
let Memory {
data,
alloc_kind,
alloc_map,
cur_frame,
tcx: _,
} = self;
*data == other.data
&& *alloc_kind == other.alloc_kind
&& *alloc_map == other.alloc_map
&& *cur_frame == other.cur_frame
}
}
impl<'a, 'mir, 'tcx, M> Hash for Memory<'a, 'mir, 'tcx, M>
where M: Machine<'mir, 'tcx>,
'tcx: 'a + 'mir,
{
fn hash<H: Hasher>(&self, state: &mut H) {
let Memory {
data,
alloc_kind: _,
alloc_map: _,
cur_frame,
tcx: _,
} = self;
data.hash(state);
cur_frame.hash(state);
// We ignore some fields which don't change between evaluation steps.
// Since HashMaps which contain the same items may have different
// iteration orders, we use a commutative operation (in this case
// addition, but XOR would also work), to combine the hash of each
// `Allocation`.
self.allocations()
.map(|allocs| {
let mut h = FxHasher::default();
allocs.hash(&mut h);
h.finish()
})
.fold(0u64, |hash, x| hash.wrapping_add(x))
.hash(state);
}
}
impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
pub fn new(tcx: TyCtxtAt<'a, 'tcx, 'tcx>, data: M::MemoryData) -> Self {
Memory {
@ -866,7 +926,7 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
for i in 0..size.bytes() {
let defined = undef_mask.get(src.offset + Size::from_bytes(i));
for j in 0..repeat {
dest_allocation.undef_mask.set(
dest.offset + Size::from_bytes(i + (size.bytes() * j)),

View File

@ -7,7 +7,7 @@ use rustc::mir::interpret::{GlobalId, Value, Scalar, EvalResult, Pointer};
use super::{EvalContext, Machine, ValTy};
use interpret::memory::HasMemory;
#[derive(Copy, Clone, Debug)]
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
pub enum Place {
/// A place referring to a value allocated in the `Memory` system.
Ptr {
@ -24,7 +24,7 @@ pub enum Place {
Local { frame: usize, local: mir::Local },
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
pub enum PlaceExtra {
None,
Length(u64),

View File

@ -8,13 +8,34 @@ use rustc::mir::interpret::EvalResult;
use super::{EvalContext, Machine};
impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M> {
pub fn inc_step_counter_and_check_limit(&mut self, n: usize) {
self.terminators_remaining = self.terminators_remaining.saturating_sub(n);
if self.terminators_remaining == 0 {
// FIXME(#49980): make this warning a lint
self.tcx.sess.span_warn(self.frame().span, "Constant evaluating a complex constant, this might take some time");
self.terminators_remaining = 1_000_000;
pub fn inc_step_counter_and_detect_loops(&mut self) -> EvalResult<'tcx, ()> {
/// The number of steps between loop detector snapshots.
/// Should be a power of two for performance reasons.
const DETECTOR_SNAPSHOT_PERIOD: isize = 256;
{
let steps = &mut self.steps_since_detector_enabled;
*steps += 1;
if *steps < 0 {
return Ok(());
}
*steps %= DETECTOR_SNAPSHOT_PERIOD;
if *steps != 0 {
return Ok(());
}
}
if self.loop_detector.is_empty() {
// First run of the loop detector
// FIXME(#49980): make this warning a lint
self.tcx.sess.span_warn(self.frame().span,
"Constant evaluating a complex constant, this might take some time");
}
self.loop_detector.observe_and_analyze(&self.machine, &self.stack, &self.memory)
}
/// Returns true as long as there are more things to do.
@ -36,7 +57,7 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M> {
return Ok(true);
}
self.inc_step_counter_and_check_limit(1);
self.inc_step_counter_and_detect_loops()?;
let terminator = basic_block.terminator();
assert_eq!(old_frames, self.cur_frame());

View File

@ -0,0 +1,25 @@
// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(const_let)]
fn main() {
// Tests the Collatz conjecture with an incorrect base case (0 instead of 1).
// The value of `n` will loop indefinitely (4 - 2 - 1 - 4).
let _ = [(); {
//~^ WARNING Constant evaluating a complex constant, this might take some time
//~| ERROR could not evaluate repeat length
let mut n = 113383; // #20 in https://oeis.org/A006884
while n != 0 { //~ ERROR constant contains unimplemented expression type
n = if n % 2 == 0 { n/2 } else { 3*n + 1 };
}
n
}];
}

View File

@ -0,0 +1,41 @@
error[E0019]: constant contains unimplemented expression type
--> $DIR/infinite_loop.rs:20:9
|
LL | / while n != 0 { //~ ERROR constant contains unimplemented expression type
LL | | n = if n % 2 == 0 { n/2 } else { 3*n + 1 };
LL | | }
| |_________^
warning: Constant evaluating a complex constant, this might take some time
--> $DIR/infinite_loop.rs:16:18
|
LL | let _ = [(); {
| __________________^
LL | | //~^ WARNING Constant evaluating a complex constant, this might take some time
LL | | //~| ERROR could not evaluate repeat length
LL | | let mut n = 113383; // #20 in https://oeis.org/A006884
... |
LL | | n
LL | | }];
| |_____^
error[E0080]: could not evaluate repeat length
--> $DIR/infinite_loop.rs:16:18
|
LL | let _ = [(); {
| __________________^
LL | | //~^ WARNING Constant evaluating a complex constant, this might take some time
LL | | //~| ERROR could not evaluate repeat length
LL | | let mut n = 113383; // #20 in https://oeis.org/A006884
LL | | while n != 0 { //~ ERROR constant contains unimplemented expression type
LL | | n = if n % 2 == 0 { n/2 } else { 3*n + 1 };
| | ---------- duplicate interpreter state observed here, const evaluation will never terminate
LL | | }
LL | | n
LL | | }];
| |_____^
error: aborting due to 2 previous errors
Some errors occurred: E0019, E0080.
For more information about an error, try `rustc --explain E0019`.