Auto merge of #51702 - ecstatic-morse:infinite-loop-detection, r=oli-obk

Infinite loop detection for const evaluation Resolves #50637. An `EvalContext` stores the transient state (stack, heap, etc.) of the MIRI virtual machine while it executing code. As long as MIRI only executes pure functions, we can detect if a program is in a state where it will never terminate by periodically taking a "snapshot" of this transient state and comparing it to previous ones. If any two states are exactly equal, the machine must be in an infinite loop. Instead of fully cloning a snapshot every time the detector is run, we store a snapshot's hash. Only when a hash collision occurs do we fully clone the interpreter state. Future snapshots which cause a collision will be compared against this clone, causing the interpreter to abort if they are equal. At the moment, snapshots are not taken until MIRI has progressed a certain amount. After this threshold, snapshots are taken every `DETECTOR_SNAPSHOT_PERIOD` steps. This means that an infinite loop with period `P` will be detected after a maximum of `2 * P * DETECTOR_SNAPSHOT_PERIOD` interpreter steps. The factor of 2 arises because we only clone a snapshot after it causes a hash collision.
2018-07-11 16:24:46 +00:00 · 2018-07-11 16:24:46 +00:00 · d573fe1778
commit d573fe1778
parent 66787e0524 cf5eaa75bb
13 changed files with 302 additions and 27 deletions
--- a/src/librustc/ich/impls_ty.rs
+++ b/src/librustc/ich/impls_ty.rs
@ -549,7 +549,8 @@ for ::mir::interpret::EvalErrorKind<'gcx, O> {
            RemainderByZero |
            DivisionByZero |
            GeneratorResumedAfterReturn |
-            GeneratorResumedAfterPanic => {}
+            GeneratorResumedAfterPanic |
+            InfiniteLoop => {}
            ReferencedConstant(ref err) => err.hash_stable(hcx, hasher),
            MachineError(ref err) => err.hash_stable(hcx, hasher),
            FunctionPointerTyMismatch(a, b) => {
--- a/src/librustc/mir/interpret/error.rs
+++ b/src/librustc/mir/interpret/error.rs
@ -264,6 +264,7 @@ pub enum EvalErrorKind<'tcx, O> {
    ReferencedConstant(Lrc<ConstEvalErr<'tcx>>),
    GeneratorResumedAfterReturn,
    GeneratorResumedAfterPanic,
+    InfiniteLoop,
 }

 pub type EvalResult<'tcx, T = ()> = Result<T, EvalError<'tcx>>;
@ -398,6 +399,8 @@ impl<'tcx, O> EvalErrorKind<'tcx, O> {
            RemainderByZero => "attempt to calculate the remainder with a divisor of zero",
            GeneratorResumedAfterReturn => "generator resumed after completion",
            GeneratorResumedAfterPanic => "generator resumed after panicking",
+            InfiniteLoop =>
+                "duplicate interpreter state observed here, const evaluation will never terminate",
        }
    }
 }
--- a/src/librustc/mir/interpret/mod.rs
+++ b/src/librustc/mir/interpret/mod.rs
@ -36,7 +36,7 @@ use ty::codec::TyDecoder;
 use std::sync::atomic::{AtomicU32, Ordering};
 use std::num::NonZeroU32;

-#[derive(Clone, Debug, PartialEq, RustcEncodable, RustcDecodable)]
+#[derive(Clone, Debug, PartialEq, Eq, Hash, RustcEncodable, RustcDecodable)]
 pub enum Lock {
    NoLock,
    WriteLock(DynamicLifetime),
--- a/src/librustc/mir/mod.rs
+++ b/src/librustc/mir/mod.rs
@ -1636,7 +1636,7 @@ impl Debug for ValidationOp {
 }

 // This is generic so that it can be reused by miri
-#[derive(Clone, RustcEncodable, RustcDecodable)]
+#[derive(Clone, Hash, PartialEq, Eq, RustcEncodable, RustcDecodable)]
 pub struct ValidationOperand<'tcx, T> {
    pub place: T,
    pub ty: Ty<'tcx>,
--- a/src/librustc/ty/structural_impls.rs
+++ b/src/librustc/ty/structural_impls.rs
@ -587,6 +587,7 @@ impl<'a, 'tcx, O: Lift<'tcx>> Lift<'tcx> for interpret::EvalErrorKind<'a, O> {
            RemainderByZero => RemainderByZero,
            GeneratorResumedAfterReturn => GeneratorResumedAfterReturn,
            GeneratorResumedAfterPanic => GeneratorResumedAfterPanic,
+            InfiniteLoop => InfiniteLoop,
        })
    }
 }
--- a/src/librustc_mir/interpret/const_eval.rs
+++ b/src/librustc_mir/interpret/const_eval.rs
@ -185,6 +185,7 @@ fn eval_body_using_ecx<'a, 'mir, 'tcx>(
    Ok((value, ptr, layout.ty))
 }

+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
 pub struct CompileTimeEvaluator;

 impl<'tcx> Into<EvalError<'tcx>> for ConstEvalError {
--- a/src/librustc_mir/interpret/eval_context.rs
+++ b/src/librustc_mir/interpret/eval_context.rs
@ -1,4 +1,5 @@
 use std::fmt::Write;
+use std::hash::{Hash, Hasher};
 use std::mem;

 use rustc::hir::def_id::DefId;
@ -9,6 +10,7 @@ use rustc::ty::layout::{self, Size, Align, HasDataLayout, IntegerExt, LayoutOf,
 use rustc::ty::subst::{Subst, Substs};
 use rustc::ty::{self, Ty, TyCtxt, TypeAndMut};
 use rustc::ty::query::TyCtxtAt;
+use rustc_data_structures::fx::{FxHashSet, FxHasher};
 use rustc_data_structures::indexed_vec::{IndexVec, Idx};
 use rustc::mir::interpret::{
    FrameInfo, GlobalId, Value, Scalar,
@ -41,13 +43,17 @@ pub struct EvalContext<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
    /// The maximum number of stack frames allowed
    pub(crate) stack_limit: usize,

-    /// The maximum number of terminators that may be evaluated.
-    /// This prevents infinite loops and huge computations from freezing up const eval.
-    /// Remove once halting problem is solved.
-    pub(crate) terminators_remaining: usize,
+    /// When this value is negative, it indicates the number of interpreter
+    /// steps *until* the loop detector is enabled. When it is positive, it is
+    /// the number of steps after the detector has been enabled modulo the loop
+    /// detector period.
+    pub(crate) steps_since_detector_enabled: isize,
+
+    pub(crate) loop_detector: InfiniteLoopDetector<'a, 'mir, 'tcx, M>,
 }

 /// A stack frame.
+#[derive(Clone)]
 pub struct Frame<'mir, 'tcx: 'mir> {
    ////////////////////////////////////////////////////////////////////////////////
    // Function and callsite information
@ -89,6 +95,121 @@ pub struct Frame<'mir, 'tcx: 'mir> {
    pub stmt: usize,
 }

+impl<'mir, 'tcx: 'mir> Eq for Frame<'mir, 'tcx> {}
+
+impl<'mir, 'tcx: 'mir> PartialEq for Frame<'mir, 'tcx> {
+    fn eq(&self, other: &Self) -> bool {
+        let Frame {
+            mir: _,
+            instance,
+            span: _,
+            return_to_block,
+            return_place,
+            locals,
+            block,
+            stmt,
+        } = self;
+
+        // Some of these are constant during evaluation, but are included
+        // anyways for correctness.
+        *instance == other.instance
+            && *return_to_block == other.return_to_block
+            && *return_place == other.return_place
+            && *locals == other.locals
+            && *block == other.block
+            && *stmt == other.stmt
+    }
+}
+
+impl<'mir, 'tcx: 'mir> Hash for Frame<'mir, 'tcx> {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        let Frame {
+            mir: _,
+            instance,
+            span: _,
+            return_to_block,
+            return_place,
+            locals,
+            block,
+            stmt,
+        } = self;
+
+        instance.hash(state);
+        return_to_block.hash(state);
+        return_place.hash(state);
+        locals.hash(state);
+        block.hash(state);
+        stmt.hash(state);
+    }
+}
+
+/// The virtual machine state during const-evaluation at a given point in time.
+type EvalSnapshot<'a, 'mir, 'tcx, M>
+    = (M, Vec<Frame<'mir, 'tcx>>, Memory<'a, 'mir, 'tcx, M>);
+
+pub(crate) struct InfiniteLoopDetector<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
+    /// The set of all `EvalSnapshot` *hashes* observed by this detector.
+    ///
+    /// When a collision occurs in this table, we store the full snapshot in
+    /// `snapshots`.
+    hashes: FxHashSet<u64>,
+
+    /// The set of all `EvalSnapshot`s observed by this detector.
+    ///
+    /// An `EvalSnapshot` will only be fully cloned once it has caused a
+    /// collision in `hashes`. As a result, the detector must observe at least
+    /// *two* full cycles of an infinite loop before it triggers.
+    snapshots: FxHashSet<EvalSnapshot<'a, 'mir, 'tcx, M>>,
+}
+
+impl<'a, 'mir, 'tcx, M> Default for InfiniteLoopDetector<'a, 'mir, 'tcx, M>
+    where M: Machine<'mir, 'tcx>,
+          'tcx: 'a + 'mir,
+{
+    fn default() -> Self {
+        InfiniteLoopDetector {
+            hashes: FxHashSet::default(),
+            snapshots: FxHashSet::default(),
+        }
+    }
+}
+
+impl<'a, 'mir, 'tcx, M> InfiniteLoopDetector<'a, 'mir, 'tcx, M>
+    where M: Machine<'mir, 'tcx>,
+          'tcx: 'a + 'mir,
+{
+    /// Returns `true` if the loop detector has not yet observed a snapshot.
+    pub fn is_empty(&self) -> bool {
+        self.hashes.is_empty()
+    }
+
+    pub fn observe_and_analyze(
+        &mut self,
+        machine: &M,
+        stack: &Vec<Frame<'mir, 'tcx>>,
+        memory: &Memory<'a, 'mir, 'tcx, M>,
+    ) -> EvalResult<'tcx, ()> {
+        let snapshot = (machine, stack, memory);
+
+        let mut fx = FxHasher::default();
+        snapshot.hash(&mut fx);
+        let hash = fx.finish();
+
+        if self.hashes.insert(hash) {
+            // No collision
+            return Ok(())
+        }
+
+        if self.snapshots.insert((machine.clone(), stack.clone(), memory.clone())) {
+            // Spurious collision or first cycle
+            return Ok(())
+        }
+
+        // Second cycle
+        Err(EvalErrorKind::InfiniteLoop.into())
+    }
+}
+
 #[derive(Clone, Debug, Eq, PartialEq, Hash)]
 pub enum StackPopCleanup {
    /// The stackframe existed to compute the initial value of a static/constant, make sure it
@ -173,7 +294,7 @@ impl<'c, 'b, 'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> LayoutOf
    }
 }

-const MAX_TERMINATORS: usize = 1_000_000;
+const STEPS_UNTIL_DETECTOR_ENABLED: isize = 1_000_000;

 impl<'a, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M> {
    pub fn new(
@ -189,16 +310,17 @@ impl<'a, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M
            memory: Memory::new(tcx, memory_data),
            stack: Vec::new(),
            stack_limit: tcx.sess.const_eval_stack_frame_limit,
-            terminators_remaining: MAX_TERMINATORS,
+            loop_detector: Default::default(),
+            steps_since_detector_enabled: -STEPS_UNTIL_DETECTOR_ENABLED,
        }
    }

    pub(crate) fn with_fresh_body<F: FnOnce(&mut Self) -> R, R>(&mut self, f: F) -> R {
        let stack = mem::replace(&mut self.stack, Vec::new());
-        let terminators_remaining = mem::replace(&mut self.terminators_remaining, MAX_TERMINATORS);
+        let steps = mem::replace(&mut self.steps_since_detector_enabled, -STEPS_UNTIL_DETECTOR_ENABLED);
        let r = f(self);
        self.stack = stack;
-        self.terminators_remaining = terminators_remaining;
+        self.steps_since_detector_enabled = steps;
        r
    }

@ -538,8 +660,6 @@ impl<'a, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M
            }

            Aggregate(ref kind, ref operands) => {
-                self.inc_step_counter_and_check_limit(operands.len());
-
                let (dest, active_field_index) = match **kind {
                    mir::AggregateKind::Adt(adt_def, variant_index, _, active_field_index) => {
                        self.write_discriminant_value(dest_ty, dest, variant_index)?;
--- a/src/librustc_mir/interpret/machine.rs
+++ b/src/librustc_mir/interpret/machine.rs
@ -2,6 +2,8 @@
 //! This separation exists to ensure that no fancy miri features like
 //! interpreting common C functions leak into CTFE.

+use std::hash::Hash;
+
 use rustc::mir::interpret::{AllocId, EvalResult, Scalar, Pointer, AccessKind, GlobalId};
 use super::{EvalContext, Place, ValTy, Memory};

@ -13,9 +15,9 @@ use syntax::ast::Mutability;

 /// Methods of this trait signifies a point where CTFE evaluation would fail
 /// and some use case dependent behaviour can instead be applied
-pub trait Machine<'mir, 'tcx>: Sized {
+pub trait Machine<'mir, 'tcx>: Clone + Eq + Hash {
    /// Additional data that can be accessed via the Memory
-    type MemoryData;
+    type MemoryData: Clone + Eq + Hash;

    /// Additional memory kinds a machine wishes to distinguish from the builtin ones
    type MemoryKinds: ::std::fmt::Debug + PartialEq + Copy + Clone;
--- a/src/librustc_mir/interpret/memory.rs
+++ b/src/librustc_mir/interpret/memory.rs
@ -1,4 +1,5 @@
 use std::collections::VecDeque;
+use std::hash::{Hash, Hasher};
 use std::ptr;

 use rustc::hir::def_id::DefId;
@ -9,7 +10,7 @@ use rustc::ty::layout::{self, Align, TargetDataLayout, Size};
 use rustc::mir::interpret::{Pointer, AllocId, Allocation, AccessKind, Value,
                            EvalResult, Scalar, EvalErrorKind, GlobalId, AllocType};
 pub use rustc::mir::interpret::{write_target_uint, write_target_int, read_target_uint};
-use rustc_data_structures::fx::{FxHashSet, FxHashMap};
+use rustc_data_structures::fx::{FxHashSet, FxHashMap, FxHasher};

 use syntax::ast::Mutability;

@ -19,7 +20,7 @@ use super::{EvalContext, Machine};
 // Allocations and pointers
 ////////////////////////////////////////////////////////////////////////////////

-#[derive(Debug, PartialEq, Copy, Clone)]
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
 pub enum MemoryKind<T> {
    /// Error if deallocated except during a stack pop
    Stack,
@ -31,6 +32,7 @@ pub enum MemoryKind<T> {
 // Top-level interpreter memory
 ////////////////////////////////////////////////////////////////////////////////

+#[derive(Clone)]
 pub struct Memory<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
    /// Additional data required by the Machine
    pub data: M::MemoryData,
@ -47,6 +49,64 @@ pub struct Memory<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
    pub tcx: TyCtxtAt<'a, 'tcx, 'tcx>,
 }

+impl<'a, 'mir, 'tcx, M> Eq for Memory<'a, 'mir, 'tcx, M>
+    where M: Machine<'mir, 'tcx>,
+          'tcx: 'a + 'mir,
+{}
+
+impl<'a, 'mir, 'tcx, M> PartialEq for Memory<'a, 'mir, 'tcx, M>
+    where M: Machine<'mir, 'tcx>,
+          'tcx: 'a + 'mir,
+{
+    fn eq(&self, other: &Self) -> bool {
+        let Memory {
+            data,
+            alloc_kind,
+            alloc_map,
+            cur_frame,
+            tcx: _,
+        } = self;
+
+        *data == other.data
+            && *alloc_kind == other.alloc_kind
+            && *alloc_map == other.alloc_map
+            && *cur_frame == other.cur_frame
+    }
+}
+
+impl<'a, 'mir, 'tcx, M> Hash for Memory<'a, 'mir, 'tcx, M>
+    where M: Machine<'mir, 'tcx>,
+          'tcx: 'a + 'mir,
+{
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        let Memory {
+            data,
+            alloc_kind: _,
+            alloc_map: _,
+            cur_frame,
+            tcx: _,
+        } = self;
+
+        data.hash(state);
+        cur_frame.hash(state);
+
+        // We ignore some fields which don't change between evaluation steps.
+
+        // Since HashMaps which contain the same items may have different
+        // iteration orders, we use a commutative operation (in this case
+        // addition, but XOR would also work), to combine the hash of each
+        // `Allocation`.
+        self.allocations()
+            .map(|allocs| {
+                let mut h = FxHasher::default();
+                allocs.hash(&mut h);
+                h.finish()
+            })
+            .fold(0u64, |hash, x| hash.wrapping_add(x))
+            .hash(state);
+    }
+}
+
 impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
    pub fn new(tcx: TyCtxtAt<'a, 'tcx, 'tcx>, data: M::MemoryData) -> Self {
        Memory {
@ -866,7 +926,7 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {

        for i in 0..size.bytes() {
            let defined = undef_mask.get(src.offset + Size::from_bytes(i));
-            
+
            for j in 0..repeat {
                dest_allocation.undef_mask.set(
                    dest.offset + Size::from_bytes(i + (size.bytes() * j)),
--- a/src/librustc_mir/interpret/place.rs
+++ b/src/librustc_mir/interpret/place.rs
@ -7,7 +7,7 @@ use rustc::mir::interpret::{GlobalId, Value, Scalar, EvalResult, Pointer};
 use super::{EvalContext, Machine, ValTy};
 use interpret::memory::HasMemory;

-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
 pub enum Place {
    /// A place referring to a value allocated in the `Memory` system.
    Ptr {
@ -24,7 +24,7 @@ pub enum Place {
    Local { frame: usize, local: mir::Local },
 }

-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
 pub enum PlaceExtra {
    None,
    Length(u64),
--- a/src/librustc_mir/interpret/step.rs
+++ b/src/librustc_mir/interpret/step.rs
@ -8,13 +8,34 @@ use rustc::mir::interpret::EvalResult;
 use super::{EvalContext, Machine};

 impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M> {
-    pub fn inc_step_counter_and_check_limit(&mut self, n: usize) {
-        self.terminators_remaining = self.terminators_remaining.saturating_sub(n);
-        if self.terminators_remaining == 0 {
-            // FIXME(#49980): make this warning a lint
-            self.tcx.sess.span_warn(self.frame().span, "Constant evaluating a complex constant, this might take some time");
-            self.terminators_remaining = 1_000_000;
+    pub fn inc_step_counter_and_detect_loops(&mut self) -> EvalResult<'tcx, ()> {
+        /// The number of steps between loop detector snapshots.
+        /// Should be a power of two for performance reasons.
+        const DETECTOR_SNAPSHOT_PERIOD: isize = 256;
+
+        {
+            let steps = &mut self.steps_since_detector_enabled;
+
+            *steps += 1;
+            if *steps < 0 {
+                return Ok(());
+            }
+
+            *steps %= DETECTOR_SNAPSHOT_PERIOD;
+            if *steps != 0 {
+                return Ok(());
+            }
        }
+
+        if self.loop_detector.is_empty() {
+            // First run of the loop detector
+
+            // FIXME(#49980): make this warning a lint
+            self.tcx.sess.span_warn(self.frame().span,
+                "Constant evaluating a complex constant, this might take some time");
+        }
+
+        self.loop_detector.observe_and_analyze(&self.machine, &self.stack, &self.memory)
    }

    /// Returns true as long as there are more things to do.
@ -36,7 +57,7 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M> {
            return Ok(true);
        }

-        self.inc_step_counter_and_check_limit(1);
+        self.inc_step_counter_and_detect_loops()?;

        let terminator = basic_block.terminator();
        assert_eq!(old_frames, self.cur_frame());
--- a/src/test/ui/const-eval/infinite_loop.rs
+++ b/src/test/ui/const-eval/infinite_loop.rs
@ -0,0 +1,25 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![feature(const_let)]
+
+fn main() {
+    // Tests the Collatz conjecture with an incorrect base case (0 instead of 1).
+    // The value of `n` will loop indefinitely (4 - 2 - 1 - 4).
+    let _ = [(); {
+        //~^ WARNING Constant evaluating a complex constant, this might take some time
+        //~| ERROR could not evaluate repeat length
+        let mut n = 113383; // #20 in https://oeis.org/A006884
+        while n != 0 { //~ ERROR constant contains unimplemented expression type
+            n = if n % 2 == 0 { n/2 } else { 3*n + 1 };
+        }
+        n
+    }];
+}
--- a/src/test/ui/const-eval/infinite_loop.stderr
+++ b/src/test/ui/const-eval/infinite_loop.stderr
@ -0,0 +1,41 @@
+error[E0019]: constant contains unimplemented expression type
+  --> $DIR/infinite_loop.rs:20:9
+   |
+LL | /         while n != 0 { //~ ERROR constant contains unimplemented expression type
+LL | |             n = if n % 2 == 0 { n/2 } else { 3*n + 1 };
+LL | |         }
+   | |_________^
+
+warning: Constant evaluating a complex constant, this might take some time
+  --> $DIR/infinite_loop.rs:16:18
+   |
+LL |       let _ = [(); {
+   |  __________________^
+LL | |         //~^ WARNING Constant evaluating a complex constant, this might take some time
+LL | |         //~| ERROR could not evaluate repeat length
+LL | |         let mut n = 113383; // #20 in https://oeis.org/A006884
+...  |
+LL | |         n
+LL | |     }];
+   | |_____^
+
+error[E0080]: could not evaluate repeat length
+  --> $DIR/infinite_loop.rs:16:18
+   |
+LL |       let _ = [(); {
+   |  __________________^
+LL | |         //~^ WARNING Constant evaluating a complex constant, this might take some time
+LL | |         //~| ERROR could not evaluate repeat length
+LL | |         let mut n = 113383; // #20 in https://oeis.org/A006884
+LL | |         while n != 0 { //~ ERROR constant contains unimplemented expression type
+LL | |             n = if n % 2 == 0 { n/2 } else { 3*n + 1 };
+   | |                    ---------- duplicate interpreter state observed here, const evaluation will never terminate
+LL | |         }
+LL | |         n
+LL | |     }];
+   | |_____^
+
+error: aborting due to 2 previous errors
+
+Some errors occurred: E0019, E0080.
+For more information about an error, try `rustc --explain E0019`.