Auto merge of #108815 - the8472:process-obligations-fast-skip, r=nnethercote

fast path for process_obligations Speeds up `keccak` and `cranelift-codegen` in perf.rlo.
2023-03-18 07:07:53 +00:00 · 2023-03-18 07:07:53 +00:00 · df61fcaec1
commit df61fcaec1
parent 85123d2504 7cce618d18
6 changed files with 79 additions and 6 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1365,9 +1365,9 @@ dependencies = [

 [[package]]
 name = "ena"
-version = "0.14.1"
+version = "0.14.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b2e5d13ca2353ab7d0230988629def93914a8c4015f621f9b13ed2955614731d"
+checksum = "c533630cf40e9caa44bd91aadc88a75d75a4c3a12b4cfde353cbed41daa1e1f1"
 dependencies = [
 "log",
 ]
--- a/compiler/rustc_data_structures/Cargo.toml
+++ b/compiler/rustc_data_structures/Cargo.toml
@ -9,7 +9,7 @@ edition = "2021"
 arrayvec = { version = "0.7", default-features = false }
 bitflags = "1.2.1"
 cfg-if = "1.0"
-ena = "0.14.1"
+ena = "0.14.2"
 indexmap = { version = "1.9.1" }
 jobserver_crate = { version = "0.1.13", package = "jobserver" }
 libc = "0.2"
--- a/compiler/rustc_data_structures/src/obligation_forest/mod.rs
+++ b/compiler/rustc_data_structures/src/obligation_forest/mod.rs
@ -97,7 +97,17 @@ pub trait ObligationProcessor {
    type Error: Debug;
    type OUT: OutcomeTrait<Obligation = Self::Obligation, Error = Error<Self::Obligation, Self::Error>>;

-    fn needs_process_obligation(&self, obligation: &Self::Obligation) -> bool;
+    /// Implementations can provide a fast-path to obligation-processing
+    /// by counting the prefix of the passed iterator for which
+    /// `needs_process_obligation` would return false.
+    fn skippable_obligations<'a>(
+        &'a self,
+        _it: impl Iterator<Item = &'a Self::Obligation>,
+    ) -> usize {
+        0
+    }
+
+    fn needs_process_obligation(&self, _obligation: &Self::Obligation) -> bool;

    fn process_obligation(
        &mut self,
@ -416,6 +426,10 @@ pub fn process_obligations<P>(&mut self, processor: &mut P) -> P::OUT
        loop {
            let mut has_changed = false;

+            // This is the super fast path for cheap-to-check conditions.
+            let mut index =
+                processor.skippable_obligations(self.nodes.iter().map(|n| &n.obligation));
+
            // Note that the loop body can append new nodes, and those new nodes
            // will then be processed by subsequent iterations of the loop.
            //
@ -424,9 +438,8 @@ pub fn process_obligations<P>(&mut self, processor: &mut P) -> P::OUT
            // `for index in 0..self.nodes.len() { ... }` because the range would
            // be computed with the initial length, and we would miss the appended
            // nodes. Therefore we use a `while` loop.
-            let mut index = 0;
            while let Some(node) = self.nodes.get_mut(index) {
-                // This test is extremely hot.
+                // This is the moderately fast path when the prefix skipping above didn't work out.
                if node.state.get() != NodeState::Pending
                    || !processor.needs_process_obligation(&node.obligation)
                {
--- a/compiler/rustc_infer/src/infer/mod.rs
+++ b/compiler/rustc_infer/src/infer/mod.rs
@ -187,6 +187,16 @@ pub fn projection_cache(&mut self) -> traits::ProjectionCache<'_, 'tcx> {
        self.projection_cache.with_log(&mut self.undo_log)
    }

+    #[inline]
+    fn try_type_variables_probe_ref(
+        &self,
+        vid: ty::TyVid,
+    ) -> Option<&type_variable::TypeVariableValue<'tcx>> {
+        // Uses a read-only view of the unification table, this way we don't
+        // need an undo log.
+        self.type_variable_storage.eq_relations_ref().try_probe_value(vid)
+    }
+
    #[inline]
    fn type_variables(&mut self) -> type_variable::TypeVariableTable<'_, 'tcx> {
        self.type_variable_storage.with_log(&mut self.undo_log)
@ -1646,6 +1656,28 @@ pub fn const_eval_resolve(
        tcx.const_eval_resolve_for_typeck(param_env_erased, unevaluated, span)
    }

+    /// The returned function is used in a fast path. If it returns `true` the variable is
+    /// unchanged, `false` indicates that the status is unknown.
+    #[inline]
+    pub fn is_ty_infer_var_definitely_unchanged<'a>(
+        &'a self,
+    ) -> (impl Fn(TyOrConstInferVar<'tcx>) -> bool + 'a) {
+        // This hoists the borrow/release out of the loop body.
+        let inner = self.inner.try_borrow();
+
+        return move |infer_var: TyOrConstInferVar<'tcx>| match (infer_var, &inner) {
+            (TyOrConstInferVar::Ty(ty_var), Ok(inner)) => {
+                use self::type_variable::TypeVariableValue;
+
+                match inner.try_type_variables_probe_ref(ty_var) {
+                    Some(TypeVariableValue::Unknown { .. }) => true,
+                    _ => false,
+                }
+            }
+            _ => false,
+        };
+    }
+
    /// `ty_or_const_infer_var_changed` is equivalent to one of these two:
    ///   * `shallow_resolve(ty) != ty` (where `ty.kind = ty::Infer(_)`)
    ///   * `shallow_resolve(ct) != ct` (where `ct.kind = ty::ConstKind::Infer(_)`)
--- a/compiler/rustc_infer/src/infer/type_variable.rs
+++ b/compiler/rustc_infer/src/infer/type_variable.rs
@ -190,6 +190,11 @@ pub(crate) fn with_log<'a>(
    ) -> TypeVariableTable<'a, 'tcx> {
        TypeVariableTable { storage: self, undo_log }
    }
+
+    #[inline]
+    pub(crate) fn eq_relations_ref(&self) -> &ut::UnificationTableStorage<TyVidEqKey<'tcx>> {
+        &self.eq_relations
+    }
 }

 impl<'tcx> TypeVariableTable<'_, 'tcx> {
--- a/compiler/rustc_trait_selection/src/traits/fulfill.rs
+++ b/compiler/rustc_trait_selection/src/traits/fulfill.rs
@ -211,6 +211,29 @@ impl<'a, 'tcx> ObligationProcessor for FulfillProcessor<'a, 'tcx> {
    type Error = FulfillmentErrorCode<'tcx>;
    type OUT = Outcome<Self::Obligation, Self::Error>;

+    /// Compared to `needs_process_obligation` this and its callees
+    /// contain some optimizations that come at the price of false negatives.
+    ///
+    /// They
+    /// - reduce branching by covering only the most common case
+    /// - take a read-only view of the unification tables which allows skipping undo_log
+    ///   construction.
+    /// - bail out on value-cache misses in ena to avoid pointer chasing
+    /// - hoist RefCell locking out of the loop
+    #[inline]
+    fn skippable_obligations<'b>(
+        &'b self,
+        it: impl Iterator<Item = &'b Self::Obligation>,
+    ) -> usize {
+        let is_unchanged = self.selcx.infcx.is_ty_infer_var_definitely_unchanged();
+
+        it.take_while(|o| match o.stalled_on.as_slice() {
+            [o] => is_unchanged(*o),
+            _ => false,
+        })
+        .count()
+    }
+
    /// Identifies whether a predicate obligation needs processing.
    ///
    /// This is always inlined because it has a single callsite and it is