Auto merge of #108815 - the8472:process-obligations-fast-skip, r=nnethercote

fast path for process_obligations

Speeds up `keccak` and `cranelift-codegen` in perf.rlo.
This commit is contained in:
bors 2023-03-18 07:07:53 +00:00
commit df61fcaec1
6 changed files with 79 additions and 6 deletions

View File

@ -1365,9 +1365,9 @@ dependencies = [
[[package]]
name = "ena"
version = "0.14.1"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2e5d13ca2353ab7d0230988629def93914a8c4015f621f9b13ed2955614731d"
checksum = "c533630cf40e9caa44bd91aadc88a75d75a4c3a12b4cfde353cbed41daa1e1f1"
dependencies = [
"log",
]

View File

@ -9,7 +9,7 @@ edition = "2021"
arrayvec = { version = "0.7", default-features = false }
bitflags = "1.2.1"
cfg-if = "1.0"
ena = "0.14.1"
ena = "0.14.2"
indexmap = { version = "1.9.1" }
jobserver_crate = { version = "0.1.13", package = "jobserver" }
libc = "0.2"

View File

@ -97,7 +97,17 @@ pub trait ObligationProcessor {
type Error: Debug;
type OUT: OutcomeTrait<Obligation = Self::Obligation, Error = Error<Self::Obligation, Self::Error>>;
fn needs_process_obligation(&self, obligation: &Self::Obligation) -> bool;
/// Implementations can provide a fast-path to obligation-processing
/// by counting the prefix of the passed iterator for which
/// `needs_process_obligation` would return false.
fn skippable_obligations<'a>(
&'a self,
_it: impl Iterator<Item = &'a Self::Obligation>,
) -> usize {
0
}
fn needs_process_obligation(&self, _obligation: &Self::Obligation) -> bool;
fn process_obligation(
&mut self,
@ -416,6 +426,10 @@ pub fn process_obligations<P>(&mut self, processor: &mut P) -> P::OUT
loop {
let mut has_changed = false;
// This is the super fast path for cheap-to-check conditions.
let mut index =
processor.skippable_obligations(self.nodes.iter().map(|n| &n.obligation));
// Note that the loop body can append new nodes, and those new nodes
// will then be processed by subsequent iterations of the loop.
//
@ -424,9 +438,8 @@ pub fn process_obligations<P>(&mut self, processor: &mut P) -> P::OUT
// `for index in 0..self.nodes.len() { ... }` because the range would
// be computed with the initial length, and we would miss the appended
// nodes. Therefore we use a `while` loop.
let mut index = 0;
while let Some(node) = self.nodes.get_mut(index) {
// This test is extremely hot.
// This is the moderately fast path when the prefix skipping above didn't work out.
if node.state.get() != NodeState::Pending
|| !processor.needs_process_obligation(&node.obligation)
{

View File

@ -187,6 +187,16 @@ pub fn projection_cache(&mut self) -> traits::ProjectionCache<'_, 'tcx> {
self.projection_cache.with_log(&mut self.undo_log)
}
#[inline]
fn try_type_variables_probe_ref(
&self,
vid: ty::TyVid,
) -> Option<&type_variable::TypeVariableValue<'tcx>> {
// Uses a read-only view of the unification table, this way we don't
// need an undo log.
self.type_variable_storage.eq_relations_ref().try_probe_value(vid)
}
#[inline]
fn type_variables(&mut self) -> type_variable::TypeVariableTable<'_, 'tcx> {
self.type_variable_storage.with_log(&mut self.undo_log)
@ -1646,6 +1656,28 @@ pub fn const_eval_resolve(
tcx.const_eval_resolve_for_typeck(param_env_erased, unevaluated, span)
}
/// The returned function is used in a fast path. If it returns `true` the variable is
/// unchanged, `false` indicates that the status is unknown.
#[inline]
pub fn is_ty_infer_var_definitely_unchanged<'a>(
&'a self,
) -> (impl Fn(TyOrConstInferVar<'tcx>) -> bool + 'a) {
// This hoists the borrow/release out of the loop body.
let inner = self.inner.try_borrow();
return move |infer_var: TyOrConstInferVar<'tcx>| match (infer_var, &inner) {
(TyOrConstInferVar::Ty(ty_var), Ok(inner)) => {
use self::type_variable::TypeVariableValue;
match inner.try_type_variables_probe_ref(ty_var) {
Some(TypeVariableValue::Unknown { .. }) => true,
_ => false,
}
}
_ => false,
};
}
/// `ty_or_const_infer_var_changed` is equivalent to one of these two:
/// * `shallow_resolve(ty) != ty` (where `ty.kind = ty::Infer(_)`)
/// * `shallow_resolve(ct) != ct` (where `ct.kind = ty::ConstKind::Infer(_)`)

View File

@ -190,6 +190,11 @@ pub(crate) fn with_log<'a>(
) -> TypeVariableTable<'a, 'tcx> {
TypeVariableTable { storage: self, undo_log }
}
#[inline]
pub(crate) fn eq_relations_ref(&self) -> &ut::UnificationTableStorage<TyVidEqKey<'tcx>> {
&self.eq_relations
}
}
impl<'tcx> TypeVariableTable<'_, 'tcx> {

View File

@ -211,6 +211,29 @@ impl<'a, 'tcx> ObligationProcessor for FulfillProcessor<'a, 'tcx> {
type Error = FulfillmentErrorCode<'tcx>;
type OUT = Outcome<Self::Obligation, Self::Error>;
/// Compared to `needs_process_obligation` this and its callees
/// contain some optimizations that come at the price of false negatives.
///
/// They
/// - reduce branching by covering only the most common case
/// - take a read-only view of the unification tables which allows skipping undo_log
/// construction.
/// - bail out on value-cache misses in ena to avoid pointer chasing
/// - hoist RefCell locking out of the loop
#[inline]
fn skippable_obligations<'b>(
&'b self,
it: impl Iterator<Item = &'b Self::Obligation>,
) -> usize {
let is_unchanged = self.selcx.infcx.is_ty_infer_var_definitely_unchanged();
it.take_while(|o| match o.stalled_on.as_slice() {
[o] => is_unchanged(*o),
_ => false,
})
.count()
}
/// Identifies whether a predicate obligation needs processing.
///
/// This is always inlined because it has a single callsite and it is