Auto merge of #119977 - Mark-Simulacrum:defid-cache, r=cjgillot

Cache local DefId-keyed queries without hashing

This caches local DefId-keyed queries using just an IndexVec. This costs ~5% extra max-rss at most but brings significant runtime improvement, up to 13% cycle counts (mean: 4%) on primary benchmarks. It's possible that further tweaks could reduce the memory overhead further but this win seems worth landing despite the increased memory, particularly with regards to eliminating the present set in non-incr or storing it inline (skip list?) with the main data.

We tried applying this scheme to all keys in the [first perf run] but found that it carried a significant memory hit (50%). instructions/cycle counts were also much more mixed, though that may have been due to the lack of the present set optimization (needed for fast iter() calls in incremental scenarios).

Closes https://github.com/rust-lang/rust/issues/45275

[first perf run]: https://perf.rust-lang.org/compare.html?start=30dfb9e046aeb878db04332c74de76e52fb7db10&end=6235575300d8e6e2cc6f449cb9048722ef43f9c7&stat=instructions:u
This commit is contained in:
bors 2024-01-16 21:58:10 +00:00
commit 098d4fd74c
3 changed files with 82 additions and 3 deletions

View File

@ -9,6 +9,7 @@
use crate::ty::{GenericArg, GenericArgsRef}; use crate::ty::{GenericArg, GenericArgsRef};
use rustc_hir::def_id::{CrateNum, DefId, LocalDefId, LocalModDefId, ModDefId, LOCAL_CRATE}; use rustc_hir::def_id::{CrateNum, DefId, LocalDefId, LocalModDefId, ModDefId, LOCAL_CRATE};
use rustc_hir::hir_id::{HirId, OwnerId}; use rustc_hir::hir_id::{HirId, OwnerId};
use rustc_query_system::query::DefIdCacheSelector;
use rustc_query_system::query::{DefaultCacheSelector, SingleCacheSelector, VecCacheSelector}; use rustc_query_system::query::{DefaultCacheSelector, SingleCacheSelector, VecCacheSelector};
use rustc_span::symbol::{Ident, Symbol}; use rustc_span::symbol::{Ident, Symbol};
use rustc_span::{Span, DUMMY_SP}; use rustc_span::{Span, DUMMY_SP};
@ -152,7 +153,7 @@ fn key_as_def_id(&self) -> Option<DefId> {
} }
impl Key for DefId { impl Key for DefId {
type CacheSelector = DefaultCacheSelector<Self>; type CacheSelector = DefIdCacheSelector;
fn default_span(&self, tcx: TyCtxt<'_>) -> Span { fn default_span(&self, tcx: TyCtxt<'_>) -> Span {
tcx.def_span(*self) tcx.def_span(*self)

View File

@ -2,8 +2,11 @@
use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sharded::{self, Sharded}; use rustc_data_structures::sharded::{self, Sharded};
use rustc_data_structures::sync::OnceLock; use rustc_data_structures::sync::{Lock, OnceLock};
use rustc_hir::def_id::LOCAL_CRATE;
use rustc_index::{Idx, IndexVec}; use rustc_index::{Idx, IndexVec};
use rustc_span::def_id::DefId;
use rustc_span::def_id::DefIndex;
use std::fmt::Debug; use std::fmt::Debug;
use std::hash::Hash; use std::hash::Hash;
use std::marker::PhantomData; use std::marker::PhantomData;
@ -148,6 +151,8 @@ impl<K, V> QueryCache for VecCache<K, V>
#[inline(always)] #[inline(always)]
fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> { fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
// FIXME: lock_shard_by_hash will use high bits which are usually zero in the index() passed
// here. This makes sharding essentially useless, always selecting the zero'th shard.
let lock = self.cache.lock_shard_by_hash(key.index() as u64); let lock = self.cache.lock_shard_by_hash(key.index() as u64);
if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None } if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None }
} }
@ -168,3 +173,75 @@ fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
} }
} }
} }
pub struct DefIdCacheSelector;
impl<'tcx, V: 'tcx> CacheSelector<'tcx, V> for DefIdCacheSelector {
type Cache = DefIdCache<V>
where
V: Copy;
}
pub struct DefIdCache<V> {
/// Stores the local DefIds in a dense map. Local queries are much more often dense, so this is
/// a win over hashing query keys at marginal memory cost (~5% at most) compared to FxHashMap.
///
/// The second element of the tuple is the set of keys actually present in the IndexVec, used
/// for faster iteration in `iter()`.
// FIXME: This may want to be sharded, like VecCache. However *how* to shard an IndexVec isn't
// super clear; VecCache is effectively not sharded today (see FIXME there). For now just omit
// that complexity here.
local: Lock<(IndexVec<DefIndex, Option<(V, DepNodeIndex)>>, Vec<DefIndex>)>,
foreign: DefaultCache<DefId, V>,
}
impl<V> Default for DefIdCache<V> {
fn default() -> Self {
DefIdCache { local: Default::default(), foreign: Default::default() }
}
}
impl<V> QueryCache for DefIdCache<V>
where
V: Copy,
{
type Key = DefId;
type Value = V;
#[inline(always)]
fn lookup(&self, key: &DefId) -> Option<(V, DepNodeIndex)> {
if key.krate == LOCAL_CRATE {
let cache = self.local.lock();
cache.0.get(key.index).and_then(|v| *v)
} else {
self.foreign.lookup(key)
}
}
#[inline]
fn complete(&self, key: DefId, value: V, index: DepNodeIndex) {
if key.krate == LOCAL_CRATE {
let mut cache = self.local.lock();
let (cache, present) = &mut *cache;
let slot = cache.ensure_contains_elem(key.index, Default::default);
if slot.is_none() {
// FIXME: Only store the present set when running in incremental mode. `iter` is not
// used outside of saving caches to disk and self-profile.
present.push(key.index);
}
*slot = Some((value, index));
} else {
self.foreign.complete(key, value, index)
}
}
fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
let guard = self.local.lock();
let (cache, present) = &*guard;
for &idx in present.iter() {
let value = cache[idx].unwrap();
f(&DefId { krate: LOCAL_CRATE, index: idx }, &value.0, value.1);
}
self.foreign.iter(f);
}
}

View File

@ -10,7 +10,8 @@
mod caches; mod caches;
pub use self::caches::{ pub use self::caches::{
CacheSelector, DefaultCacheSelector, QueryCache, SingleCacheSelector, VecCacheSelector, CacheSelector, DefIdCacheSelector, DefaultCacheSelector, QueryCache, SingleCacheSelector,
VecCacheSelector,
}; };
mod config; mod config;