Auto merge of #119238 - Mark-Simulacrum:def-hash-efficiency, r=cjgillot

Skip duplicate stable crate ID encoding into metadata

Instead, we store just the crate-local part of each DefPathHash as a bare
u64. On decoding, we recombine it with the crate's stable crate ID, which
is stored separately in metadata. The end result is that we save ~8 bytes
per DefIndex in metadata size.

One key detail here is that we no longer distinguish in encoded metadata
between present and absent DefPathHashes. It used to be highly likely
that we could distinguish them, as we used DefPathHash::default(), an
all-zero representation. However, in theory even that is fallible, as
nothing strictly prevents the StableCrateId from being zero. In review it
was pointed out that we should never have a missing hash for a DefIndex
anyway, so this shouldn't matter.
This commit is contained in:
bors 2023-12-24 07:56:37 +00:00
commit cf6427373b
5 changed files with 19 additions and 31 deletions

View File

@ -6,6 +6,7 @@
use rustc_ast as ast;
use rustc_data_structures::captures::Captures;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::owned_slice::OwnedSlice;
use rustc_data_structures::sync::{AppendOnlyVec, AtomicBool, Lock, Lrc, OnceLock};
use rustc_data_structures::unhash::UnhashMap;
@ -1489,9 +1490,16 @@ fn def_path_hash_unlocked(
index: DefIndex,
def_path_hashes: &mut FxHashMap<DefIndex, DefPathHash>,
) -> DefPathHash {
*def_path_hashes
.entry(index)
.or_insert_with(|| self.root.tables.def_path_hashes.get(self, index))
*def_path_hashes.entry(index).or_insert_with(|| {
// This is a hack to work around the fact that we can't easily encode/decode a Hash64
// into the FixedSizeEncoding, as Hash64 lacks a Default impl. A future refactor to
// relax the Default restriction will likely fix this.
let fingerprint = Fingerprint::new(
self.root.stable_crate_id.as_u64(),
self.root.tables.def_path_hashes.get(self, index),
);
DefPathHash::new(self.root.stable_crate_id, fingerprint.split().1)
})
}
#[inline]

View File

@ -467,13 +467,13 @@ fn encode_def_path_table(&mut self) {
let def_key = self.lazy(table.def_key(def_index));
let def_path_hash = table.def_path_hash(def_index);
self.tables.def_keys.set_some(def_index, def_key);
self.tables.def_path_hashes.set(def_index, def_path_hash);
self.tables.def_path_hashes.set(def_index, def_path_hash.local_hash().as_u64());
}
} else {
for (def_index, def_key, def_path_hash) in table.enumerated_keys_and_path_hashes() {
let def_key = self.lazy(def_key);
self.tables.def_keys.set_some(def_index, def_key);
self.tables.def_path_hashes.set(def_index, *def_path_hash);
self.tables.def_path_hashes.set(def_index, def_path_hash.local_hash().as_u64());
}
}
}

View File

@ -386,7 +386,12 @@ fn encode(&self, buf: &mut FileEncoder) -> LazyTables {
is_type_alias_impl_trait: Table<DefIndex, bool>,
type_alias_is_lazy: Table<DefIndex, bool>,
attr_flags: Table<DefIndex, AttrFlags>,
def_path_hashes: Table<DefIndex, DefPathHash>,
// The u64 is the crate-local part of the DefPathHash. All hashes in this crate have the same
// StableCrateId, so we omit encoding those into the table.
//
// Note also that this table is fully populated (no gaps) as every DefIndex should have a
// corresponding DefPathHash.
def_path_hashes: Table<DefIndex, u64>,
explicit_item_bounds: Table<DefIndex, LazyArray<(ty::Clause<'static>, Span)>>,
inferred_outlives_of: Table<DefIndex, LazyArray<(ty::Clause<'static>, Span)>>,
inherent_impls: Table<DefIndex, LazyArray<DefIndex>>,

View File

@ -1,6 +1,5 @@
use crate::rmeta::*;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_hir::def::CtorOf;
use rustc_index::Idx;
@ -44,12 +43,6 @@ fn is_default(&self) -> bool {
}
}
impl IsDefault for DefPathHash {
fn is_default(&self) -> bool {
self.0 == Fingerprint::ZERO
}
}
impl IsDefault for UnusedGenericParams {
fn is_default(&self) -> bool {
// UnusedGenericParams encodes the *un*usedness as a bitset.
@ -234,22 +227,6 @@ impl FixedSizeEncoding for Option<$ty> {
}
}
// We directly encode `DefPathHash` because a `LazyValue` would incur a 25% cost.
impl FixedSizeEncoding for DefPathHash {
type ByteArray = [u8; 16];
#[inline]
fn from_bytes(b: &[u8; 16]) -> Self {
DefPathHash(Fingerprint::from_le_bytes(*b))
}
#[inline]
fn write_to_bytes(self, b: &mut [u8; 16]) {
debug_assert!(!self.is_default());
*b = self.0.to_le_bytes();
}
}
// We directly encode RawDefId because using a `LazyValue` would incur a 50% overhead in the worst case.
impl FixedSizeEncoding for Option<RawDefId> {
type ByteArray = [u8; 8];

View File

@ -114,8 +114,6 @@ pub fn stable_crate_id(&self) -> StableCrateId {
}
/// Returns the crate-local part of the [DefPathHash].
///
/// Used for tests.
#[inline]
pub fn local_hash(&self) -> Hash64 {
self.0.split().1