From 6630d690859d882b2528a39317a701da64fe1203 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Fri, 22 Dec 2023 21:42:49 -0500 Subject: [PATCH] Specialize DefPathHash table to skip crate IDs Instead, we store just the local crate hash as a bare u64. On decoding, we recombine it with the crate's stable crate ID stored separately in metadata. The end result is that we save ~8 bytes/DefIndex in metadata size. One key detail here is that we no longer distinguish in encoded metadata between present and non-present DefPathHashes. It used to be highly likely we could distinguish as we used DefPathHash::default(), an all-zero representation. However in theory even that is fallible as nothing strictly prevents the StableCrateId from being zero. --- compiler/rustc_metadata/src/rmeta/decoder.rs | 14 +++++++++--- compiler/rustc_metadata/src/rmeta/encoder.rs | 4 ++-- compiler/rustc_metadata/src/rmeta/mod.rs | 7 +++++- compiler/rustc_metadata/src/rmeta/table.rs | 23 -------------------- compiler/rustc_span/src/def_id.rs | 2 -- 5 files changed, 19 insertions(+), 31 deletions(-) diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 281a0eafee1..b5e251f3c59 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -6,6 +6,7 @@ use rustc_ast as ast; use rustc_data_structures::captures::Captures; +use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::owned_slice::OwnedSlice; use rustc_data_structures::sync::{AppendOnlyVec, AtomicBool, Lock, Lrc, OnceLock}; use rustc_data_structures::unhash::UnhashMap; @@ -1489,9 +1490,16 @@ fn def_path_hash_unlocked( index: DefIndex, def_path_hashes: &mut FxHashMap<DefIndex, DefPathHash>, ) -> DefPathHash { - *def_path_hashes - .entry(index) - .or_insert_with(|| self.root.tables.def_path_hashes.get(self, index)) + *def_path_hashes.entry(index).or_insert_with(|| { + // This is a hack to work around the fact that we can't easily 
encode/decode a Hash64 + // into the FixedSizeEncoding, as Hash64 lacks a Default impl. A future refactor to + // relax the Default restriction will likely fix this. + let fingerprint = Fingerprint::new( + self.root.stable_crate_id.as_u64(), + self.root.tables.def_path_hashes.get(self, index), + ); + DefPathHash::new(self.root.stable_crate_id, fingerprint.split().1) + }) } #[inline] diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 057fb15ac3b..c8681f647c9 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -467,13 +467,13 @@ fn encode_def_path_table(&mut self) { let def_key = self.lazy(table.def_key(def_index)); let def_path_hash = table.def_path_hash(def_index); self.tables.def_keys.set_some(def_index, def_key); - self.tables.def_path_hashes.set(def_index, def_path_hash); + self.tables.def_path_hashes.set(def_index, def_path_hash.local_hash().as_u64()); } } else { for (def_index, def_key, def_path_hash) in table.enumerated_keys_and_path_hashes() { let def_key = self.lazy(def_key); self.tables.def_keys.set_some(def_index, def_key); - self.tables.def_path_hashes.set(def_index, *def_path_hash); + self.tables.def_path_hashes.set(def_index, def_path_hash.local_hash().as_u64()); } } } diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index 235f0e35cae..905218e2e1b 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -386,7 +386,12 @@ fn encode(&self, buf: &mut FileEncoder) -> LazyTables { is_type_alias_impl_trait: Table, type_alias_is_lazy: Table, attr_flags: Table, - def_path_hashes: Table<DefIndex, DefPathHash>, + // The u64 is the crate-local part of the DefPathHash. All hashes in this crate have the same + // StableCrateId, so we omit encoding those into the table. 
+ // + // Note also that this table is fully populated (no gaps) as every DefIndex should have a + // corresponding DefPathHash. + def_path_hashes: Table<DefIndex, u64>, explicit_item_bounds: Table, Span)>>, inferred_outlives_of: Table, Span)>>, inherent_impls: Table>, diff --git a/compiler/rustc_metadata/src/rmeta/table.rs b/compiler/rustc_metadata/src/rmeta/table.rs index 667fc301991..d53ee4836bb 100644 --- a/compiler/rustc_metadata/src/rmeta/table.rs +++ b/compiler/rustc_metadata/src/rmeta/table.rs @@ -1,6 +1,5 @@ use crate::rmeta::*; -use rustc_data_structures::fingerprint::Fingerprint; use rustc_hir::def::CtorOf; use rustc_index::Idx; @@ -44,12 +43,6 @@ fn is_default(&self) -> bool { } } -impl IsDefault for DefPathHash { - fn is_default(&self) -> bool { - self.0 == Fingerprint::ZERO - } -} - impl IsDefault for UnusedGenericParams { fn is_default(&self) -> bool { // UnusedGenericParams encodes the *un*usedness as a bitset. @@ -234,22 +227,6 @@ impl FixedSizeEncoding for Option<$ty> { } } -// We directly encode `DefPathHash` because a `LazyValue` would incur a 25% cost. -impl FixedSizeEncoding for DefPathHash { - type ByteArray = [u8; 16]; - - #[inline] - fn from_bytes(b: &[u8; 16]) -> Self { - DefPathHash(Fingerprint::from_le_bytes(*b)) - } - - #[inline] - fn write_to_bytes(self, b: &mut [u8; 16]) { - debug_assert!(!self.is_default()); - *b = self.0.to_le_bytes(); - } -} - // We directly encode RawDefId because using a `LazyValue` would incur a 50% overhead in the worst case. impl FixedSizeEncoding for Option<RawDefId> { type ByteArray = [u8; 8]; diff --git a/compiler/rustc_span/src/def_id.rs b/compiler/rustc_span/src/def_id.rs index b2d51ac6c0d..e397fab5459 100644 --- a/compiler/rustc_span/src/def_id.rs +++ b/compiler/rustc_span/src/def_id.rs @@ -114,8 +114,6 @@ pub fn stable_crate_id(&self) -> StableCrateId { } /// Returns the crate-local part of the [DefPathHash]. - /// - /// Used for tests. #[inline] pub fn local_hash(&self) -> Hash64 { self.0.split().1