Encode less metadata for proc-macro crates

Currently, we serialize the same crate metadata for proc-macro crates as
we do for normal crates. This is quite wasteful - almost none of this
metadata is ever used, and much of it can't even be deserialized (if it
contains a foreign `CrateNum`).

This PR changes metadata encoding to skip encoding the majority of crate
metadata for proc-macro crates. Most of the `Lazy<[T]>` fields are left
completely empty, while the non-lazy fields are left as-is.

Additionally, proc-macros now have a def span that does not include
their body. This was done for normal functions in #75465, but was missed
for proc-macros.

As a result of this PR, we should only ever encode local `CrateNum`s
when encoding proc-macro crates. I've added a specialized serialization
impl for `CrateNum` to assert this.
This commit is contained in:
Aaron Hill 2020-09-18 16:18:10 -04:00
parent 6f9a8a7f9b
commit b9653568a7
No known key found for this signature in database
GPG Key ID: B4087E510E98B164
4 changed files with 168 additions and 60 deletions

View File

@ -707,7 +707,11 @@ impl CrateRoot<'_> {
impl<'a, 'tcx> CrateMetadataRef<'a> {
fn is_proc_macro(&self, id: DefIndex) -> bool {
self.root.proc_macro_data.and_then(|data| data.decode(self).find(|x| *x == id)).is_some()
self.root
.proc_macro_data
.as_ref()
.and_then(|data| data.macros.decode(self).find(|x| *x == id))
.is_some()
}
fn maybe_kind(&self, item_id: DefIndex) -> Option<EntryKind> {
@ -729,7 +733,15 @@ fn kind(&self, item_id: DefIndex) -> EntryKind {
fn raw_proc_macro(&self, id: DefIndex) -> &ProcMacro {
// DefIndex's in root.proc_macro_data have a one-to-one correspondence
// with items in 'raw_proc_macros'.
let pos = self.root.proc_macro_data.unwrap().decode(self).position(|i| i == id).unwrap();
let pos = self
.root
.proc_macro_data
.as_ref()
.unwrap()
.macros
.decode(self)
.position(|i| i == id)
.unwrap();
&self.raw_proc_macros.unwrap()[pos]
}
@ -766,7 +778,12 @@ fn def_kind(&self, index: DefIndex) -> DefKind {
}
fn get_span(&self, index: DefIndex, sess: &Session) -> Span {
self.root.tables.span.get(self, index).unwrap().decode((self, sess))
self.root
.tables
.span
.get(self, index)
.unwrap_or_else(|| panic!("Missing span for {:?}", index))
.decode((self, sess))
}
fn load_proc_macro(&self, id: DefIndex, sess: &Session) -> SyntaxExtension {
@ -942,7 +959,7 @@ fn get_type(&self, id: DefIndex, tcx: TyCtxt<'tcx>) -> Ty<'tcx> {
fn get_stability(&self, id: DefIndex) -> Option<attr::Stability> {
match self.is_proc_macro(id) {
true => self.root.proc_macro_stability,
true => self.root.proc_macro_data.as_ref().unwrap().stability,
false => self.root.tables.stability.get(self, id).map(|stab| stab.decode(self)),
}
}
@ -1035,24 +1052,20 @@ fn each_child_of_item<F>(&self, id: DefIndex, mut callback: F, sess: &Session)
where
F: FnMut(Export<hir::HirId>),
{
if let Some(proc_macros_ids) = self.root.proc_macro_data.map(|d| d.decode(self)) {
if let Some(data) = &self.root.proc_macro_data {
/* If we are loading as a proc macro, we want to return the view of this crate
* as a proc macro crate.
*/
if id == CRATE_DEF_INDEX {
for def_index in proc_macros_ids {
let macros = data.macros.decode(self);
for def_index in macros {
let raw_macro = self.raw_proc_macro(def_index);
let res = Res::Def(
DefKind::Macro(macro_kind(raw_macro)),
self.local_def_id(def_index),
);
let ident = self.item_ident(def_index, sess);
callback(Export {
ident,
res,
vis: ty::Visibility::Public,
span: self.get_span(def_index, sess),
});
callback(Export { ident, res, vis: ty::Visibility::Public, span: ident.span });
}
}
return;
@ -1559,12 +1572,19 @@ fn def_path_hash(&self, index: DefIndex) -> DefPathHash {
fn all_def_path_hashes_and_def_ids(&self) -> Vec<(DefPathHash, DefId)> {
let mut def_path_hashes = self.def_path_hash_cache.lock();
(0..self.num_def_ids())
.map(|index| {
let index = DefIndex::from_usize(index);
(self.def_path_hash_unlocked(index, &mut def_path_hashes), self.local_def_id(index))
})
.collect()
let mut def_index_to_data = |index| {
(self.def_path_hash_unlocked(index, &mut def_path_hashes), self.local_def_id(index))
};
if let Some(data) = &self.root.proc_macro_data {
std::iter::once(CRATE_DEF_INDEX)
.chain(data.macros.decode(self))
.map(def_index_to_data)
.collect()
} else {
(0..self.num_def_ids())
.map(|index| def_index_to_data(DefIndex::from_usize(index)))
.collect()
}
}
/// Get the `DepNodeIndex` corresponding to this crate. The result of this

View File

@ -179,8 +179,11 @@ fn into_args(self) -> (DefId, DefId) {
})
}
proc_macro_decls_static => {
cdata.root.proc_macro_decls_static.map(|index| {
DefId { krate: def_id.krate, index }
cdata.root.proc_macro_data.as_ref().map(|data| {
DefId {
krate: def_id.krate,
index: data.proc_macro_decls_static,
}
})
}
crate_disambiguator => { cdata.root.disambiguator }

View File

@ -68,6 +68,17 @@ pub(super) struct EncodeContext<'a, 'tcx> {
hygiene_ctxt: &'a HygieneEncodeContext,
}
/// If the current crate is a proc-macro, returns early with `Lazy::empty()`.
/// This is useful for skipping the encoding of things that aren't needed
/// for proc-macro crates.
///
/// The expansion is an early `return`, so this must be invoked inside a
/// function whose return type accepts `Lazy::empty()`. `$self` is the value
/// whose `is_proc_macro` flag is tested.
macro_rules! empty_proc_macro {
($self:ident) => {
if $self.is_proc_macro {
return Lazy::empty();
}
};
}
macro_rules! encoder_methods {
($($name:ident($ty:ty);)*) => {
$(fn $name(&mut self, value: $ty) -> Result<(), Self::Error> {
@ -138,6 +149,15 @@ fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) -> opaque::EncodeResult {
}
}
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for CrateNum {
    // Writes the `CrateNum` as its raw `u32` value.
    //
    // While a proc-macro crate is being encoded, only the local crate number
    // may be written; encountering any other `CrateNum` is treated as a bug
    // and aborts with a panic.
    fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) -> opaque::EncodeResult {
        let is_foreign = *self != LOCAL_CRATE;
        if s.is_proc_macro && is_foreign {
            panic!("Attempted to encode non-local CrateNum {:?} for proc-macro crate", self);
        }
        s.emit_u32(self.as_u32())
    }
}
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for DefIndex {
fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) -> opaque::EncodeResult {
s.emit_u32(self.as_u32())
@ -418,6 +438,13 @@ fn encode_info_for_items(&mut self) {
let krate = self.tcx.hir().krate();
let vis = Spanned { span: rustc_span::DUMMY_SP, node: hir::VisibilityKind::Public };
self.encode_info_for_mod(hir::CRATE_HIR_ID, &krate.item.module, &krate.item.attrs, &vis);
// Proc-macro crates only export proc-macro items, which are looked
// up using `proc_macro_data`
if self.is_proc_macro {
return;
}
krate.visit_all_item_likes(&mut self.as_deep_visitor());
for macro_def in krate.exported_macros {
self.visit_macro_def(macro_def);
@ -426,11 +453,22 @@ fn encode_info_for_items(&mut self) {
fn encode_def_path_table(&mut self) {
let table = self.tcx.hir().definitions().def_path_table();
for (def_index, def_key, def_path_hash) in table.enumerated_keys_and_path_hashes() {
let def_key = self.lazy(def_key);
let def_path_hash = self.lazy(def_path_hash);
self.tables.def_keys.set(def_index, def_key);
self.tables.def_path_hashes.set(def_index, def_path_hash);
if self.is_proc_macro {
for def_index in std::iter::once(CRATE_DEF_INDEX)
.chain(self.tcx.hir().krate().proc_macros.iter().map(|p| p.owner.local_def_index))
{
let def_key = self.lazy(table.def_key(def_index));
let def_path_hash = self.lazy(table.def_path_hash(def_index));
self.tables.def_keys.set(def_index, def_key);
self.tables.def_path_hashes.set(def_index, def_path_hash);
}
} else {
for (def_index, def_key, def_path_hash) in table.enumerated_keys_and_path_hashes() {
let def_key = self.lazy(def_key);
let def_path_hash = self.lazy(def_path_hash);
self.tables.def_keys.set(def_index, def_key);
self.tables.def_path_hashes.set(def_index, def_path_hash);
}
}
}
@ -497,13 +535,7 @@ fn encode_source_map(&mut self) -> Lazy<[rustc_span::SourceFile]> {
self.lazy(adapted.iter().map(|rc| &**rc))
}
fn is_proc_macro(&self) -> bool {
self.tcx.sess.crate_types().contains(&CrateType::ProcMacro)
}
fn encode_crate_root(&mut self) -> Lazy<CrateRoot<'tcx>> {
let is_proc_macro = self.is_proc_macro();
let mut i = self.position();
// Encode the crate deps
@ -575,15 +607,16 @@ fn encode_crate_root(&mut self) -> Lazy<CrateRoot<'tcx>> {
self.lazy(interpret_alloc_index)
};
i = self.position();
let tables = self.tables.encode(&mut self.opaque);
let tables_bytes = self.position() - i;
// Encode the proc macro data
// Encode the proc macro data. This affects 'tables',
// so we need to do this before we encode the tables
i = self.position();
let proc_macro_data = self.encode_proc_macros();
let proc_macro_data_bytes = self.position() - i;
i = self.position();
let tables = self.tables.encode(&mut self.opaque);
let tables_bytes = self.position() - i;
// Encode exported symbols info. This is prefetched in `encode_metadata` so we encode
// this as late as possible to give the prefetching as much time as possible to complete.
i = self.position();
@ -624,18 +657,7 @@ fn encode_crate_root(&mut self) -> Lazy<CrateRoot<'tcx>> {
has_panic_handler: tcx.has_panic_handler(LOCAL_CRATE),
has_default_lib_allocator,
plugin_registrar_fn: tcx.plugin_registrar_fn(LOCAL_CRATE).map(|id| id.index),
proc_macro_decls_static: if is_proc_macro {
let id = tcx.proc_macro_decls_static(LOCAL_CRATE).unwrap();
Some(id.index)
} else {
None
},
proc_macro_data,
proc_macro_stability: if is_proc_macro {
tcx.lookup_stability(DefId::local(CRATE_DEF_INDEX)).copied()
} else {
None
},
compiler_builtins: tcx.sess.contains_name(&attrs, sym::compiler_builtins),
needs_allocator: tcx.sess.contains_name(&attrs, sym::needs_allocator),
needs_panic_runtime: tcx.sess.contains_name(&attrs, sym::needs_panic_runtime),
@ -800,8 +822,13 @@ fn encode_info_for_mod(
let def_id = local_def_id.to_def_id();
debug!("EncodeContext::encode_info_for_mod({:?})", def_id);
let data = ModData {
reexports: match tcx.module_exports(local_def_id) {
// If we are encoding a proc-macro crate, `encode_info_for_mod` will
// only ever get called for the crate root. We still want to encode
// the crate root for consistency with other crates (some of the resolver
// code uses it). However, we skip encoding anything relating to child
// items - we encode information about proc-macros later on.
let reexports = if !self.is_proc_macro {
match tcx.module_exports(local_def_id) {
Some(exports) => {
let hir = self.tcx.hir();
self.lazy(
@ -811,7 +838,13 @@ fn encode_info_for_mod(
)
}
_ => Lazy::empty(),
},
}
} else {
Lazy::empty()
};
let data = ModData {
reexports,
expansion: tcx.hir().definitions().expansion_that_defined(local_def_id),
};
@ -819,9 +852,13 @@ fn encode_info_for_mod(
record!(self.tables.visibility[def_id] <- ty::Visibility::from_hir(vis, id, self.tcx));
record!(self.tables.span[def_id] <- self.tcx.def_span(def_id));
record!(self.tables.attributes[def_id] <- attrs);
record!(self.tables.children[def_id] <- md.item_ids.iter().map(|item_id| {
tcx.hir().local_def_id(item_id.id).local_def_index
}));
if self.is_proc_macro {
record!(self.tables.children[def_id] <- &[]);
} else {
record!(self.tables.children[def_id] <- md.item_ids.iter().map(|item_id| {
tcx.hir().local_def_id(item_id.id).local_def_index
}));
}
self.encode_stability(def_id);
self.encode_deprecation(def_id);
}
@ -1481,11 +1518,13 @@ fn encode_info_for_anon_const(&mut self, def_id: LocalDefId) {
}
// Encodes the native libraries reported for the local crate.
// For proc-macro crates nothing is encoded and `Lazy::empty()` is returned.
fn encode_native_libraries(&mut self) -> Lazy<[NativeLib]> {
    empty_proc_macro!(self);

    let libs = self.tcx.native_libraries(LOCAL_CRATE);
    let cloned_libs = libs.iter().cloned();
    self.lazy(cloned_libs)
}
// Encodes the foreign modules reported for the local crate.
// For proc-macro crates nothing is encoded and `Lazy::empty()` is returned.
fn encode_foreign_modules(&mut self) -> Lazy<[ForeignModule]> {
    empty_proc_macro!(self);

    let modules = self.tcx.foreign_modules(LOCAL_CRATE);
    let cloned_modules = modules.iter().cloned();
    self.lazy(cloned_modules)
}
@ -1509,17 +1548,37 @@ fn encode_hygiene(&mut self) -> (SyntaxContextTable, ExpnDataTable) {
(syntax_contexts.encode(&mut self.opaque), expn_data_table.encode(&mut self.opaque))
}
fn encode_proc_macros(&mut self) -> Option<Lazy<[DefIndex]>> {
fn encode_proc_macros(&mut self) -> Option<ProcMacroData> {
let is_proc_macro = self.tcx.sess.crate_types().contains(&CrateType::ProcMacro);
if is_proc_macro {
let tcx = self.tcx;
Some(self.lazy(tcx.hir().krate().proc_macros.iter().map(|p| p.owner.local_def_index)))
let hir = tcx.hir();
let proc_macro_decls_static = tcx.proc_macro_decls_static(LOCAL_CRATE).unwrap().index;
let stability = tcx.lookup_stability(DefId::local(CRATE_DEF_INDEX)).copied();
let macros = self.lazy(hir.krate().proc_macros.iter().map(|p| p.owner.local_def_index));
// Normally, this information is encoded when we walk the items
// defined in this crate. However, we skip doing that for proc-macro crates,
// so we manually encode just the information that we need
for proc_macro in &hir.krate().proc_macros {
let id = proc_macro.owner.local_def_index;
let span = self.lazy(hir.span(*proc_macro));
// Proc-macros may have attributes like `#[allow_internal_unstable]`,
// so downstream crates need access to them.
let attrs = self.lazy(hir.attrs(*proc_macro));
self.tables.span.set(id, span);
self.tables.attributes.set(id, attrs);
}
Some(ProcMacroData { proc_macro_decls_static, stability, macros })
} else {
None
}
}
fn encode_crate_deps(&mut self) -> Lazy<[CrateDep]> {
empty_proc_macro!(self);
let crates = self.tcx.crates();
let mut deps = crates
@ -1555,18 +1614,21 @@ fn encode_crate_deps(&mut self) -> Lazy<[CrateDep]> {
}
// Encodes the library features known to the type context as a list of
// `(Symbol, Option<Symbol>)` pairs.
// For proc-macro crates nothing is encoded and `Lazy::empty()` is returned.
fn encode_lib_features(&mut self) -> Lazy<[(Symbol, Option<Symbol>)]> {
    empty_proc_macro!(self);

    let features = self.tcx.lib_features().to_vec();
    self.lazy(features)
}
// Encodes the local crate's diagnostic items as `(name, DefIndex)` pairs,
// keeping only the index part of each item's `DefId`.
// For proc-macro crates nothing is encoded and `Lazy::empty()` is returned.
fn encode_diagnostic_items(&mut self) -> Lazy<[(Symbol, DefIndex)]> {
    empty_proc_macro!(self);

    let items = self.tcx.diagnostic_items(LOCAL_CRATE);
    let entries = items.iter().map(|(&name, def_id)| (name, def_id.index));
    self.lazy(entries)
}
fn encode_lang_items(&mut self) -> Lazy<[(DefIndex, usize)]> {
empty_proc_macro!(self);
let tcx = self.tcx;
let lang_items = tcx.lang_items();
let lang_items = lang_items.items().iter();
@ -1581,12 +1643,14 @@ fn encode_lang_items(&mut self) -> Lazy<[(DefIndex, usize)]> {
}
// Encodes the `missing` list of the type context's lang items.
// For proc-macro crates nothing is encoded and `Lazy::empty()` is returned.
fn encode_lang_items_missing(&mut self) -> Lazy<[lang_items::LangItem]> {
    empty_proc_macro!(self);

    let missing = &self.tcx.lang_items().missing;
    self.lazy(missing)
}
/// Encodes an index, mapping each trait to its (local) implementations.
fn encode_impls(&mut self) -> Lazy<[TraitImpls]> {
empty_proc_macro!(self);
debug!("EncodeContext::encode_impls()");
let tcx = self.tcx;
let mut visitor = ImplVisitor { tcx, impls: FxHashMap::default() };
@ -1625,6 +1689,7 @@ fn encode_exported_symbols(
&mut self,
exported_symbols: &[(ExportedSymbol<'tcx>, SymbolExportLevel)],
) -> Lazy<[(ExportedSymbol<'tcx>, SymbolExportLevel)]> {
empty_proc_macro!(self);
// The metadata symbol name is special. It should not show up in
// downstream crates.
let metadata_symbol_name = SymbolName::new(self.tcx, &metadata_symbol_name(self.tcx));
@ -1641,6 +1706,7 @@ fn encode_exported_symbols(
}
fn encode_dylib_dependency_formats(&mut self) -> Lazy<[Option<LinkagePreference>]> {
empty_proc_macro!(self);
let formats = self.tcx.dependency_formats(LOCAL_CRATE);
for (ty, arr) in formats.iter() {
if *ty != CrateType::Dylib {

View File

@ -172,6 +172,29 @@ macro_rules! Lazy {
type SyntaxContextTable = Lazy<Table<u32, Lazy<SyntaxContextData>>>;
type ExpnDataTable = Lazy<Table<u32, Lazy<ExpnData>>>;
/// Metadata that is only recorded when the crate being encoded is a
/// proc-macro crate (`CrateRoot::proc_macro_data` is `None` otherwise).
#[derive(MetadataEncodable, MetadataDecodable)]
crate struct ProcMacroData {
/// `DefIndex` of the item returned by the `proc_macro_decls_static` query
/// for the local crate at encode time.
proc_macro_decls_static: DefIndex,
/// Stability looked up for the crate root at encode time; the decoder
/// reports this as the stability of every proc-macro in the crate.
stability: Option<attr::Stability>,
/// The `DefIndex`es of the proc macros declared by this crate.
macros: Lazy<[DefIndex]>,
}
/// Serialized metadata for a crate.
/// When compiling a proc-macro crate, we encode many of
/// the `Lazy<[T]>` fields as `Lazy::empty()`. This serves two purposes:
///
/// 1. We avoid performing unnecessary work. Proc-macro crates can only
/// export proc-macro functions, which are compiled into a shared library.
/// As a result, a large amount of the information we normally store
/// (e.g. optimized MIR) is unneeded by downstream crates.
/// 2. We avoid serializing invalid `CrateNum`s. When we deserialize
/// a proc-macro crate, we don't load any of its dependencies (since we
/// just need to invoke a native function from the shared library).
/// This means that any foreign `CrateNum`s that we serialize cannot be
/// deserialized, since we will not know how to map them into the current
/// compilation session. If we were to serialize a proc-macro crate like
/// a normal crate, much of what we serialized would be unusable in addition
/// to being unused.
#[derive(MetadataEncodable, MetadataDecodable)]
crate struct CrateRoot<'tcx> {
name: Symbol,
@ -185,8 +208,6 @@ macro_rules! Lazy {
has_panic_handler: bool,
has_default_lib_allocator: bool,
plugin_registrar_fn: Option<DefIndex>,
proc_macro_decls_static: Option<DefIndex>,
proc_macro_stability: Option<attr::Stability>,
crate_deps: Lazy<[CrateDep]>,
dylib_dependency_formats: Lazy<[Option<LinkagePreference>]>,
@ -198,12 +219,10 @@ macro_rules! Lazy {
foreign_modules: Lazy<[ForeignModule]>,
impls: Lazy<[TraitImpls]>,
interpret_alloc_index: Lazy<[u32]>,
proc_macro_data: Option<ProcMacroData>,
tables: LazyTables<'tcx>,
/// The DefIndex's of any proc macros declared by this crate.
proc_macro_data: Option<Lazy<[DefIndex]>>,
exported_symbols: Lazy!([(ExportedSymbol<'tcx>, SymbolExportLevel)]),
syntax_contexts: SyntaxContextTable,