From f20ceb1c6fd7fa29a91677be548d6f8ca6c4b172 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Sat, 6 Aug 2022 22:33:06 +0200 Subject: [PATCH] Encode index of SourceFile along with span. --- compiler/rustc_metadata/src/rmeta/decoder.rs | 33 +++---- compiler/rustc_metadata/src/rmeta/encoder.rs | 96 ++++++++++---------- compiler/rustc_span/src/lib.rs | 2 + compiler/rustc_span/src/source_map.rs | 2 + compiler/rustc_span/src/source_map/tests.rs | 1 + 5 files changed, 65 insertions(+), 69 deletions(-) diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 40dc4fb052d..444b5884187 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -527,6 +527,9 @@ fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Span { bug!("Cannot decode Span without Session.") }; + // Index of the file in the corresponding crate's list of encoded files. + let metadata_index = usize::decode(decoder); + // There are two possibilities here: // 1. This is a 'local span', which is located inside a `SourceFile` // that came from this crate. In this case, we use the source map data @@ -587,27 +590,9 @@ fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Span { foreign_data.imported_source_files(sess) }; - let source_file = { - // Optimize for the case that most spans within a translated item - // originate from the same source_file. - let last_source_file = &imported_source_files[decoder.last_source_file_index]; - - if lo >= last_source_file.original_start_pos && lo <= last_source_file.original_end_pos - { - last_source_file - } else { - let index = imported_source_files - .binary_search_by_key(&lo, |source_file| source_file.original_start_pos) - .unwrap_or_else(|index| index - 1); - - // Don't try to cache the index for foreign spans, - // as this would require a map from CrateNums to indices - if tag == TAG_VALID_SPAN_LOCAL { - decoder.last_source_file_index = index; - } - &imported_source_files[index] - } - }; + // Optimize for the case that most spans within a translated item + // originate from the same source_file. + let source_file = &imported_source_files[metadata_index]; // Make sure our binary search above is correct. debug_assert!( @@ -1545,7 +1530,8 @@ fn filter<'a>(sess: &Session, path: Option<&'a Path>) -> Option<&'a Path> { let external_source_map = self.root.source_map.decode(self); external_source_map - .map(|source_file_to_import| { + .enumerate() + .map(|(source_file_index, source_file_to_import)| { // We can't reuse an existing SourceFile, so allocate a new one // containing the information we need. let rustc_span::SourceFile { @@ -1605,6 +1591,9 @@ fn filter<'a>(sess: &Session, path: Option<&'a Path>) -> Option<&'a Path> { normalized_pos, start_pos, end_pos, + source_file_index + .try_into() + .expect("cannot import more than U32_MAX files"), ); debug!( "CrateMetaData::imported_source_files alloc \ diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 33278367ce3..de78ab41f05 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -17,7 +17,6 @@ use rustc_hir::intravisit::{self, Visitor}; use rustc_hir::lang_items; use rustc_hir::{AnonConst, GenericParamKind}; -use rustc_index::bit_set::GrowableBitSet; use rustc_middle::hir::nested_filter; use rustc_middle::middle::dependency_format::Linkage; use rustc_middle::middle::exported_symbols::{ @@ -66,13 +65,10 @@ pub(super) struct EncodeContext<'a, 'tcx> { // The indices (into the `SourceMap`'s `MonotonicVec`) // of all of the `SourceFiles` that we need to serialize. // When we serialize a `Span`, we insert the index of its - // `SourceFile` into the `GrowableBitSet`. - // - // This needs to be a `GrowableBitSet` and not a - // regular `BitSet` because we may actually import new `SourceFiles` - // during metadata encoding, due to executing a query - // with a result containing a foreign `Span`. - required_source_files: Option>, + // `SourceFile` into the `FxIndexSet`. + // The order inside the `FxIndexSet` is used as on-disk + // order of `SourceFiles`, and encoded inside `Span`s. + required_source_files: Option>, is_proc_macro: bool, hygiene_ctxt: &'a HygieneEncodeContext, } @@ -245,10 +241,6 @@ fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) { return TAG_PARTIAL_SPAN.encode(s); } - let source_files = s.required_source_files.as_mut().expect("Already encoded SourceMap!"); - // Record the fact that we need to encode the data for this `SourceFile` - source_files.insert(s.source_file_cache.1); - // There are two possible cases here: // 1. This span comes from a 'foreign' crate - e.g. some crate upstream of the // crate we are writing metadata for. When the metadata for *this* crate gets @@ -265,30 +257,38 @@ fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) { // if we're a proc-macro crate. // This allows us to avoid loading the dependencies of proc-macro crates: all of // the information we need to decode `Span`s is stored in the proc-macro crate. - let (tag, lo, hi) = if s.source_file_cache.0.is_imported() && !s.is_proc_macro { - // To simplify deserialization, we 'rebase' this span onto the crate it originally came from - // (the crate that 'owns' the file it references. These rebased 'lo' and 'hi' values - // are relative to the source map information for the 'foreign' crate whose CrateNum - // we write into the metadata. This allows `imported_source_files` to binary - // search through the 'foreign' crate's source map information, using the - // deserialized 'lo' and 'hi' values directly. - // - // All of this logic ensures that the final result of deserialization is a 'normal' - // Span that can be used without any additional trouble. - let external_start_pos = { - // Introduce a new scope so that we drop the 'lock()' temporary - match &*s.source_file_cache.0.external_src.lock() { - ExternalSource::Foreign { original_start_pos, .. } => *original_start_pos, - src => panic!("Unexpected external source {:?}", src), - } - }; - let lo = (span.lo - s.source_file_cache.0.start_pos) + external_start_pos; - let hi = (span.hi - s.source_file_cache.0.start_pos) + external_start_pos; + let (tag, lo, hi, metadata_index) = + if s.source_file_cache.0.is_imported() && !s.is_proc_macro { + // To simplify deserialization, we 'rebase' this span onto the crate it originally came from + // (the crate that 'owns' the file it references. These rebased 'lo' and 'hi' values + // are relative to the source map information for the 'foreign' crate whose CrateNum + // we write into the metadata. This allows `imported_source_files` to binary + // search through the 'foreign' crate's source map information, using the + // deserialized 'lo' and 'hi' values directly. + // + // All of this logic ensures that the final result of deserialization is a 'normal' + // Span that can be used without any additional trouble. + let (external_start_pos, metadata_index) = { + // Introduce a new scope so that we drop the 'lock()' temporary + match &*s.source_file_cache.0.external_src.lock() { + ExternalSource::Foreign { original_start_pos, metadata_index, .. } => { + (*original_start_pos, *metadata_index as usize) + } + src => panic!("Unexpected external source {:?}", src), + } + }; + let lo = (span.lo - s.source_file_cache.0.start_pos) + external_start_pos; + let hi = (span.hi - s.source_file_cache.0.start_pos) + external_start_pos; - (TAG_VALID_SPAN_FOREIGN, lo, hi) - } else { - (TAG_VALID_SPAN_LOCAL, span.lo, span.hi) - }; + (TAG_VALID_SPAN_FOREIGN, lo, hi, metadata_index) + } else { + // Record the fact that we need to encode the data for this `SourceFile` + let source_files = + s.required_source_files.as_mut().expect("Already encoded SourceMap!"); + let (source_file_index, _) = source_files.insert_full(s.source_file_cache.1); + + (TAG_VALID_SPAN_LOCAL, span.lo, span.hi, source_file_index) + }; tag.encode(s); lo.encode(s); @@ -298,6 +298,9 @@ fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) { let len = hi - lo; len.encode(s); + // Encode the index of the `SourceFile` for the span, in order to make decoding faster. + metadata_index.encode(s); + if tag == TAG_VALID_SPAN_FOREIGN { // This needs to be two lines to avoid holding the `s.source_file_cache` // while calling `cnum.encode(s)` @@ -460,18 +463,17 @@ fn encode_source_map(&mut self) -> LazyArray { let working_directory = &self.tcx.sess.opts.working_dir; - let adapted = all_source_files + // Only serialize `SourceFile`s that were used during the encoding of a `Span`. + // + // The order in which we encode source files is important here: the on-disk format for + // `Span` contains the index of the corresponding `SourceFile`. + let adapted = required_source_files .iter() - .enumerate() - .filter(|(idx, source_file)| { - // Only serialize `SourceFile`s that were used - // during the encoding of a `Span` - required_source_files.contains(*idx) && - // Don't serialize imported `SourceFile`s, unless - // we're in a proc-macro crate. - (!source_file.is_imported() || self.is_proc_macro) - }) - .map(|(_, source_file)| { + .map(|&source_file_index| &all_source_files[source_file_index]) + .map(|source_file| { + // Don't serialize imported `SourceFile`s, unless we're in a proc-macro crate. + assert!(!source_file.is_imported() || self.is_proc_macro); + // At export time we expand all source file paths to absolute paths because // downstream compilation sessions can have a different compiler working // directory, so relative paths from this or any other upstream crate @@ -2228,7 +2230,7 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>, path: &Path) { let source_map_files = tcx.sess.source_map().files(); let source_file_cache = (source_map_files[0].clone(), 0); - let required_source_files = Some(GrowableBitSet::with_capacity(source_map_files.len())); + let required_source_files = Some(FxIndexSet::default()); drop(source_map_files); let hygiene_ctxt = HygieneEncodeContext::default(); diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index cf306928151..bae05f2f02e 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -1098,6 +1098,8 @@ pub enum ExternalSource { original_start_pos: BytePos, /// The end of this SourceFile within the source_map of its original crate. original_end_pos: BytePos, + /// Index of the file inside metadata. + metadata_index: u32, }, } diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index 28381157d50..387777f7af6 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -337,6 +337,7 @@ pub fn new_imported_source_file( mut file_local_normalized_pos: Vec, original_start_pos: BytePos, original_end_pos: BytePos, + metadata_index: u32, ) -> Lrc { let start_pos = self .allocate_address_space(source_len) @@ -383,6 +384,7 @@ pub fn new_imported_source_file( kind: ExternalSourceKind::AbsentOk, original_start_pos, original_end_pos, + metadata_index, }), start_pos, end_pos, diff --git a/compiler/rustc_span/src/source_map/tests.rs b/compiler/rustc_span/src/source_map/tests.rs index be827cea874..2cada019b7f 100644 --- a/compiler/rustc_span/src/source_map/tests.rs +++ b/compiler/rustc_span/src/source_map/tests.rs @@ -252,6 +252,7 @@ fn t10() { normalized_pos, start_pos, end_pos, + 0, ); assert!(