Properly handle Spans that reference imported SourceFiles

Previously, metadata encoding used DUMMY_SP to represent any spans that
referenced an 'imported' SourceFile - e.g. a SourceFile from an upstream
dependency. These leads to sub-optimal error messages in certain cases
(see the included test).

This PR changes how we encode and decode spans in crate metadata. We
encode spans in one of two ways:

* 'Local' spans, which reference non-imported SourceFiles, are encoded
  exactly as before.
* 'Foreign' spans, which reference imported SourceFiles, are encoded
  with the CrateNum of their 'originating' crate. Additionally, their
'lo' and 'high' values are rebased on top of the 'originating' crate,
which allows them to be used with the SourceMap data encoded for that
crate.

The `ExternalSource` enum is renamed to `ExternalSourceKind`. There is
now a struct called `ExternalSource`, which holds an
`ExternalSourceKind` along with the original line number information for
the file. This is used during `Span` serialization to rebase spans onto
their 'owning' crate.
This commit is contained in:
Aaron Hill 2020-02-07 14:02:24 -05:00
parent 57e1da59cd
commit 5e2856122a
No known key found for this signature in database
GPG Key ID: B4087E510E98B164
11 changed files with 234 additions and 52 deletions

View File

@ -10,7 +10,7 @@ use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::svh::Svh;
use rustc_hir as hir;
use rustc_hir::def_id::CRATE_DEF_INDEX;
use rustc_hir::def_id::{CrateNum, DefIndex, LOCAL_CRATE};
use rustc_hir::def_id::{DefIndex, LOCAL_CRATE};
use rustc_hir::intravisit::{self, NestedVisitorMap, Visitor};
use rustc_hir::*;
use rustc_index::vec::{Idx, IndexVec};
@ -175,7 +175,7 @@ impl<'a, 'hir> NodeCollector<'a, 'hir> {
.source_map
.files()
.iter()
.filter(|source_file| CrateNum::from_u32(source_file.crate_of_origin) == LOCAL_CRATE)
.filter(|source_file| source_file.cnum == LOCAL_CRATE)
.map(|source_file| source_file.name_hash)
.collect();

View File

@ -5,7 +5,6 @@ use crate::ich::StableHashingContext;
use rustc_ast::ast;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_hir::def_id::{CrateNum, DefId, CRATE_DEF_INDEX};
use rustc_span::SourceFile;
use smallvec::SmallVec;
@ -59,7 +58,7 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
name_hash,
name_was_remapped,
unmapped_path: _,
crate_of_origin,
cnum,
// Do not hash the source as it is not encoded
src: _,
src_hash,
@ -75,9 +74,6 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
(name_hash as u64).hash_stable(hcx, hasher);
name_was_remapped.hash_stable(hcx, hasher);
DefId { krate: CrateNum::from_u32(crate_of_origin), index: CRATE_DEF_INDEX }
.hash_stable(hcx, hasher);
src_hash.hash_stable(hcx, hasher);
// We only hash the relative position within this source_file
@ -101,6 +97,8 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
for &char_pos in normalized_pos.iter() {
stable_normalized_pos(char_pos, start_pos).hash_stable(hcx, hasher);
}
cnum.hash_stable(hcx, hasher);
}
}

View File

@ -386,7 +386,7 @@ impl<'a, 'tcx> SpecializedDecoder<Span> for DecodeContext<'a, 'tcx> {
return Ok(DUMMY_SP);
}
debug_assert_eq!(tag, TAG_VALID_SPAN);
debug_assert!(tag == TAG_VALID_SPAN_LOCAL || tag == TAG_VALID_SPAN_FOREIGN);
let lo = BytePos::decode(self)?;
let len = BytePos::decode(self)?;
@ -398,7 +398,68 @@ impl<'a, 'tcx> SpecializedDecoder<Span> for DecodeContext<'a, 'tcx> {
bug!("Cannot decode Span without Session.")
};
let imported_source_files = self.cdata().imported_source_files(&sess.source_map());
// There are two possibilities here:
// 1. This is a 'local span', which is located inside a `SourceFile`
// that came from this crate. In this case, we use the source map data
// encoded in this crate. This branch should be taken nearly all of the time.
// 2. This is a 'foreign span', which is located inside a `SourceFile`
// that came from a *different* crate (some crate upstream of the one
// whose metadata we're looking at). For example, consider this dependency graph:
//
// A -> B -> C
//
// Suppose that we're currently compiling crate A, and start deserializing
// metadata from crate B. When we deserialize a Span from crate B's metadata,
// there are two posibilites:
//
// 1. The span references a file from crate B. This makes it a 'local' span,
// which means that we can use crate B's serialized source map information.
// 2. The span references a file from crate C. This makes it a 'foreign' span,
// which means we need to use Crate *C* (not crate B) to determine the source
// map information. We only record source map information for a file in the
// crate that 'owns' it, so deserializing a Span may require us to look at
// a transitive dependency.
//
// When we encode a foreign span, we adjust its 'lo' and 'high' values
// to be based on the *foreign* crate (e.g. crate C), not the crate
// we are writing metadata for (e.g. crate B). This allows us to
// treat the 'local' and 'foreign' cases almost identically during deserialization:
// we can call `imported_source_files` for the proper crate, and binary search
// through the returned slice using our span.
let imported_source_files = if tag == TAG_VALID_SPAN_LOCAL {
self.cdata().imported_source_files(sess.source_map())
} else {
// FIXME: We don't decode dependencies of proc-macros.
// Remove this once #69976 is merged
if self.cdata().root.is_proc_macro_crate() {
debug!(
"SpecializedDecoder<Span>::specialized_decode: skipping span for proc-macro crate {:?}",
self.cdata().cnum
);
// Decode `CrateNum` as u32 - using `CrateNum::decode` will ICE
// since we don't have `cnum_map` populated.
// This advances the decoder position so that we can continue
// to read metadata.
let _ = u32::decode(self)?;
return Ok(DUMMY_SP);
}
// tag is TAG_VALID_SPAN_FOREIGN, checked by `debug_assert` above
let cnum = CrateNum::decode(self)?;
debug!(
"SpecializedDecoder<Span>::specialized_decode: loading source files from cnum {:?}",
cnum
);
// Decoding 'foreign' spans should be rare enough that it's
// not worth it to maintain a per-CrateNum cache for `last_source_file_index`.
// We just set it to 0, to ensure that we don't try to access something out
// of bounds for our initial 'guess'
self.last_source_file_index = 0;
let foreign_data = self.cdata().cstore.get_crate_data(cnum);
foreign_data.imported_source_files(sess.source_map())
};
let source_file = {
// Optimize for the case that most spans within a translated item
// originate from the same source_file.
@ -412,16 +473,32 @@ impl<'a, 'tcx> SpecializedDecoder<Span> for DecodeContext<'a, 'tcx> {
.binary_search_by_key(&lo, |source_file| source_file.original_start_pos)
.unwrap_or_else(|index| index - 1);
self.last_source_file_index = index;
// Don't try to cache the index for foreign spans,
// as this would require a map from CrateNums to indices
if tag == TAG_VALID_SPAN_LOCAL {
self.last_source_file_index = index;
}
&imported_source_files[index]
}
};
// Make sure our binary search above is correct.
debug_assert!(lo >= source_file.original_start_pos && lo <= source_file.original_end_pos);
debug_assert!(
lo >= source_file.original_start_pos && lo <= source_file.original_end_pos,
"Bad binary search: lo={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
lo,
source_file.original_start_pos,
source_file.original_end_pos
);
// Make sure we correctly filtered out invalid spans during encoding
debug_assert!(hi >= source_file.original_start_pos && hi <= source_file.original_end_pos);
debug_assert!(
hi >= source_file.original_start_pos && hi <= source_file.original_end_pos,
"Bad binary search: hi={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
hi,
source_file.original_start_pos,
source_file.original_end_pos
);
let lo =
(lo + source_file.translated_source_file.start_pos) - source_file.original_start_pos;
@ -1425,14 +1502,16 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
let local_version = local_source_map.new_imported_source_file(
name,
name_was_remapped,
self.cnum.as_u32(),
src_hash,
name_hash,
source_length,
self.cnum,
lines,
multibyte_chars,
non_narrow_chars,
normalized_pos,
start_pos,
end_pos,
);
debug!(
"CrateMetaData::imported_source_files alloc \

View File

@ -1,6 +1,7 @@
use crate::rmeta::table::FixedSizeEncoding;
use crate::rmeta::*;
use log::{debug, trace};
use rustc::hir::map::definitions::DefPathTable;
use rustc::hir::map::Map;
use rustc::middle::cstore::{EncodedMetadata, ForeignModule, LinkagePreference, NativeLibrary};
@ -29,9 +30,7 @@ use rustc_serialize::{opaque, Encodable, Encoder, SpecializedEncoder};
use rustc_session::config::{self, CrateType};
use rustc_span::source_map::Spanned;
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_span::{self, FileName, SourceFile, Span};
use log::{debug, trace};
use rustc_span::{self, ExternalSource, FileName, SourceFile, Span};
use std::hash::Hash;
use std::num::NonZeroUsize;
use std::path::Path;
@ -165,20 +164,56 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
return TAG_INVALID_SPAN.encode(self);
}
// HACK(eddyb) there's no way to indicate which crate a Span is coming
// from right now, so decoding would fail to find the SourceFile if
// it's not local to the crate the Span is found in.
if self.source_file_cache.is_imported() {
return TAG_INVALID_SPAN.encode(self);
}
// There are two possible cases here:
// 1. This span comes from a 'foreign' crate - e.g. some crate upstream of the
// crate we are writing metadata for. When the metadata for *this* crate gets
// deserialized, the deserializer will need to know which crate it originally came
// from. We use `TAG_VALID_SPAN_FOREIGN` to indicate that a `CrateNum` should
// be deserialized after the rest of the span data, which tells the deserializer
// which crate contains the source map information.
// 2. This span comes from our own crate. No special hamdling is needed - we just
// write `TAG_VALID_SPAN_LOCAL` to let the deserializer know that it should use
// our own source map information.
let (tag, lo, hi) = if self.source_file_cache.is_imported() {
// To simplify deserialization, we 'rebase' this span onto the crate it originally came from
// (the crate that 'owns' the file it references. These rebased 'lo' and 'hi' values
// are relative to the source map information for the 'foreign' crate whose CrateNum
// we write into the metadata. This allows `imported_source_files` to binary
// search through the 'foreign' crate's source map information, using the
// deserialized 'lo' and 'hi' values directly.
//
// All of this logic ensures that the final result of deserialization is a 'normal'
// Span that can be used without any additional trouble.
let external_start_pos = {
// Introduce a new scope so that we drop the 'lock()' temporary
match &*self.source_file_cache.external_src.lock() {
ExternalSource::Foreign { original_start_pos, .. } => *original_start_pos,
src => panic!("Unexpected external source {:?}", src),
}
};
let lo = (span.lo - self.source_file_cache.start_pos) + external_start_pos;
let hi = (span.hi - self.source_file_cache.start_pos) + external_start_pos;
TAG_VALID_SPAN.encode(self)?;
span.lo.encode(self)?;
(TAG_VALID_SPAN_FOREIGN, lo, hi)
} else {
(TAG_VALID_SPAN_LOCAL, span.lo, span.hi)
};
tag.encode(self)?;
lo.encode(self)?;
// Encode length which is usually less than span.hi and profits more
// from the variable-length integer encoding that we use.
let len = span.hi - span.lo;
len.encode(self)
let len = hi - lo;
len.encode(self)?;
if tag == TAG_VALID_SPAN_FOREIGN {
// This needs to be two lines to avoid holding the `self.source_file_cache`
// while calling `cnum.encode(self)`
let cnum = self.source_file_cache.cnum;
cnum.encode(self)?;
}
Ok(())
// Don't encode the expansion context.
}

View File

@ -405,5 +405,6 @@ struct GeneratorData<'tcx> {
}
// Tags used for encoding Spans:
const TAG_VALID_SPAN: u8 = 0;
const TAG_INVALID_SPAN: u8 = 1;
const TAG_VALID_SPAN_LOCAL: u8 = 0;
const TAG_VALID_SPAN_FOREIGN: u8 = 1;
const TAG_INVALID_SPAN: u8 = 2;

View File

@ -27,7 +27,7 @@ pub mod hygiene;
use hygiene::Transparency;
pub use hygiene::{DesugaringKind, ExpnData, ExpnId, ExpnKind, MacroKind, SyntaxContext};
pub mod def_id;
use def_id::DefId;
use def_id::{CrateNum, DefId, LOCAL_CRATE};
mod span_encoding;
pub use span_encoding::{Span, DUMMY_SP};
@ -839,30 +839,42 @@ pub struct NormalizedPos {
pub diff: u32,
}
/// The state of the lazy external source loading mechanism of a `SourceFile`.
#[derive(PartialEq, Eq, Clone)]
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum ExternalSource {
/// No external source has to be loaded, since the `SourceFile` represents a local crate.
Unneeded,
Foreign {
kind: ExternalSourceKind,
/// This SourceFile's byte-offset within the source_map of its original crate
original_start_pos: BytePos,
/// The end of this SourceFile within the source_map of its original crate
original_end_pos: BytePos,
},
}
/// The state of the lazy external source loading mechanism of a `SourceFile`.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum ExternalSourceKind {
/// The external source has been loaded already.
Present(String),
/// No attempt has been made to load the external source.
AbsentOk,
/// A failed attempt has been made to load the external source.
AbsentErr,
/// No external source has to be loaded, since the `SourceFile` represents a local crate.
Unneeded,
}
impl ExternalSource {
pub fn is_absent(&self) -> bool {
match *self {
ExternalSource::Present(_) => false,
match self {
ExternalSource::Foreign { kind: ExternalSourceKind::Present(_), .. } => false,
_ => true,
}
}
pub fn get_source(&self) -> Option<&str> {
match *self {
ExternalSource::Present(ref src) => Some(src),
match self {
ExternalSource::Foreign { kind: ExternalSourceKind::Present(ref src), .. } => Some(src),
_ => None,
}
}
@ -883,8 +895,6 @@ pub struct SourceFile {
/// The unmapped path of the file that the source came from.
/// Set to `None` if the `SourceFile` was imported from an external crate.
pub unmapped_path: Option<FileName>,
/// Indicates which crate this `SourceFile` was imported from.
pub crate_of_origin: u32,
/// The complete source code.
pub src: Option<Lrc<String>>,
/// The source code's hash.
@ -906,6 +916,8 @@ pub struct SourceFile {
pub normalized_pos: Vec<NormalizedPos>,
/// A hash of the filename, used for speeding up hashing in incremental compilation.
pub name_hash: u128,
/// Indicates which crate this `SourceFile` was imported from.
pub cnum: CrateNum,
}
impl Encodable for SourceFile {
@ -972,7 +984,8 @@ impl Encodable for SourceFile {
s.emit_struct_field("multibyte_chars", 6, |s| self.multibyte_chars.encode(s))?;
s.emit_struct_field("non_narrow_chars", 7, |s| self.non_narrow_chars.encode(s))?;
s.emit_struct_field("name_hash", 8, |s| self.name_hash.encode(s))?;
s.emit_struct_field("normalized_pos", 9, |s| self.normalized_pos.encode(s))
s.emit_struct_field("normalized_pos", 9, |s| self.normalized_pos.encode(s))?;
s.emit_struct_field("cnum", 10, |s| self.cnum.encode(s))
})
}
}
@ -1022,24 +1035,24 @@ impl Decodable for SourceFile {
let name_hash: u128 = d.read_struct_field("name_hash", 8, |d| Decodable::decode(d))?;
let normalized_pos: Vec<NormalizedPos> =
d.read_struct_field("normalized_pos", 9, |d| Decodable::decode(d))?;
let cnum: CrateNum = d.read_struct_field("cnum", 10, |d| Decodable::decode(d))?;
Ok(SourceFile {
name,
name_was_remapped,
unmapped_path: None,
// `crate_of_origin` has to be set by the importer.
// This value matches up with `rustc_hir::def_id::INVALID_CRATE`.
// That constant is not available here, unfortunately.
crate_of_origin: std::u32::MAX - 1,
start_pos,
end_pos,
src: None,
src_hash,
external_src: Lock::new(ExternalSource::AbsentOk),
// Unused - the metadata decoder will construct
// a new SourceFile, filling in `external_src` properly
external_src: Lock::new(ExternalSource::Unneeded),
lines,
multibyte_chars,
non_narrow_chars,
normalized_pos,
name_hash,
cnum,
})
})
}
@ -1081,7 +1094,6 @@ impl SourceFile {
name,
name_was_remapped,
unmapped_path: Some(unmapped_path),
crate_of_origin: 0,
src: Some(Lrc::new(src)),
src_hash,
external_src: Lock::new(ExternalSource::Unneeded),
@ -1092,6 +1104,7 @@ impl SourceFile {
non_narrow_chars,
normalized_pos,
name_hash,
cnum: LOCAL_CRATE,
}
}
@ -1109,21 +1122,27 @@ impl SourceFile {
where
F: FnOnce() -> Option<String>,
{
if *self.external_src.borrow() == ExternalSource::AbsentOk {
if matches!(
*self.external_src.borrow(),
ExternalSource::Foreign { kind: ExternalSourceKind::AbsentOk, .. }
) {
let src = get_src();
let mut external_src = self.external_src.borrow_mut();
// Check that no-one else have provided the source while we were getting it
if *external_src == ExternalSource::AbsentOk {
if let ExternalSource::Foreign {
kind: src_kind @ ExternalSourceKind::AbsentOk, ..
} = &mut *external_src
{
if let Some(src) = src {
let mut hasher: StableHasher = StableHasher::new();
hasher.write(src.as_bytes());
if hasher.finish::<u128>() == self.src_hash {
*external_src = ExternalSource::Present(src);
*src_kind = ExternalSourceKind::Present(src);
return true;
}
} else {
*external_src = ExternalSource::AbsentErr;
*src_kind = ExternalSourceKind::AbsentErr;
}
false

View File

@ -296,14 +296,16 @@ impl SourceMap {
&self,
filename: FileName,
name_was_remapped: bool,
crate_of_origin: u32,
src_hash: u128,
name_hash: u128,
source_len: usize,
cnum: CrateNum,
mut file_local_lines: Vec<BytePos>,
mut file_local_multibyte_chars: Vec<MultiByteChar>,
mut file_local_non_narrow_chars: Vec<NonNarrowChar>,
mut file_local_normalized_pos: Vec<NormalizedPos>,
original_start_pos: BytePos,
original_end_pos: BytePos,
) -> Lrc<SourceFile> {
let start_pos = self
.allocate_address_space(source_len)
@ -332,10 +334,13 @@ impl SourceMap {
name: filename,
name_was_remapped,
unmapped_path: None,
crate_of_origin,
src: None,
src_hash,
external_src: Lock::new(ExternalSource::AbsentOk),
external_src: Lock::new(ExternalSource::Foreign {
kind: ExternalSourceKind::AbsentOk,
original_start_pos,
original_end_pos,
}),
start_pos,
end_pos,
lines: file_local_lines,
@ -343,6 +348,7 @@ impl SourceMap {
non_narrow_chars: file_local_non_narrow_chars,
normalized_pos: file_local_normalized_pos,
name_hash,
cnum,
});
let mut files = self.files.borrow_mut();

View File

@ -0,0 +1,9 @@
#[macro_export]
macro_rules! define_parse_error {
() => {
#[macro_export]
macro_rules! parse_error {
() => { parse error }
}
}
}

View File

@ -0,0 +1,3 @@
extern crate transitive_dep_three;
transitive_dep_three::define_parse_error!();

View File

@ -0,0 +1,13 @@
// Tests that we properly serialize/deserialize spans from transitive dependencies
// (e.g. imported SourceFiles)
//
// The order of these next lines is important, since we need
// transitive_dep_two.rs to be able to reference transitive_dep_three.rs
//
// aux-build: transitive_dep_three.rs
// aux-build: transitive_dep_two.rs
// compile-flags: -Z macro-backtrace
extern crate transitive_dep_two;
transitive_dep_two::parse_error!(); //~ ERROR expected one of

View File

@ -0,0 +1,19 @@
error: expected one of `!` or `::`, found `error`
--> $DIR/auxiliary/transitive_dep_three.rs:6:27
|
LL | / macro_rules! parse_error {
LL | | () => { parse error }
| | ^^^^^ expected one of `!` or `::`
LL | | }
| |_________- in this expansion of `transitive_dep_two::parse_error!`
|
::: $DIR/transitive-dep-span.rs:13:1
|
LL | transitive_dep_two::parse_error!();
| -----------------------------------
| |
| in this macro invocation
| in this macro invocation
error: aborting due to previous error