From 45439945c9cb1122c882cefbe0e38c3bb6f20514 Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Fri, 24 Nov 2017 14:00:33 +0100 Subject: [PATCH] incr.comp.: Store Spans as (file,line,col,length) in incr.comp. cache. The previous method ran into problems because ICH would treat Spans as (file,line,col) but the cache contained byte offsets and its possible for the latter to change while the former stayed stable. --- src/librustc/ich/hcx.rs | 77 ++++------ src/librustc/ty/maps/on_disk_cache.rs | 208 +++++++++++++++++--------- src/libsyntax/codemap.rs | 2 +- src/libsyntax_pos/lib.rs | 5 + 4 files changed, 178 insertions(+), 114 deletions(-) diff --git a/src/librustc/ich/hcx.rs b/src/librustc/ich/hcx.rs index 6d7322fdafd..d95b825b9e5 100644 --- a/src/librustc/ich/hcx.rs +++ b/src/librustc/ich/hcx.rs @@ -28,7 +28,7 @@ use syntax::codemap::CodeMap; use syntax::ext::hygiene::SyntaxContext; use syntax::symbol::Symbol; -use syntax_pos::Span; +use syntax_pos::{Span, DUMMY_SP}; use rustc_data_structures::stable_hasher::{HashStable, StableHashingContextProvider, StableHasher, StableHasherResult, @@ -362,63 +362,52 @@ impl<'gcx> HashStable> for Span { fn hash_stable(&self, hcx: &mut StableHashingContext<'gcx>, hasher: &mut StableHasher) { - use syntax_pos::Pos; + const TAG_VALID_SPAN: u8 = 0; + const TAG_INVALID_SPAN: u8 = 1; + const TAG_EXPANSION: u8 = 0; + const TAG_NO_EXPANSION: u8 = 1; if !hcx.hash_spans { return } + if *self == DUMMY_SP { + return std_hash::Hash::hash(&TAG_INVALID_SPAN, hasher); + } + // If this is not an empty or invalid span, we want to hash the last // position that belongs to it, as opposed to hashing the first // position past it. let span = self.data(); - let span_hi = if span.hi > span.lo { - // We might end up in the middle of a multibyte character here, - // but that's OK, since we are not trying to decode anything at - // this position. - span.hi - ::syntax_pos::BytePos(1) - } else { - span.hi - }; - { - let loc1 = hcx.codemap().byte_pos_to_line_and_col(span.lo); - let loc1 = loc1.as_ref() - .map(|&(ref fm, line, col)| (&fm.name[..], line, col.to_usize())) - .unwrap_or(("???", 0, 0)); - - let loc2 = hcx.codemap().byte_pos_to_line_and_col(span_hi); - let loc2 = loc2.as_ref() - .map(|&(ref fm, line, col)| (&fm.name[..], line, col.to_usize())) - .unwrap_or(("???", 0, 0)); - - if loc1.0 == loc2.0 { - std_hash::Hash::hash(&0u8, hasher); - - std_hash::Hash::hash(loc1.0, hasher); - std_hash::Hash::hash(&loc1.1, hasher); - std_hash::Hash::hash(&loc1.2, hasher); - - // Do not hash the file name twice - std_hash::Hash::hash(&loc2.1, hasher); - std_hash::Hash::hash(&loc2.2, hasher); - } else { - std_hash::Hash::hash(&1u8, hasher); - - std_hash::Hash::hash(loc1.0, hasher); - std_hash::Hash::hash(&loc1.1, hasher); - std_hash::Hash::hash(&loc1.2, hasher); - - std_hash::Hash::hash(loc2.0, hasher); - std_hash::Hash::hash(&loc2.1, hasher); - std_hash::Hash::hash(&loc2.2, hasher); - } + if span.hi < span.lo { + return std_hash::Hash::hash(&TAG_INVALID_SPAN, hasher); } + let (file_lo, line_lo, col_lo) = match hcx.codemap() + .byte_pos_to_line_and_col(span.lo) { + Some(pos) => pos, + None => { + return std_hash::Hash::hash(&TAG_INVALID_SPAN, hasher); + } + }; + + if !file_lo.contains(span.hi) { + return std_hash::Hash::hash(&TAG_INVALID_SPAN, hasher); + } + + let len = span.hi - span.lo; + + std_hash::Hash::hash(&TAG_VALID_SPAN, hasher); + std_hash::Hash::hash(&file_lo.name, hasher); + std_hash::Hash::hash(&line_lo, hasher); + std_hash::Hash::hash(&col_lo, hasher); + std_hash::Hash::hash(&len, hasher); + if span.ctxt == SyntaxContext::empty() { - 0u8.hash_stable(hcx, hasher); + TAG_NO_EXPANSION.hash_stable(hcx, hasher); } else { - 1u8.hash_stable(hcx, hasher); + TAG_EXPANSION.hash_stable(hcx, hasher); span.ctxt.outer().expn_info().hash_stable(hcx, hasher); } } diff --git a/src/librustc/ty/maps/on_disk_cache.rs b/src/librustc/ty/maps/on_disk_cache.rs index 6ca787ad19e..40103571afa 100644 --- a/src/librustc/ty/maps/on_disk_cache.rs +++ b/src/librustc/ty/maps/on_disk_cache.rs @@ -14,6 +14,7 @@ use hir::def_id::{CrateNum, DefIndex, DefId, LocalDefId, RESERVED_FOR_INCR_COMP_CACHE, LOCAL_CRATE}; use hir::map::definitions::DefPathHash; +use ich::CachingCodemapView; use middle::cstore::CrateStore; use mir; use rustc_data_structures::fx::FxHashMap; @@ -23,11 +24,11 @@ UseSpecializedDecodable, UseSpecializedEncodable}; use session::{CrateDisambiguator, Session}; use std::cell::RefCell; -use std::collections::BTreeMap; use std::mem; +use std::rc::Rc; use syntax::ast::NodeId; use syntax::codemap::{CodeMap, StableFilemapId}; -use syntax_pos::{BytePos, Span, DUMMY_SP}; +use syntax_pos::{BytePos, Span, DUMMY_SP, FileMap}; use syntax_pos::hygiene::{Mark, SyntaxContext, ExpnInfo}; use ty; use ty::codec::{self as ty_codec, TyDecoder, TyEncoder}; @@ -45,6 +46,9 @@ const TAG_EXPANSION_INFO_SHORTHAND: u8 = 1; const TAG_EXPANSION_INFO_INLINE: u8 = 2; +const TAG_VALID_SPAN: u8 = 0; +const TAG_INVALID_SPAN: u8 = 1; + /// `OnDiskCache` provides an interface to incr. comp. data cached from the /// previous compilation session. This data will eventually include the results /// of a few selected queries (like `typeck_tables_of` and `mir_optimized`) and @@ -64,8 +68,11 @@ pub struct OnDiskCache<'sess> { prev_cnums: Vec<(u32, String, CrateDisambiguator)>, cnum_map: RefCell>>>, - prev_filemap_starts: BTreeMap, codemap: &'sess CodeMap, + file_index_to_stable_id: FxHashMap, + + // These two fields caches that are populated lazily during decoding. + file_index_to_file: RefCell>>, synthetic_expansion_infos: RefCell>, // A map from dep-node to the position of the cached query result in @@ -76,13 +83,16 @@ pub struct OnDiskCache<'sess> { // This type is used only for (de-)serialization. #[derive(RustcEncodable, RustcDecodable)] struct Header { - prev_filemap_starts: BTreeMap, + file_index_to_stable_id: FxHashMap, prev_cnums: Vec<(u32, String, CrateDisambiguator)>, } type EncodedPrevDiagnostics = Vec<(SerializedDepNodeIndex, Vec)>; type EncodedQueryResultIndex = Vec<(SerializedDepNodeIndex, usize)>; +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, RustcEncodable, RustcDecodable)] +struct FileMapIndex(u32); + impl<'sess> OnDiskCache<'sess> { /// Create a new OnDiskCache instance from the serialized data in `data`. pub fn new(sess: &'sess Session, data: Vec, start_pos: usize) -> OnDiskCache<'sess> { @@ -97,15 +107,17 @@ pub fn new(sess: &'sess Session, data: Vec, start_pos: usize) -> OnDiskCache }; let mut synthetic_expansion_infos = FxHashMap(); + let mut file_index_to_file = FxHashMap(); let (prev_diagnostics, query_result_index) = { let mut decoder = CacheDecoder { tcx: None, opaque: opaque::Decoder::new(&data[..], post_header_pos), codemap: sess.codemap(), - prev_filemap_starts: &header.prev_filemap_starts, cnum_map: &IndexVec::new(), synthetic_expansion_infos: &mut synthetic_expansion_infos, + file_index_to_file: &mut file_index_to_file, + file_index_to_stable_id: &header.file_index_to_stable_id, }; // Decode Diagnostics @@ -138,7 +150,8 @@ pub fn new(sess: &'sess Session, data: Vec, start_pos: usize) -> OnDiskCache OnDiskCache { serialized_data: data, prev_diagnostics, - prev_filemap_starts: header.prev_filemap_starts, + file_index_to_stable_id: header.file_index_to_stable_id, + file_index_to_file: RefCell::new(file_index_to_file), prev_cnums: header.prev_cnums, cnum_map: RefCell::new(None), codemap: sess.codemap(), @@ -152,7 +165,8 @@ pub fn new_empty(codemap: &'sess CodeMap) -> OnDiskCache<'sess> { OnDiskCache { serialized_data: Vec::new(), prev_diagnostics: FxHashMap(), - prev_filemap_starts: BTreeMap::new(), + file_index_to_stable_id: FxHashMap(), + file_index_to_file: RefCell::new(FxHashMap()), prev_cnums: vec![], cnum_map: RefCell::new(None), codemap, @@ -172,23 +186,32 @@ pub fn serialize<'a, 'tcx, E>(&self, // Serializing the DepGraph should not modify it: let _in_ignore = tcx.dep_graph.in_ignore(); + // Allocate FileMapIndices + let (file_to_file_index, file_index_to_stable_id) = { + let mut file_to_file_index = FxHashMap(); + let mut file_index_to_stable_id = FxHashMap(); + + for (index, file) in tcx.sess.codemap().files().iter().enumerate() { + let index = FileMapIndex(index as u32); + let file_ptr: *const FileMap = &**file as *const _; + file_to_file_index.insert(file_ptr, index); + file_index_to_stable_id.insert(index, StableFilemapId::new(&file)); + } + + (file_to_file_index, file_index_to_stable_id) + }; + let mut encoder = CacheEncoder { tcx, encoder, type_shorthands: FxHashMap(), predicate_shorthands: FxHashMap(), expn_info_shorthands: FxHashMap(), + codemap: CachingCodemapView::new(tcx.sess.codemap()), + file_to_file_index, }; - // Encode the file header - let prev_filemap_starts: BTreeMap<_, _> = self - .codemap - .files() - .iter() - .map(|fm| (fm.start_pos, StableFilemapId::new(fm))) - .collect(); - let sorted_cnums = sorted_cnums_including_local_crate(cstore); let prev_cnums: Vec<_> = sorted_cnums.iter().map(|&cnum| { @@ -198,7 +221,7 @@ pub fn serialize<'a, 'tcx, E>(&self, }).collect(); Header { - prev_filemap_starts, + file_index_to_stable_id, prev_cnums, }.encode(&mut encoder)?; @@ -282,14 +305,16 @@ pub fn load_query_result<'a, 'tcx, T>(&self, } let mut synthetic_expansion_infos = self.synthetic_expansion_infos.borrow_mut(); + let mut file_index_to_file = self.file_index_to_file.borrow_mut(); let mut decoder = CacheDecoder { tcx: Some(tcx), opaque: opaque::Decoder::new(&self.serialized_data[..], pos), codemap: self.codemap, - prev_filemap_starts: &self.prev_filemap_starts, cnum_map: cnum_map.as_ref().unwrap(), - synthetic_expansion_infos: &mut *synthetic_expansion_infos, + file_index_to_file: &mut file_index_to_file, + file_index_to_stable_id: &self.file_index_to_stable_id, + synthetic_expansion_infos: &mut synthetic_expansion_infos, }; match decode_tagged(&mut decoder, dep_node_index) { @@ -363,20 +388,26 @@ struct CacheDecoder<'a, 'tcx: 'a, 'x> { tcx: Option>, opaque: opaque::Decoder<'x>, codemap: &'x CodeMap, - prev_filemap_starts: &'x BTreeMap, cnum_map: &'x IndexVec>, synthetic_expansion_infos: &'x mut FxHashMap, + file_index_to_file: &'x mut FxHashMap>, + file_index_to_stable_id: &'x FxHashMap, } impl<'a, 'tcx, 'x> CacheDecoder<'a, 'tcx, 'x> { - fn find_filemap_prev_bytepos(&self, - prev_bytepos: BytePos) - -> Option<(BytePos, StableFilemapId)> { - for (start, id) in self.prev_filemap_starts.range(BytePos(0) ..= prev_bytepos).rev() { - return Some((*start, *id)) - } + fn file_index_to_file(&mut self, index: FileMapIndex) -> Rc { + let CacheDecoder { + ref mut file_index_to_file, + ref file_index_to_stable_id, + ref codemap, + .. + } = *self; - None + file_index_to_file.entry(index).or_insert_with(|| { + let stable_id = file_index_to_stable_id[&index]; + codemap.filemap_by_stable_id(stable_id) + .expect("Failed to lookup FileMap in new context.") + }).clone() } } @@ -466,50 +497,55 @@ fn map_encoded_cnum_to_current(&self, cnum: CrateNum) -> CrateNum { impl<'a, 'tcx, 'x> SpecializedDecoder for CacheDecoder<'a, 'tcx, 'x> { fn specialized_decode(&mut self) -> Result { - let lo = BytePos::decode(self)?; - let hi = BytePos::decode(self)?; + let tag: u8 = Decodable::decode(self)?; - if let Some((prev_filemap_start, filemap_id)) = self.find_filemap_prev_bytepos(lo) { - if let Some(current_filemap) = self.codemap.filemap_by_stable_id(filemap_id) { - let lo = (lo + current_filemap.start_pos) - prev_filemap_start; - let hi = (hi + current_filemap.start_pos) - prev_filemap_start; - - let expn_info_tag = u8::decode(self)?; - - let ctxt = match expn_info_tag { - TAG_NO_EXPANSION_INFO => { - SyntaxContext::empty() - } - TAG_EXPANSION_INFO_INLINE => { - let pos = self.position(); - let expn_info: ExpnInfo = Decodable::decode(self)?; - let ctxt = SyntaxContext::allocate_directly(expn_info); - self.synthetic_expansion_infos.insert(pos, ctxt); - ctxt - } - TAG_EXPANSION_INFO_SHORTHAND => { - let pos = usize::decode(self)?; - if let Some(ctxt) = self.synthetic_expansion_infos.get(&pos).cloned() { - ctxt - } else { - let expn_info = self.with_position(pos, |this| { - ExpnInfo::decode(this) - })?; - let ctxt = SyntaxContext::allocate_directly(expn_info); - self.synthetic_expansion_infos.insert(pos, ctxt); - ctxt - } - } - _ => { - unreachable!() - } - }; - - return Ok(Span::new(lo, hi, ctxt)); - } + if tag == TAG_INVALID_SPAN { + return Ok(DUMMY_SP); + } else { + debug_assert_eq!(tag, TAG_VALID_SPAN); } - Ok(DUMMY_SP) + let file_lo_index = FileMapIndex::decode(self)?; + let line_lo = usize::decode(self)?; + let col_lo = BytePos::decode(self)?; + let len = BytePos::decode(self)?; + + let file_lo = self.file_index_to_file(file_lo_index); + let lo = file_lo.lines.borrow()[line_lo - 1] + col_lo; + let hi = lo + len; + + let expn_info_tag = u8::decode(self)?; + + let ctxt = match expn_info_tag { + TAG_NO_EXPANSION_INFO => { + SyntaxContext::empty() + } + TAG_EXPANSION_INFO_INLINE => { + let pos = self.position(); + let expn_info: ExpnInfo = Decodable::decode(self)?; + let ctxt = SyntaxContext::allocate_directly(expn_info); + self.synthetic_expansion_infos.insert(pos, ctxt); + ctxt + } + TAG_EXPANSION_INFO_SHORTHAND => { + let pos = usize::decode(self)?; + if let Some(ctxt) = self.synthetic_expansion_infos.get(&pos).cloned() { + ctxt + } else { + let expn_info = self.with_position(pos, |this| { + ExpnInfo::decode(this) + })?; + let ctxt = SyntaxContext::allocate_directly(expn_info); + self.synthetic_expansion_infos.insert(pos, ctxt); + ctxt + } + } + _ => { + unreachable!() + } + }; + + Ok(Span::new(lo, hi, ctxt)) } } @@ -609,11 +645,17 @@ struct CacheEncoder<'enc, 'a, 'tcx, E> type_shorthands: FxHashMap, usize>, predicate_shorthands: FxHashMap, usize>, expn_info_shorthands: FxHashMap, + codemap: CachingCodemapView<'tcx>, + file_to_file_index: FxHashMap<*const FileMap, FileMapIndex>, } impl<'enc, 'a, 'tcx, E> CacheEncoder<'enc, 'a, 'tcx, E> where E: 'enc + ty_codec::TyEncoder { + fn filemap_index(&mut self, filemap: Rc) -> FileMapIndex { + self.file_to_file_index[&(&*filemap as *const FileMap)] + } + /// Encode something with additional information that allows to do some /// sanity checks when decoding the data again. This method will first /// encode the specified tag, then the given value, then the number of @@ -639,10 +681,38 @@ impl<'enc, 'a, 'tcx, E> SpecializedEncoder for CacheEncoder<'enc, 'a, 'tcx where E: 'enc + ty_codec::TyEncoder { fn specialized_encode(&mut self, span: &Span) -> Result<(), Self::Error> { + + if *span == DUMMY_SP { + return TAG_INVALID_SPAN.encode(self); + } + let span_data = span.data(); - span_data.lo.encode(self)?; - span_data.hi.encode(self)?; + if span_data.hi < span_data.lo { + return TAG_INVALID_SPAN.encode(self); + } + + let (file_lo, line_lo, col_lo) = match self.codemap + .byte_pos_to_line_and_col(span_data.lo) { + Some(pos) => pos, + None => { + return TAG_INVALID_SPAN.encode(self); + } + }; + + if !file_lo.contains(span_data.hi) { + return TAG_INVALID_SPAN.encode(self); + } + + let len = span_data.hi - span_data.lo; + + let filemap_index = self.filemap_index(file_lo); + + TAG_VALID_SPAN.encode(self)?; + filemap_index.encode(self)?; + line_lo.encode(self)?; + col_lo.encode(self)?; + len.encode(self)?; if span_data.ctxt == SyntaxContext::empty() { TAG_NO_EXPANSION_INFO.encode(self) diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs index 3464db2a811..3aac5334a38 100644 --- a/src/libsyntax/codemap.rs +++ b/src/libsyntax/codemap.rs @@ -105,7 +105,7 @@ fn read_file(&self, path: &Path) -> io::Result { // This is a FileMap identifier that is used to correlate FileMaps between // subsequent compilation sessions (which is something we need to do during // incremental compilation). -#[derive(Copy, Clone, PartialEq, Eq, Hash, RustcEncodable, RustcDecodable)] +#[derive(Copy, Clone, PartialEq, Eq, Hash, RustcEncodable, RustcDecodable, Debug)] pub struct StableFilemapId(u128); impl StableFilemapId { diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs index 47755dc1d54..bf059cac891 100644 --- a/src/libsyntax_pos/lib.rs +++ b/src/libsyntax_pos/lib.rs @@ -931,6 +931,11 @@ pub fn line_bounds(&self, line_index: usize) -> (BytePos, BytePos) { (lines[line_index], lines[line_index + 1]) } } + + #[inline] + pub fn contains(&self, byte_pos: BytePos) -> bool { + byte_pos >= self.start_pos && byte_pos <= self.end_pos + } } /// Remove utf-8 BOM if any.