From 95150d72465db491f4b04b73545e106462bd003b Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Fri, 3 May 2024 21:17:57 -0400 Subject: [PATCH] Add a footer in FileEncoder and check for it in MemDecoder --- compiler/rustc_codegen_ssa/src/lib.rs | 5 ++- compiler/rustc_driver_impl/messages.ftl | 2 ++ compiler/rustc_driver_impl/src/lib.rs | 8 +++-- .../src/session_diagnostics.rs | 6 ++++ compiler/rustc_incremental/messages.ftl | 2 ++ compiler/rustc_incremental/src/errors.rs | 6 ++++ .../rustc_incremental/src/persist/load.rs | 21 ++++++++--- compiler/rustc_metadata/src/locator.rs | 7 +++- compiler/rustc_metadata/src/rmeta/decoder.rs | 28 +++++++++++---- .../src/rmeta/def_path_hash_map.rs | 2 +- .../rustc_middle/src/query/on_disk_cache.rs | 35 +++++++++---------- .../src/dep_graph/serialized.rs | 10 ++---- compiler/rustc_serialize/src/opaque.rs | 16 ++++++--- compiler/rustc_serialize/tests/leb128.rs | 13 ++++--- compiler/rustc_serialize/tests/opaque.rs | 2 +- src/librustdoc/scrape_examples.rs | 4 ++- 16 files changed, 115 insertions(+), 52 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index 4eb24d71009..9bf8fde55fc 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -195,6 +195,7 @@ pub enum CodegenErrors { EmptyVersionNumber, EncodingVersionMismatch { version_array: String, rlink_version: u32 }, RustcVersionMismatch { rustc_version: String }, + CorruptFile, } pub fn provide(providers: &mut Providers) { @@ -265,7 +266,9 @@ pub fn deserialize_rlink( }); } - let mut decoder = MemDecoder::new(&data[4..], 0); + let Some(mut decoder) = MemDecoder::new(&data[4..], 0) else { + return Err(CodegenErrors::CorruptFile); + }; let rustc_version = decoder.read_str(); if rustc_version != sess.cfg_version { return Err(CodegenErrors::RustcVersionMismatch { diff --git a/compiler/rustc_driver_impl/messages.ftl b/compiler/rustc_driver_impl/messages.ftl index 5b39248302e..31837e01764 100644 --- a/compiler/rustc_driver_impl/messages.ftl +++ b/compiler/rustc_driver_impl/messages.ftl @@ -10,6 +10,8 @@ driver_impl_ice_path_error = the ICE couldn't be written to `{$path}`: {$error} driver_impl_ice_path_error_env = the environment variable `RUSTC_ICE` is set to `{$env_var}` driver_impl_ice_version = rustc {$version} running on {$triple} +driver_impl_rlink_corrupt_file = corrupt metadata encountered in `{$file}` + driver_impl_rlink_empty_version_number = The input does not contain version number driver_impl_rlink_encoding_version_mismatch = .rlink file was produced with encoding version `{$version_array}`, but the current version is `{$rlink_version}` diff --git a/compiler/rustc_driver_impl/src/lib.rs b/compiler/rustc_driver_impl/src/lib.rs index ba6b9ef0784..30cc41ecea0 100644 --- a/compiler/rustc_driver_impl/src/lib.rs +++ b/compiler/rustc_driver_impl/src/lib.rs @@ -96,7 +96,7 @@ pub(super) fn install() {} use crate::session_diagnostics::{ RLinkEmptyVersionNumber, RLinkEncodingVersionMismatch, RLinkRustcVersionMismatch, - RLinkWrongFileType, RlinkNotAFile, RlinkUnableToRead, + RLinkWrongFileType, RlinkCorruptFile, RlinkNotAFile, RlinkUnableToRead, }; rustc_fluent_macro::fluent_messages! { "../messages.ftl" } @@ -645,8 +645,7 @@ fn process_rlink(sess: &Session, compiler: &interface::Compiler) { match err { CodegenErrors::WrongFileType => dcx.emit_fatal(RLinkWrongFileType), CodegenErrors::EmptyVersionNumber => dcx.emit_fatal(RLinkEmptyVersionNumber), - CodegenErrors::EncodingVersionMismatch { version_array, rlink_version } => sess - .dcx() + CodegenErrors::EncodingVersionMismatch { version_array, rlink_version } => dcx .emit_fatal(RLinkEncodingVersionMismatch { version_array, rlink_version }), CodegenErrors::RustcVersionMismatch { rustc_version } => { dcx.emit_fatal(RLinkRustcVersionMismatch { @@ -654,6 +653,9 @@ fn process_rlink(sess: &Session, compiler: &interface::Compiler) { current_version: sess.cfg_version, }) } + CodegenErrors::CorruptFile => { + dcx.emit_fatal(RlinkCorruptFile { file: file.display().to_string() }); + } }; } }; diff --git a/compiler/rustc_driver_impl/src/session_diagnostics.rs b/compiler/rustc_driver_impl/src/session_diagnostics.rs index 1a9683e840a..864b122233a 100644 --- a/compiler/rustc_driver_impl/src/session_diagnostics.rs +++ b/compiler/rustc_driver_impl/src/session_diagnostics.rs @@ -32,6 +32,12 @@ pub(crate) struct RLinkRustcVersionMismatch<'a> { #[diag(driver_impl_rlink_no_a_file)] pub(crate) struct RlinkNotAFile; +#[derive(Diagnostic)] +#[diag(driver_impl_rlink_corrupt_file)] +pub(crate) struct RlinkCorruptFile { + pub file: String, +} + #[derive(Diagnostic)] #[diag(driver_impl_ice)] pub(crate) struct Ice; diff --git a/compiler/rustc_incremental/messages.ftl b/compiler/rustc_incremental/messages.ftl index e74173b24a9..de2177ebb6e 100644 --- a/compiler/rustc_incremental/messages.ftl +++ b/compiler/rustc_incremental/messages.ftl @@ -21,6 +21,8 @@ incremental_cargo_help_2 = incremental_copy_workproduct_to_cache = error copying object file `{$from}` to incremental directory as `{$to}`: {$err} +incremental_corrupt_file = corrupt incremental compilation artifact found at `{$path}`. This file will automatically be ignored and deleted. If you see this message repeatedly or can provoke it without manually manipulating the compiler's artifacts, please file an issue. The incremental compilation system relies on hardlinks and filesystem locks behaving correctly, and may not deal well with OS crashes, so whatever information you can provide about your filesystem or other state may be very relevant. + incremental_create_dep_graph = failed to create dependency graph at `{$path}`: {$err} incremental_create_incr_comp_dir = diff --git a/compiler/rustc_incremental/src/errors.rs b/compiler/rustc_incremental/src/errors.rs index 61bb0353a9f..e94a7fb876b 100644 --- a/compiler/rustc_incremental/src/errors.rs +++ b/compiler/rustc_incremental/src/errors.rs @@ -306,3 +306,9 @@ pub struct DeleteWorkProduct<'a> { pub path: &'a Path, pub err: std::io::Error, } + +#[derive(Diagnostic)] +#[diag(incremental_corrupt_file)] +pub struct CorruptFile<'a> { + pub path: &'a Path, +} diff --git a/compiler/rustc_incremental/src/persist/load.rs b/compiler/rustc_incremental/src/persist/load.rs index 26aaa24771f..c96d22ce45d 100644 --- a/compiler/rustc_incremental/src/persist/load.rs +++ b/compiler/rustc_incremental/src/persist/load.rs @@ -115,7 +115,12 @@ fn load_dep_graph(sess: &Session) -> LoadResult<(Arc, WorkPr if let LoadResult::Ok { data: (work_products_data, start_pos) } = load_result { // Decode the list of work_products - let mut work_product_decoder = MemDecoder::new(&work_products_data[..], start_pos); + let Some(mut work_product_decoder) = + MemDecoder::new(&work_products_data[..], start_pos) + else { + sess.dcx().emit_warn(errors::CorruptFile { path: &work_products_path }); + return LoadResult::DataOutOfDate; + }; let work_products: Vec = Decodable::decode(&mut work_product_decoder); @@ -145,7 +150,10 @@ fn load_dep_graph(sess: &Session) -> LoadResult<(Arc, WorkPr LoadResult::DataOutOfDate => LoadResult::DataOutOfDate, LoadResult::LoadDepGraph(path, err) => LoadResult::LoadDepGraph(path, err), LoadResult::Ok { data: (bytes, start_pos) } => { - let mut decoder = MemDecoder::new(&bytes, start_pos); + let Some(mut decoder) = MemDecoder::new(&bytes, start_pos) else { + sess.dcx().emit_warn(errors::CorruptFile { path: &path }); + return LoadResult::DataOutOfDate; + }; let prev_commandline_args_hash = u64::decode(&mut decoder); if prev_commandline_args_hash != expected_hash { @@ -181,9 +189,14 @@ pub fn load_query_result_cache(sess: &Session) -> Option> { let _prof_timer = sess.prof.generic_activity("incr_comp_load_query_result_cache"); - match load_data(&query_cache_path(sess), sess) { + let path = query_cache_path(sess); + match load_data(&path, sess) { LoadResult::Ok { data: (bytes, start_pos) } => { - Some(OnDiskCache::new(sess, bytes, start_pos)) + let cache = OnDiskCache::new(sess, bytes, start_pos).unwrap_or_else(|| { + sess.dcx().emit_warn(errors::CorruptFile { path: &path }); + OnDiskCache::new_empty(sess.source_map()) + }); + Some(cache) } _ => Some(OnDiskCache::new_empty(sess.source_map())), } diff --git a/compiler/rustc_metadata/src/locator.rs b/compiler/rustc_metadata/src/locator.rs index 7de03be6da6..fa33eccd2a1 100644 --- a/compiler/rustc_metadata/src/locator.rs +++ b/compiler/rustc_metadata/src/locator.rs @@ -853,7 +853,12 @@ fn get_metadata_section<'p>( slice_owned(mmap, Deref::deref) } }; - let blob = MetadataBlob(raw_bytes); + let Some(blob) = MetadataBlob::new(raw_bytes) else { + return Err(MetadataError::LoadFailure(format!( + "corrupt metadata encountered in {}", + filename.display() + ))); + }; match blob.check_compatibility(cfg_version) { Ok(()) => Ok(blob), Err(None) => Err(MetadataError::LoadFailure(format!( diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index bb68c6eaf09..1058f58da13 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -40,10 +40,9 @@ mod cstore_impl; /// A reference to the raw binary version of crate metadata. -/// A `MetadataBlob` internally is just a reference counted pointer to -/// the actual data, so cloning it is cheap. -#[derive(Clone)] -pub(crate) struct MetadataBlob(pub(crate) OwnedSlice); +/// This struct applies [`MemDecoder`]'s validation when constructed +/// so that later constructions are guaranteed to succeed. +pub(crate) struct MetadataBlob(OwnedSlice); impl std::ops::Deref for MetadataBlob { type Target = [u8]; @@ -54,6 +53,16 @@ fn deref(&self) -> &[u8] { } } +impl MetadataBlob { + pub fn new(slice: OwnedSlice) -> Option { + if MemDecoder::new(&*slice, 0).is_some() { Some(Self(slice)) } else { None } + } + + pub fn bytes(&self) -> &OwnedSlice { + &self.0 + } +} + /// A map from external crate numbers (as decoded from some crate file) to /// local crate numbers (as generated during this session). Each external /// crate may refer to types in other external crates, and each has their @@ -165,7 +174,14 @@ fn tcx(self) -> Option> { fn decoder(self, pos: usize) -> DecodeContext<'a, 'tcx> { let tcx = self.tcx(); DecodeContext { - opaque: MemDecoder::new(self.blob(), pos), + // FIXME: This unwrap should never panic because we check that it won't when creating + // `MetadataBlob`. Ideally we'd just have a `MetadataDecoder` and hand out subslices of + // it as we do elsewhere in the compiler using `MetadataDecoder::split_at`. But we own + // the data for the decoder so holding onto the `MemDecoder` too would make us a + // self-referential struct which is downright goofy because `MetadataBlob` is already + // self-referential. Probably `MemDecoder` should contain an `OwnedSlice`, but that + // demands a significant refactoring due to our crate graph. + opaque: MemDecoder::new(self.blob(), pos).unwrap(), cdata: self.cdata(), blob: self.blob(), sess: self.sess().or(tcx.map(|tcx| tcx.sess)), @@ -393,7 +409,7 @@ fn with_position(&mut self, pos: usize, f: F) -> R where F: FnOnce(&mut Self) -> R, { - let new_opaque = MemDecoder::new(self.opaque.data(), pos); + let new_opaque = self.opaque.split_at(pos); let old_opaque = mem::replace(&mut self.opaque, new_opaque); let old_state = mem::replace(&mut self.lazy_state, LazyState::NoNode); let r = f(self); diff --git a/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs b/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs index 9950bc1c31f..861bf6b2769 100644 --- a/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs +++ b/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs @@ -48,7 +48,7 @@ impl<'a, 'tcx> Decodable> for DefPathHashMapRef<'static> fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefPathHashMapRef<'static> { let len = d.read_usize(); let pos = d.position(); - let o = d.blob().clone().0.slice(|blob| &blob[pos..pos + len]); + let o = d.blob().bytes().clone().slice(|blob| &blob[pos..pos + len]); // Although we already have the data we need via the `OwnedSlice`, we still need // to advance the `DecodeContext`'s position so it's in a valid state after diff --git a/compiler/rustc_middle/src/query/on_disk_cache.rs b/compiler/rustc_middle/src/query/on_disk_cache.rs index 2dcb58729ff..6815e4263d5 100644 --- a/compiler/rustc_middle/src/query/on_disk_cache.rs +++ b/compiler/rustc_middle/src/query/on_disk_cache.rs @@ -154,24 +154,22 @@ fn new(tcx: TyCtxt<'_>, file: &SourceFile) -> EncodedSourceFileId { impl<'sess> OnDiskCache<'sess> { /// Creates a new `OnDiskCache` instance from the serialized data in `data`. - pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Self { - debug_assert!(sess.opts.incremental.is_some()); + pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Option { + assert!(sess.opts.incremental.is_some()); - // Wrap in a scope so we can borrow `data`. - let footer: Footer = { - let mut decoder = MemDecoder::new(&data, start_pos); + let mut decoder = MemDecoder::new(&data, start_pos)?; - // Decode the *position* of the footer, which can be found in the - // last 8 bytes of the file. - let footer_pos = decoder - .with_position(decoder.len() - IntEncodedWithFixedSize::ENCODED_SIZE, |decoder| { - IntEncodedWithFixedSize::decode(decoder).0 as usize - }); - // Decode the file footer, which contains all the lookup tables, etc. - decoder.with_position(footer_pos, |decoder| decode_tagged(decoder, TAG_FILE_FOOTER)) - }; + // Decode the *position* of the footer, which can be found in the + // last 8 bytes of the file. + let footer_pos = decoder + .with_position(decoder.len() - IntEncodedWithFixedSize::ENCODED_SIZE, |decoder| { + IntEncodedWithFixedSize::decode(decoder).0 as usize + }); + // Decode the file footer, which contains all the lookup tables, etc. + let footer: Footer = + decoder.with_position(footer_pos, |decoder| decode_tagged(decoder, TAG_FILE_FOOTER)); - Self { + Some(Self { serialized_data: RwLock::new(Some(data)), file_index_to_stable_id: footer.file_index_to_stable_id, file_index_to_file: Default::default(), @@ -184,7 +182,7 @@ pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Self { expn_data: footer.expn_data, foreign_expn_data: footer.foreign_expn_data, hygiene_context: Default::default(), - } + }) } pub fn new_empty(source_map: &'sess SourceMap) -> Self { @@ -437,7 +435,8 @@ fn with_decoder<'a, 'tcx, T, F: for<'s> FnOnce(&mut CacheDecoder<'s, 'tcx>) -> T let serialized_data = self.serialized_data.read(); let mut decoder = CacheDecoder { tcx, - opaque: MemDecoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize()), + opaque: MemDecoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize()) + .unwrap(), source_map: self.source_map, file_index_to_file: &self.file_index_to_file, file_index_to_stable_id: &self.file_index_to_stable_id, @@ -558,7 +557,7 @@ fn with_position(&mut self, pos: usize, f: F) -> R { debug_assert!(pos < self.opaque.len()); - let new_opaque = MemDecoder::new(self.opaque.data(), pos); + let new_opaque = self.opaque.split_at(pos); let old_opaque = mem::replace(&mut self.opaque, new_opaque); let r = f(self); self.opaque = old_opaque; diff --git a/compiler/rustc_query_system/src/dep_graph/serialized.rs b/compiler/rustc_query_system/src/dep_graph/serialized.rs index b426bb888f4..8e91d9dd60b 100644 --- a/compiler/rustc_query_system/src/dep_graph/serialized.rs +++ b/compiler/rustc_query_system/src/dep_graph/serialized.rs @@ -182,15 +182,13 @@ impl SerializedDepGraph { pub fn decode(d: &mut MemDecoder<'_>) -> Arc { // The last 16 bytes are the node count and edge count. debug!("position: {:?}", d.position()); - let (node_count, edge_count, graph_size) = - d.with_position(d.len() - 3 * IntEncodedWithFixedSize::ENCODED_SIZE, |d| { + let (node_count, edge_count) = + d.with_position(d.len() - 2 * IntEncodedWithFixedSize::ENCODED_SIZE, |d| { debug!("position: {:?}", d.position()); let node_count = IntEncodedWithFixedSize::decode(d).0 as usize; let edge_count = IntEncodedWithFixedSize::decode(d).0 as usize; - let graph_size = IntEncodedWithFixedSize::decode(d).0 as usize; - (node_count, edge_count, graph_size) + (node_count, edge_count) }); - assert_eq!(d.len(), graph_size); debug!("position: {:?}", d.position()); debug!(?node_count, ?edge_count); @@ -606,8 +604,6 @@ fn finish(self, profiler: &SelfProfilerRef) -> FileEncodeResult { debug!("position: {:?}", encoder.position()); IntEncodedWithFixedSize(node_count).encode(&mut encoder); IntEncodedWithFixedSize(edge_count).encode(&mut encoder); - let graph_size = encoder.position() + IntEncodedWithFixedSize::ENCODED_SIZE; - IntEncodedWithFixedSize(graph_size as u64).encode(&mut encoder); debug!("position: {:?}", encoder.position()); // Drop the encoder so that nothing is written after the counts. let result = encoder.finish(); diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index eec83c02d35..1b4b1d2436f 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -17,6 +17,8 @@ pub type FileEncodeResult = Result; +const FOOTER: &[u8] = b"rust-end-file"; + /// The size of the buffer in `FileEncoder`. const BUF_SIZE: usize = 8192; @@ -181,6 +183,7 @@ fn panic_invalid_write(written: usize) { } pub fn finish(&mut self) -> FileEncodeResult { + self.write_all(FOOTER); self.flush(); #[cfg(debug_assertions)] { @@ -261,15 +264,18 @@ pub struct MemDecoder<'a> { impl<'a> MemDecoder<'a> { #[inline] - pub fn new(data: &'a [u8], position: usize) -> MemDecoder<'a> { + pub fn new(data: &'a [u8], position: usize) -> Option> { + let data = data.strip_suffix(FOOTER)?; let Range { start, end } = data.as_ptr_range(); - MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData } + Some(MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData }) } #[inline] - pub fn data(&self) -> &'a [u8] { - // SAFETY: This recovers the original slice, only using members we never modify. - unsafe { std::slice::from_raw_parts(self.start, self.len()) } + pub fn split_at(&self, position: usize) -> MemDecoder<'a> { + assert!(position <= self.len()); + // SAFETY: We checked above that this offset is within the original slice + let current = unsafe { self.start.add(position) }; + MemDecoder { start: self.start, current, end: self.end, _marker: PhantomData } } #[inline] diff --git a/compiler/rustc_serialize/tests/leb128.rs b/compiler/rustc_serialize/tests/leb128.rs index dc9b32a968b..2bf4f38d3c3 100644 --- a/compiler/rustc_serialize/tests/leb128.rs +++ b/compiler/rustc_serialize/tests/leb128.rs @@ -1,4 +1,5 @@ use rustc_serialize::leb128::*; +use rustc_serialize::opaque::MemDecoder; use rustc_serialize::Decoder; macro_rules! impl_test_unsigned_leb128 { @@ -25,13 +26,15 @@ fn $test_name() { let n = $write_fn_name(&mut buf, x); stream.extend(&buf[..n]); } + let stream_end = stream.len(); + stream.extend(b"rust-end-file"); - let mut decoder = rustc_serialize::opaque::MemDecoder::new(&stream, 0); + let mut decoder = MemDecoder::new(&stream, 0).unwrap(); for &expected in &values { let actual = $read_fn_name(&mut decoder); assert_eq!(expected, actual); } - assert_eq!(stream.len(), decoder.position()); + assert_eq!(stream_end, decoder.position()); } }; } @@ -72,13 +75,15 @@ fn $test_name() { let n = $write_fn_name(&mut buf, x); stream.extend(&buf[..n]); } + let stream_end = stream.len(); + stream.extend(b"rust-end-file"); - let mut decoder = rustc_serialize::opaque::MemDecoder::new(&stream, 0); + let mut decoder = MemDecoder::new(&stream, 0).unwrap(); for &expected in &values { let actual = $read_fn_name(&mut decoder); assert_eq!(expected, actual); } - assert_eq!(stream.len(), decoder.position()); + assert_eq!(stream_end, decoder.position()); } }; } diff --git a/compiler/rustc_serialize/tests/opaque.rs b/compiler/rustc_serialize/tests/opaque.rs index 45ff85f38d2..833151d82be 100644 --- a/compiler/rustc_serialize/tests/opaque.rs +++ b/compiler/rustc_serialize/tests/opaque.rs @@ -42,7 +42,7 @@ fn check_round_trip< encoder.finish().unwrap(); let data = fs::read(&tmpfile).unwrap(); - let mut decoder = MemDecoder::new(&data[..], 0); + let mut decoder = MemDecoder::new(&data[..], 0).unwrap(); for value in values { let decoded = Decodable::decode(&mut decoder); assert_eq!(value, decoded); diff --git a/src/librustdoc/scrape_examples.rs b/src/librustdoc/scrape_examples.rs index 9c9b386edda..0bf98886965 100644 --- a/src/librustdoc/scrape_examples.rs +++ b/src/librustdoc/scrape_examples.rs @@ -344,7 +344,9 @@ pub(crate) fn load_call_locations( Ok(bytes) => bytes, Err(e) => dcx.fatal(format!("failed to load examples: {e}")), }; - let mut decoder = MemDecoder::new(&bytes, 0); + let Some(mut decoder) = MemDecoder::new(&bytes, 0) else { + dcx.fatal(format!("Corrupt metadata encountered in {path}")) + }; let calls = AllCallLocations::decode(&mut decoder); for (function, fn_calls) in calls.into_iter() {