From 66dc08ad604cdb75cbc2a89d3551c51fbc6cc20e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 13 Aug 2019 19:51:32 +0300 Subject: [PATCH 1/2] Make sure that all file loading happens via SourceMap That way, callers don't need to repeat "let's add this to sm manually for tracking dependencies" trick. It should make it easier to switch to using `FileLoader` for binary files in the future as well --- src/libsyntax/ext/expand.rs | 13 +++++------ src/libsyntax/source_map.rs | 20 +++++++++++++++++ src/libsyntax_ext/source_util.rs | 37 ++++++++++---------------------- 3 files changed, 36 insertions(+), 34 deletions(-) diff --git a/src/libsyntax/ext/expand.rs b/src/libsyntax/ext/expand.rs index 402b42dfbc8..e9789764383 100644 --- a/src/libsyntax/ext/expand.rs +++ b/src/libsyntax/ext/expand.rs @@ -25,7 +25,6 @@ use syntax_pos::{Span, DUMMY_SP, FileName}; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::sync::Lrc; -use std::fs; use std::io::ErrorKind; use std::{iter, mem}; use std::ops::DerefMut; @@ -1239,13 +1238,11 @@ impl<'a, 'b> MutVisitor for InvocationCollector<'a, 'b> { } let filename = self.cx.resolve_path(&*file.as_str(), it.span()); - match fs::read_to_string(&filename) { - Ok(src) => { - let src_interned = Symbol::intern(&src); - - // Add this input file to the code map to make it available as - // dependency information - self.cx.source_map().new_source_file(filename.into(), src); + match self.cx.source_map().load_file(&filename) { + Ok(source_file) => { + let src = source_file.src.as_ref() + .expect("freshly loaded file should have a source"); + let src_interned = Symbol::intern(src.as_str()); let include_info = vec![ ast::NestedMetaItem::MetaItem( diff --git a/src/libsyntax/source_map.rs b/src/libsyntax/source_map.rs index 74cab00d3c1..ceaa5ee3aa5 100644 --- a/src/libsyntax/source_map.rs +++ b/src/libsyntax/source_map.rs @@ -170,6 +170,26 @@ impl SourceMap { Ok(self.new_source_file(filename, src)) } + /// Loads source file as a binary blob. + /// + /// Unlike `load_file`, guarantees that no normalization like BOM-removal + /// takes place. + pub fn load_binary_file(&self, path: &Path) -> io::Result> { + // Ideally, this should use `self.file_loader`, but it can't + // deal with binary files yet. + let bytes = fs::read(path)?; + + // We need to add file to the `SourceMap`, so that it is present + // in dep-info. There's also an edge case that file might be both + // loaded as a binary via `include_bytes!` and as proper `SourceFile` + // via `mod`, so we try to use real file contents and not just an + // empty string. + let text = std::str::from_utf8(&bytes).unwrap_or("") + .to_string(); + self.new_source_file(path.to_owned().into(), text); + Ok(bytes) + } + pub fn files(&self) -> MappedLockGuard<'_, Vec>> { LockGuard::map(self.files.borrow(), |files| &mut files.source_files) } diff --git a/src/libsyntax_ext/source_util.rs b/src/libsyntax_ext/source_util.rs index cbc01b48afd..e008ed710e4 100644 --- a/src/libsyntax_ext/source_util.rs +++ b/src/libsyntax_ext/source_util.rs @@ -9,8 +9,6 @@ use syntax::tokenstream; use smallvec::SmallVec; use syntax_pos::{self, Pos, Span}; -use std::fs; -use std::io::ErrorKind; use rustc_data_structures::sync::Lrc; // These macros all relate to the file system; they either return @@ -114,20 +112,17 @@ pub fn expand_include_str(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::To None => return DummyResult::any(sp) }; let file = cx.resolve_path(file, sp); - match fs::read_to_string(&file) { - Ok(src) => { - let interned_src = Symbol::intern(&src); - - // Add this input file to the code map to make it available as - // dependency information - cx.source_map().new_source_file(file.into(), src); - - base::MacEager::expr(cx.expr_str(sp, interned_src)) + match cx.source_map().load_binary_file(&file) { + Ok(bytes) => match std::str::from_utf8(&bytes) { + Ok(src) => { + let interned_src = Symbol::intern(&src); + base::MacEager::expr(cx.expr_str(sp, interned_src)) + } + Err(_) => { + cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display())); + DummyResult::any(sp) + } }, - Err(ref e) if e.kind() == ErrorKind::InvalidData => { - cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display())); - DummyResult::any(sp) - } Err(e) => { cx.span_err(sp, &format!("couldn't read {}: {}", file.display(), e)); DummyResult::any(sp) @@ -142,18 +137,8 @@ pub fn expand_include_bytes(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream:: None => return DummyResult::any(sp) }; let file = cx.resolve_path(file, sp); - match fs::read(&file) { + match cx.source_map().load_binary_file(&file) { Ok(bytes) => { - // Add the contents to the source map if it contains UTF-8. - let (contents, bytes) = match String::from_utf8(bytes) { - Ok(s) => { - let bytes = s.as_bytes().to_owned(); - (s, bytes) - }, - Err(e) => (String::new(), e.into_bytes()), - }; - cx.source_map().new_source_file(file.into(), contents); - base::MacEager::expr(cx.expr_lit(sp, ast::LitKind::ByteStr(Lrc::new(bytes)))) }, Err(e) => { From 14bc998df9f15042342ac8e649a4adadf17a65f8 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 15 Aug 2019 10:32:52 +0300 Subject: [PATCH 2/2] Add regression test for include_str! normalization --- src/test/ui/.gitattributes | 1 + src/test/ui/include-macros/data.bin | 2 ++ src/test/ui/include-macros/normalization.rs | 12 ++++++++++++ 3 files changed, 15 insertions(+) create mode 100644 src/test/ui/include-macros/data.bin create mode 100644 src/test/ui/include-macros/normalization.rs diff --git a/src/test/ui/.gitattributes b/src/test/ui/.gitattributes index b62ade73aa9..489dc8ad111 100644 --- a/src/test/ui/.gitattributes +++ b/src/test/ui/.gitattributes @@ -1,2 +1,3 @@ lexer-crlf-line-endings-string-literal-doc-comment.rs -text trailing-carriage-return-in-string.rs -text +*.bin -text diff --git a/src/test/ui/include-macros/data.bin b/src/test/ui/include-macros/data.bin new file mode 100644 index 00000000000..ce4e0b8311a --- /dev/null +++ b/src/test/ui/include-macros/data.bin @@ -0,0 +1,2 @@ +This file starts with BOM. +Lines are separated by \r\n. diff --git a/src/test/ui/include-macros/normalization.rs b/src/test/ui/include-macros/normalization.rs new file mode 100644 index 00000000000..889f08e606e --- /dev/null +++ b/src/test/ui/include-macros/normalization.rs @@ -0,0 +1,12 @@ +// run-pass + +fn main() { + assert_eq!( + &include_bytes!("data.bin")[..], + &b"\xEF\xBB\xBFThis file starts with BOM.\r\nLines are separated by \\r\\n.\r\n"[..], + ); + assert_eq!( + include_str!("data.bin"), + "\u{FEFF}This file starts with BOM.\r\nLines are separated by \\r\\n.\r\n", + ); +}