Rollup merge of #63525 - matklad:centraliza-file-loading, r=petrochenkov

Make sure that all file loading happens via SourceMap

That way, callers don't need to repeat the "let's add this to the source
map manually for tracking dependencies" trick.

It should make it easier to switch to using `FileLoader` for binary
files in the future as well.

cc #62948

r? @petrochenkov
This commit is contained in:
Mazdak Farrokhzad 2019-08-16 18:22:24 +02:00 committed by GitHub
commit c83d3c3281
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 51 additions and 34 deletions

View File

@ -25,7 +25,6 @@ use syntax_pos::{Span, DUMMY_SP, FileName};
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lrc;
use std::fs;
use std::io::ErrorKind;
use std::{iter, mem};
use std::ops::DerefMut;
@ -1241,13 +1240,11 @@ impl<'a, 'b> MutVisitor for InvocationCollector<'a, 'b> {
}
let filename = self.cx.resolve_path(&*file.as_str(), it.span());
match fs::read_to_string(&filename) {
Ok(src) => {
let src_interned = Symbol::intern(&src);
// Add this input file to the code map to make it available as
// dependency information
self.cx.source_map().new_source_file(filename.into(), src);
match self.cx.source_map().load_file(&filename) {
Ok(source_file) => {
let src = source_file.src.as_ref()
.expect("freshly loaded file should have a source");
let src_interned = Symbol::intern(src.as_str());
let include_info = vec![
ast::NestedMetaItem::MetaItem(

View File

@ -171,6 +171,26 @@ impl SourceMap {
Ok(self.new_source_file(filename, src))
}
/// Reads the file at `path` as raw bytes and returns them untouched.
///
/// In contrast to `load_file`, this guarantees that the returned bytes are
/// not normalized in any way (for example, a leading BOM is not removed).
///
/// As a side effect, the file is registered with this `SourceMap`.
pub fn load_binary_file(&self, path: &Path) -> io::Result<Vec<u8>> {
    // `self.file_loader` would be the ideal way to read the file, but it
    // cannot handle binary files yet, so the filesystem is read directly.
    let contents = fs::read(path)?;

    // The file has to be added to the `SourceMap` so that it shows up in
    // dep-info. The same file may also be loaded both as a binary (via
    // `include_bytes!`) and as a proper `SourceFile` (via `mod`), so the
    // real contents are registered whenever they are valid UTF-8; only
    // otherwise is an empty string used.
    let source_text = match std::str::from_utf8(&contents) {
        Ok(valid) => valid.to_string(),
        Err(_) => String::new(),
    };
    self.new_source_file(path.to_owned().into(), source_text);

    Ok(contents)
}
/// Borrows `self.files` and returns a guard mapped onto its `source_files` list.
pub fn files(&self) -> MappedLockGuard<'_, Vec<Lrc<SourceFile>>> {
    let guard = self.files.borrow();
    LockGuard::map(guard, |inner| &mut inner.source_files)
}

View File

@ -9,8 +9,6 @@ use syntax::tokenstream;
use smallvec::SmallVec;
use syntax_pos::{self, Pos, Span};
use std::fs;
use std::io::ErrorKind;
use rustc_data_structures::sync::Lrc;
// These macros all relate to the file system; they either return
@ -114,20 +112,17 @@ pub fn expand_include_str(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::To
None => return DummyResult::any(sp)
};
let file = cx.resolve_path(file, sp);
match fs::read_to_string(&file) {
Ok(src) => {
let interned_src = Symbol::intern(&src);
// Add this input file to the code map to make it available as
// dependency information
cx.source_map().new_source_file(file.into(), src);
base::MacEager::expr(cx.expr_str(sp, interned_src))
match cx.source_map().load_binary_file(&file) {
Ok(bytes) => match std::str::from_utf8(&bytes) {
Ok(src) => {
let interned_src = Symbol::intern(&src);
base::MacEager::expr(cx.expr_str(sp, interned_src))
}
Err(_) => {
cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display()));
DummyResult::any(sp)
}
},
Err(ref e) if e.kind() == ErrorKind::InvalidData => {
cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display()));
DummyResult::any(sp)
}
Err(e) => {
cx.span_err(sp, &format!("couldn't read {}: {}", file.display(), e));
DummyResult::any(sp)
@ -142,18 +137,8 @@ pub fn expand_include_bytes(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::
None => return DummyResult::any(sp)
};
let file = cx.resolve_path(file, sp);
match fs::read(&file) {
match cx.source_map().load_binary_file(&file) {
Ok(bytes) => {
// Add the contents to the source map if it contains UTF-8.
let (contents, bytes) = match String::from_utf8(bytes) {
Ok(s) => {
let bytes = s.as_bytes().to_owned();
(s, bytes)
},
Err(e) => (String::new(), e.into_bytes()),
};
cx.source_map().new_source_file(file.into(), contents);
base::MacEager::expr(cx.expr_lit(sp, ast::LitKind::ByteStr(Lrc::new(bytes))))
},
Err(e) => {

View File

@ -1,2 +1,3 @@
lexer-crlf-line-endings-string-literal-doc-comment.rs -text
trailing-carriage-return-in-string.rs -text
*.bin -text

View File

@ -0,0 +1,2 @@
This file starts with BOM.
Lines are separated by \r\n.

View File

@ -0,0 +1,12 @@
// run-pass
fn main() {
    // `include_bytes!` must yield the file verbatim: BOM and CRLF line endings intact.
    let raw_bytes = &include_bytes!("data.bin")[..];
    let expected_bytes =
        &b"\xEF\xBB\xBFThis file starts with BOM.\r\nLines are separated by \\r\\n.\r\n"[..];
    assert_eq!(raw_bytes, expected_bytes);

    // `include_str!` must likewise keep the BOM and the `\r\n` separators.
    let text = include_str!("data.bin");
    let expected_text = "\u{FEFF}This file starts with BOM.\r\nLines are separated by \\r\\n.\r\n";
    assert_eq!(text, expected_text);
}