Allow for space between each filemap in the codemap
So if a filemap's last byte is at position n in the codemap, then n+1 will not refer to any filemap, and the next filemap will begin at n+2. This is useful for empty files: it means that every file (even an empty one) has a byte in the codemap. Closes #23301, #26504
This commit is contained in:
parent
691ce23479
commit
007246c17f
@ -115,6 +115,10 @@ impl Sub for CharPos {
|
||||
/// are *absolute* positions from the beginning of the codemap, not positions
|
||||
/// relative to FileMaps. Methods on the CodeMap can be used to relate spans back
|
||||
/// to the original source.
|
||||
/// You must be careful if the span crosses more than one file - you will not be
|
||||
/// able to use many of the functions on spans in codemap and you cannot assume
|
||||
/// that the length of the span = hi - lo; there may be space in the BytePos
|
||||
/// range between files.
|
||||
#[derive(Clone, Copy, Hash)]
|
||||
pub struct Span {
|
||||
pub lo: BytePos,
|
||||
@ -339,7 +343,7 @@ pub struct MultiByteChar {
|
||||
pub bytes: usize,
|
||||
}
|
||||
|
||||
/// A single source in the CodeMap
|
||||
/// A single source in the CodeMap.
|
||||
pub struct FileMap {
|
||||
/// The name of the file that the source came from, source that doesn't
|
||||
/// originate from files has names between angle brackets by convention,
|
||||
@ -508,6 +512,9 @@ impl FileMap {
|
||||
lines.get(line_number).map(|&line| {
|
||||
let begin: BytePos = line - self.start_pos;
|
||||
let begin = begin.to_usize();
|
||||
// We can't use `lines.get(line_number+1)` because we might
|
||||
// be parsing when we call this function and thus the current
|
||||
// line is the last one we have line info for.
|
||||
let slice = &src[begin..];
|
||||
match slice.find('\n') {
|
||||
Some(e) => &slice[..e],
|
||||
@ -598,27 +605,27 @@ impl CodeMap {
|
||||
Ok(self.new_filemap(path.to_str().unwrap().to_string(), src))
|
||||
}
|
||||
|
||||
pub fn new_filemap(&self, filename: FileName, mut src: String) -> Rc<FileMap> {
|
||||
let mut files = self.files.borrow_mut();
|
||||
let start_pos = match files.last() {
|
||||
fn next_start_pos(&self) -> usize {
|
||||
let files = self.files.borrow();
|
||||
match files.last() {
|
||||
None => 0,
|
||||
Some(last) => last.end_pos.to_usize(),
|
||||
};
|
||||
// Add one so there is some space between files. This lets us distinguish
|
||||
// positions in the codemap, even in the presence of zero-length files.
|
||||
Some(last) => last.end_pos.to_usize() + 1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new filemap without setting its line information. If you don't
|
||||
/// intend to set the line information yourself, you should use new_filemap_and_lines.
|
||||
pub fn new_filemap(&self, filename: FileName, mut src: String) -> Rc<FileMap> {
|
||||
let start_pos = self.next_start_pos();
|
||||
let mut files = self.files.borrow_mut();
|
||||
|
||||
// Remove utf-8 BOM if any.
|
||||
if src.starts_with("\u{feff}") {
|
||||
src.drain(..3);
|
||||
}
|
||||
|
||||
// Append '\n' in case it's not already there.
|
||||
// This is a workaround to prevent CodeMap.lookup_filemap_idx from
|
||||
// accidentally overflowing into the next filemap in case the last byte
|
||||
// of span is also the last byte of filemap, which leads to incorrect
|
||||
// results from CodeMap.span_to_*.
|
||||
if !src.is_empty() && !src.ends_with("\n") {
|
||||
src.push('\n');
|
||||
}
|
||||
|
||||
let end_pos = start_pos + src.len();
|
||||
|
||||
let filemap = Rc::new(FileMap {
|
||||
@ -645,11 +652,8 @@ impl CodeMap {
|
||||
mut file_local_lines: Vec<BytePos>,
|
||||
mut file_local_multibyte_chars: Vec<MultiByteChar>)
|
||||
-> Rc<FileMap> {
|
||||
let start_pos = self.next_start_pos();
|
||||
let mut files = self.files.borrow_mut();
|
||||
let start_pos = match files.last() {
|
||||
None => 0,
|
||||
Some(last) => last.end_pos.to_usize(),
|
||||
};
|
||||
|
||||
let end_pos = Pos::from_usize(start_pos + source_len);
|
||||
let start_pos = Pos::from_usize(start_pos);
|
||||
@ -686,39 +690,61 @@ impl CodeMap {
|
||||
|
||||
/// Lookup source information about a BytePos
|
||||
pub fn lookup_char_pos(&self, pos: BytePos) -> Loc {
|
||||
let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
|
||||
let line = a + 1; // Line numbers start at 1
|
||||
let chpos = self.bytepos_to_file_charpos(pos);
|
||||
let linebpos = (*f.lines.borrow())[a];
|
||||
let linechpos = self.bytepos_to_file_charpos(linebpos);
|
||||
debug!("byte pos {:?} is on the line at byte pos {:?}",
|
||||
pos, linebpos);
|
||||
debug!("char pos {:?} is on the line at char pos {:?}",
|
||||
chpos, linechpos);
|
||||
debug!("byte is on line: {}", line);
|
||||
assert!(chpos >= linechpos);
|
||||
Loc {
|
||||
file: f,
|
||||
line: line,
|
||||
col: chpos - linechpos
|
||||
match self.lookup_line(pos) {
|
||||
Ok(FileMapAndLine { fm: f, line: a }) => {
|
||||
let line = a + 1; // Line numbers start at 1
|
||||
let linebpos = (*f.lines.borrow())[a];
|
||||
let linechpos = self.bytepos_to_file_charpos(linebpos);
|
||||
debug!("byte pos {:?} is on the line at byte pos {:?}",
|
||||
pos, linebpos);
|
||||
debug!("char pos {:?} is on the line at char pos {:?}",
|
||||
chpos, linechpos);
|
||||
debug!("byte is on line: {}", line);
|
||||
assert!(chpos >= linechpos);
|
||||
Loc {
|
||||
file: f,
|
||||
line: line,
|
||||
col: chpos - linechpos,
|
||||
}
|
||||
}
|
||||
Err(f) => {
|
||||
Loc {
|
||||
file: f,
|
||||
line: 0,
|
||||
col: chpos,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn lookup_line(&self, pos: BytePos) -> FileMapAndLine {
|
||||
// If the relevant filemap is empty, we don't return a line number.
|
||||
fn lookup_line(&self, pos: BytePos) -> Result<FileMapAndLine, Rc<FileMap>> {
|
||||
let idx = self.lookup_filemap_idx(pos);
|
||||
|
||||
let files = self.files.borrow();
|
||||
let f = (*files)[idx].clone();
|
||||
|
||||
let len = f.lines.borrow().len();
|
||||
if len == 0 {
|
||||
return Err(f);
|
||||
}
|
||||
|
||||
let mut a = 0;
|
||||
{
|
||||
let lines = f.lines.borrow();
|
||||
let mut b = lines.len();
|
||||
while b - a > 1 {
|
||||
let m = (a + b) / 2;
|
||||
if (*lines)[m] > pos { b = m; } else { a = m; }
|
||||
if (*lines)[m] > pos {
|
||||
b = m;
|
||||
} else {
|
||||
a = m;
|
||||
}
|
||||
}
|
||||
assert!(a <= lines.len());
|
||||
}
|
||||
FileMapAndLine {fm: f, line: a}
|
||||
Ok(FileMapAndLine { fm: f, line: a })
|
||||
}
|
||||
|
||||
pub fn lookup_char_pos_adj(&self, pos: BytePos) -> LocWithOpt {
|
||||
@ -880,12 +906,15 @@ impl CodeMap {
|
||||
CharPos(bpos.to_usize() - map.start_pos.to_usize() - total_extra_bytes)
|
||||
}
|
||||
|
||||
// Return the index of the filemap (in self.files) which contains pos.
|
||||
fn lookup_filemap_idx(&self, pos: BytePos) -> usize {
|
||||
let files = self.files.borrow();
|
||||
let files = &*files;
|
||||
let len = files.len();
|
||||
let count = files.len();
|
||||
|
||||
// Binary search for the filemap.
|
||||
let mut a = 0;
|
||||
let mut b = len;
|
||||
let mut b = count;
|
||||
while b - a > 1 {
|
||||
let m = (a + b) / 2;
|
||||
if files[m].start_pos > pos {
|
||||
@ -894,26 +923,8 @@ impl CodeMap {
|
||||
a = m;
|
||||
}
|
||||
}
|
||||
// There can be filemaps with length 0. These have the same start_pos as
|
||||
// the previous filemap, but are not the filemaps we want (because they
|
||||
// are length 0, they cannot contain what we are looking for). So,
|
||||
// rewind until we find a useful filemap.
|
||||
loop {
|
||||
let lines = files[a].lines.borrow();
|
||||
let lines = lines;
|
||||
if !lines.is_empty() {
|
||||
break;
|
||||
}
|
||||
if a == 0 {
|
||||
panic!("position {} does not resolve to a source location",
|
||||
pos.to_usize());
|
||||
}
|
||||
a -= 1;
|
||||
}
|
||||
if a >= len {
|
||||
panic!("position {} does not resolve to a source location",
|
||||
pos.to_usize())
|
||||
}
|
||||
|
||||
assert!(a < count, "position {} does not resolve to a source location", pos.to_usize());
|
||||
|
||||
return a;
|
||||
}
|
||||
@ -1027,10 +1038,13 @@ mod tests {
|
||||
let fm = cm.new_filemap("blork.rs".to_string(),
|
||||
"first line.\nsecond line".to_string());
|
||||
fm.next_line(BytePos(0));
|
||||
// Test we can get lines with partial line info.
|
||||
assert_eq!(fm.get_line(0), Some("first line."));
|
||||
// TESTING BROKEN BEHAVIOR:
|
||||
// TESTING BROKEN BEHAVIOR: line break declared before actual line break.
|
||||
fm.next_line(BytePos(10));
|
||||
assert_eq!(fm.get_line(1), Some("."));
|
||||
fm.next_line(BytePos(12));
|
||||
assert_eq!(fm.get_line(2), Some("second line"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -1056,9 +1070,9 @@ mod tests {
|
||||
|
||||
fm1.next_line(BytePos(0));
|
||||
fm1.next_line(BytePos(12));
|
||||
fm2.next_line(BytePos(24));
|
||||
fm3.next_line(BytePos(24));
|
||||
fm3.next_line(BytePos(34));
|
||||
fm2.next_line(fm2.start_pos);
|
||||
fm3.next_line(fm3.start_pos);
|
||||
fm3.next_line(fm3.start_pos + BytePos(12));
|
||||
|
||||
cm
|
||||
}
|
||||
@ -1068,11 +1082,15 @@ mod tests {
|
||||
// Test lookup_byte_offset
|
||||
let cm = init_code_map();
|
||||
|
||||
let fmabp1 = cm.lookup_byte_offset(BytePos(22));
|
||||
let fmabp1 = cm.lookup_byte_offset(BytePos(23));
|
||||
assert_eq!(fmabp1.fm.name, "blork.rs");
|
||||
assert_eq!(fmabp1.pos, BytePos(22));
|
||||
assert_eq!(fmabp1.pos, BytePos(23));
|
||||
|
||||
let fmabp2 = cm.lookup_byte_offset(BytePos(24));
|
||||
let fmabp1 = cm.lookup_byte_offset(BytePos(24));
|
||||
assert_eq!(fmabp1.fm.name, "empty.rs");
|
||||
assert_eq!(fmabp1.pos, BytePos(0));
|
||||
|
||||
let fmabp2 = cm.lookup_byte_offset(BytePos(25));
|
||||
assert_eq!(fmabp2.fm.name, "blork2.rs");
|
||||
assert_eq!(fmabp2.pos, BytePos(0));
|
||||
}
|
||||
@ -1085,7 +1103,7 @@ mod tests {
|
||||
let cp1 = cm.bytepos_to_file_charpos(BytePos(22));
|
||||
assert_eq!(cp1, CharPos(22));
|
||||
|
||||
let cp2 = cm.bytepos_to_file_charpos(BytePos(24));
|
||||
let cp2 = cm.bytepos_to_file_charpos(BytePos(25));
|
||||
assert_eq!(cp2, CharPos(0));
|
||||
}
|
||||
|
||||
@ -1099,7 +1117,7 @@ mod tests {
|
||||
assert_eq!(loc1.line, 2);
|
||||
assert_eq!(loc1.col, CharPos(10));
|
||||
|
||||
let loc2 = cm.lookup_char_pos(BytePos(24));
|
||||
let loc2 = cm.lookup_char_pos(BytePos(25));
|
||||
assert_eq!(loc2.file.name, "blork2.rs");
|
||||
assert_eq!(loc2.line, 1);
|
||||
assert_eq!(loc2.col, CharPos(0));
|
||||
@ -1115,18 +1133,18 @@ mod tests {
|
||||
"first line€€.\n€ second line".to_string());
|
||||
|
||||
fm1.next_line(BytePos(0));
|
||||
fm1.next_line(BytePos(22));
|
||||
fm2.next_line(BytePos(40));
|
||||
fm2.next_line(BytePos(58));
|
||||
fm1.next_line(BytePos(28));
|
||||
fm2.next_line(fm2.start_pos);
|
||||
fm2.next_line(fm2.start_pos + BytePos(20));
|
||||
|
||||
fm1.record_multibyte_char(BytePos(3), 3);
|
||||
fm1.record_multibyte_char(BytePos(9), 3);
|
||||
fm1.record_multibyte_char(BytePos(12), 3);
|
||||
fm1.record_multibyte_char(BytePos(15), 3);
|
||||
fm1.record_multibyte_char(BytePos(18), 3);
|
||||
fm2.record_multibyte_char(BytePos(50), 3);
|
||||
fm2.record_multibyte_char(BytePos(53), 3);
|
||||
fm2.record_multibyte_char(BytePos(58), 3);
|
||||
fm2.record_multibyte_char(fm2.start_pos + BytePos(10), 3);
|
||||
fm2.record_multibyte_char(fm2.start_pos + BytePos(13), 3);
|
||||
fm2.record_multibyte_char(fm2.start_pos + BytePos(18), 3);
|
||||
|
||||
cm
|
||||
}
|
||||
|
@ -854,11 +854,12 @@ mod test {
|
||||
println!("done");
|
||||
let vec = data.lock().unwrap().clone();
|
||||
let vec: &[u8] = &vec;
|
||||
println!("{}", from_utf8(vec).unwrap());
|
||||
assert_eq!(vec, "dummy.txt: 8 \n\
|
||||
dummy.txt: 9 \n\
|
||||
dummy.txt:10 \n\
|
||||
dummy.txt:11 \n\
|
||||
dummy.txt:12 \n".as_bytes());
|
||||
let str = from_utf8(vec).unwrap();
|
||||
println!("{}", str);
|
||||
assert_eq!(str, "dummy.txt: 8 line8\n\
|
||||
dummy.txt: 9 line9\n\
|
||||
dummy.txt:10 line10\n\
|
||||
dummy.txt:11 e-lä-vän\n\
|
||||
dummy.txt:12 tolv\n");
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user