Refactor the lexer to use FilePos types

This commit is contained in:
Brian Anderson 2012-11-14 22:27:53 -08:00
parent b1dff40bae
commit 4a0f4f5e31
6 changed files with 85 additions and 54 deletions

@ -200,11 +200,8 @@ pub impl FileMap {
start_pos);
}
fn next_line(@self, +chpos: CharPos, +byte_pos: BytePos) {
self.lines.push(FilePos {
ch: chpos,
byte: byte_pos + self.start_pos.byte
});
fn next_line(@self, +pos: FilePos) {
self.lines.push(pos);
}
pub fn get_line(@self, line: int) -> ~str unsafe {
@ -231,6 +228,18 @@ pub impl CodeMap {
}
pub fn add_filemap(@self, filemap: @FileMap) {
let expected_byte_pos = if self.files.len() == 0 {
0
} else {
let last_start = self.files.last().start_pos.byte.to_uint();
let last_len = self.files.last().src.len();
last_start + last_len
};
let actual_byte_pos = filemap.start_pos.byte.to_uint();
debug!("codemap: adding filemap: %s", filemap.name);
debug!("codemap: expected offset: %u", expected_byte_pos);
debug!("codemap: actual offset: %u", actual_byte_pos);
assert expected_byte_pos == actual_byte_pos;
self.files.push(filemap);
}

@ -58,10 +58,13 @@ fn expand_include(cx: ext_ctxt, sp: span, arg: ast::mac_arg,
_body: ast::mac_body) -> @ast::expr {
let args = get_mac_args(cx, sp, arg, 1u, option::Some(1u), ~"include");
let file = expr_to_str(cx, args[0], ~"#include_str requires a string");
let p = parse::new_parser_from_file(cx.parse_sess(), cx.cfg(),
&res_rel_file(cx, sp, &Path(file)),
parse::parser::SOURCE_FILE);
return p.parse_expr();
let (p, rdr) = parse::new_parser_etc_from_file(
cx.parse_sess(), cx.cfg(),
&res_rel_file(cx, sp, &Path(file)),
parse::parser::SOURCE_FILE);
let e = p.parse_expr();
parse::update_parse_sess_position(&cx.parse_sess(), &rdr);
return e;
}
fn expand_include_str(cx: ext_ctxt, sp: codemap::span, arg: ast::mac_arg,

@ -11,6 +11,7 @@ export parse_crate_from_source_str;
export parse_expr_from_source_str, parse_item_from_source_str;
export parse_stmt_from_source_str;
export parse_from_source_str;
export update_parse_sess_position;
use parser::Parser;
use attr::parser_attr;
@ -76,7 +77,7 @@ fn parse_crate_from_crate_file(input: &Path, cfg: ast::crate_cfg,
let leading_attrs = p.parse_inner_attrs_and_next();
let { inner: crate_attrs, next: first_cdir_attr } = leading_attrs;
let cdirs = p.parse_crate_directives(token::EOF, first_cdir_attr);
eval::update_parse_sess_position(&sess, &rdr);
update_parse_sess_position(&sess, &rdr);
let cx = @{sess: sess, cfg: /* FIXME (#2543) */ copy p.cfg};
let companionmod = input.filestem().map(|s| Path(*s));
let (m, attrs) = eval::eval_crate_directives_to_mod(
@ -96,7 +97,7 @@ fn parse_crate_from_source_file(input: &Path, cfg: ast::crate_cfg,
let (p, rdr) = new_parser_etc_from_file(sess, cfg, input,
parser::SOURCE_FILE);
let r = p.parse_crate_mod(cfg);
eval::update_parse_sess_position(&sess, &rdr);
update_parse_sess_position(&sess, &rdr);
return r;
}
@ -106,7 +107,7 @@ fn parse_crate_from_source_str(name: ~str, source: @~str, cfg: ast::crate_cfg,
codemap::FssNone, source);
let r = p.parse_crate_mod(cfg);
p.abort_if_errors();
eval::update_parse_sess_position(&sess, &rdr);
update_parse_sess_position(&sess, &rdr);
return r;
}
@ -116,7 +117,7 @@ fn parse_expr_from_source_str(name: ~str, source: @~str, cfg: ast::crate_cfg,
codemap::FssNone, source);
let r = p.parse_expr();
p.abort_if_errors();
eval::update_parse_sess_position(&sess, &rdr);
update_parse_sess_position(&sess, &rdr);
return r;
}
@ -127,7 +128,7 @@ fn parse_item_from_source_str(name: ~str, source: @~str, cfg: ast::crate_cfg,
codemap::FssNone, source);
let r = p.parse_item(attrs);
p.abort_if_errors();
eval::update_parse_sess_position(&sess, &rdr);
update_parse_sess_position(&sess, &rdr);
return r;
}
@ -138,7 +139,7 @@ fn parse_stmt_from_source_str(name: ~str, source: @~str, cfg: ast::crate_cfg,
codemap::FssNone, source);
let r = p.parse_stmt(attrs);
p.abort_if_errors();
eval::update_parse_sess_position(&sess, &rdr);
update_parse_sess_position(&sess, &rdr);
return r;
}
@ -155,7 +156,7 @@ fn parse_from_source_str<T>(f: fn (p: Parser) -> T,
p.reader.fatal(~"expected end-of-string");
}
p.abort_if_errors();
eval::update_parse_sess_position(&sess, &rdr);
update_parse_sess_position(&sess, &rdr);
move r
}
@ -216,3 +217,10 @@ fn new_parser_from_tt(sess: parse_sess, cfg: ast::crate_cfg,
None, tt);
return Parser(sess, cfg, trdr as reader, parser::SOURCE_FILE)
}
// Record in the parse session the position at which this string_reader
// stopped, so a subsequent parser (e.g. for a companion module) can resume
// lexing from the correct absolute file position.
//
// `r.last_pos` is the reader's position of the last consumed character;
// both its `ch` (CharPos) and `byte` (BytePos) components are copied.
// NOTE(review): with FilePos now carried whole on the reader, this copies
// the absolute position directly — unlike the replaced version in eval,
// which accumulated `sess.pos.byte + r.pos`. Presumably last_pos is
// already absolute (offset by filemap.start_pos) — confirm against
// new_low_level_string_reader, which seeds pos from filemap.start_pos.
fn update_parse_sess_position(sess: &parse_sess, r: &lexer::string_reader) {
sess.pos = FilePos {
ch: r.last_pos.ch,
byte: r.last_pos.byte
};
}

@ -131,7 +131,7 @@ fn consume_non_eol_whitespace(rdr: string_reader) {
fn push_blank_line_comment(rdr: string_reader, comments: &mut ~[cmnt]) {
debug!(">>> blank-line comment");
let v: ~[~str] = ~[];
comments.push({style: blank_line, lines: v, pos: rdr.chpos});
comments.push({style: blank_line, lines: v, pos: rdr.last_pos.ch});
}
fn consume_whitespace_counting_blank_lines(rdr: string_reader,
@ -148,7 +148,7 @@ fn consume_whitespace_counting_blank_lines(rdr: string_reader,
fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
comments: &mut ~[cmnt]) {
debug!(">>> shebang comment");
let p = rdr.chpos;
let p = rdr.last_pos.ch;
debug!("<<< shebang comment");
comments.push({
style: if code_to_the_left { trailing } else { isolated },
@ -160,7 +160,7 @@ fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
fn read_line_comments(rdr: string_reader, code_to_the_left: bool,
comments: &mut ~[cmnt]) {
debug!(">>> line comments");
let p = rdr.chpos;
let p = rdr.last_pos.ch;
let mut lines: ~[~str] = ~[];
while rdr.curr == '/' && nextch(rdr) == '/' {
let line = read_one_line_comment(rdr);
@ -209,7 +209,7 @@ fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
fn read_block_comment(rdr: string_reader, code_to_the_left: bool,
comments: &mut ~[cmnt]) {
debug!(">>> block comment");
let p = rdr.chpos;
let p = rdr.last_pos.ch;
let mut lines: ~[~str] = ~[];
let mut col: CharPos = rdr.col;
bump(rdr);
@ -319,7 +319,7 @@ fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
}
let bstart = rdr.pos;
let bstart = rdr.pos.byte;
rdr.next_token();
//discard, and look ahead; we're working with internal state
let {tok: tok, sp: sp} = rdr.peek();

@ -1,8 +1,8 @@
use parse::update_parse_sess_position;
use parser::{Parser, SOURCE_FILE};
use attr::parser_attr;
export eval_crate_directives_to_mod;
export update_parse_sess_position;
type ctx =
@{sess: parse::parse_sess,
@ -74,13 +74,6 @@ fn parse_companion_mod(cx: ctx, prefix: &Path, suffix: &Option<Path>)
}
}
// (Version removed by this commit; superseded by the copy in parse.rs.)
// Advance the parse session's position past the text this reader consumed:
// `ch` is taken from the reader's character position, while `byte` is the
// session's previous byte position plus the reader's (filemap-relative)
// byte offset — i.e. byte positions were accumulated across files here,
// whereas the replacement reads an already-absolute `last_pos` instead.
fn update_parse_sess_position(sess: &parse_sess, r: &lexer::string_reader) {
sess.pos = FilePos {
ch: r.chpos,
byte: sess.pos.byte + r.pos
};
}
fn cdir_path_opt(default: ~str, attrs: ~[ast::attribute]) -> ~str {
match ::attr::first_attr_value_str_by_name(attrs, ~"path") {
Some(d) => d,

@ -1,5 +1,5 @@
use diagnostic::span_handler;
use codemap::{span, CodeMap, CharPos, BytePos};
use codemap::{span, CodeMap, CharPos, BytePos, FilePos};
use ext::tt::transcribe::{tt_reader, new_tt_reader, dup_tt_reader,
tt_next_token};
@ -21,10 +21,10 @@ trait reader {
type string_reader = @{
span_diagnostic: span_handler,
src: @~str,
mut pos: FilePos,
mut last_pos: FilePos,
mut col: CharPos,
mut pos: BytePos,
mut curr: char,
mut chpos: CharPos,
filemap: @codemap::FileMap,
interner: @token::ident_interner,
/* cached: */
@ -48,9 +48,10 @@ fn new_low_level_string_reader(span_diagnostic: span_handler,
// Force the initial reader bump to start on a fresh line
let initial_char = '\n';
let r = @{span_diagnostic: span_diagnostic, src: filemap.src,
mut col: CharPos(0), mut pos: BytePos(0),
mut pos: filemap.start_pos,
mut last_pos: filemap.start_pos,
mut col: CharPos(0),
mut curr: initial_char,
mut chpos: filemap.start_pos.ch,
filemap: filemap, interner: itr,
/* dummy values; not read */
mut peek_tok: token::EOF,
@ -61,7 +62,9 @@ fn new_low_level_string_reader(span_diagnostic: span_handler,
fn dup_string_reader(&&r: string_reader) -> string_reader {
@{span_diagnostic: r.span_diagnostic, src: r.src,
mut col: r.col, mut pos: r.pos, mut curr: r.curr, mut chpos: r.chpos,
mut pos: r.pos,
mut last_pos: r.last_pos,
mut col: r.col, mut curr: r.curr,
filemap: r.filemap, interner: r.interner,
mut peek_tok: r.peek_tok, mut peek_span: r.peek_span}
}
@ -116,34 +119,48 @@ fn string_advance_token(&&r: string_reader) {
if is_eof(r) {
r.peek_tok = token::EOF;
} else {
let start_chpos = r.chpos;
let start_chpos = r.last_pos.ch;
r.peek_tok = next_token_inner(r);
r.peek_span = ast_util::mk_sp(start_chpos, r.chpos);
r.peek_span = ast_util::mk_sp(start_chpos, r.last_pos.ch);
};
}
// Byte offset of the reader's current position relative to the start of
// its filemap — i.e. an index into `*rdr.src` — computed by subtracting
// the filemap's absolute starting byte from the reader's absolute byte
// position. Used by get_str_from, bump and nextch to index the source
// string now that `rdr.pos` is an absolute FilePos rather than a
// string-local BytePos.
fn byte_offset(rdr: string_reader) -> BytePos {
(rdr.pos.byte - rdr.filemap.start_pos.byte)
}
fn get_str_from(rdr: string_reader, start: BytePos) -> ~str unsafe {
// I'm pretty skeptical about this subtraction. What if there's a
// multi-byte character before the mark?
return str::slice(*rdr.src, start.to_uint() - 1u, rdr.pos.to_uint() - 1u);
return str::slice(*rdr.src, start.to_uint() - 1u,
byte_offset(rdr).to_uint() - 1u);
}
fn bump(rdr: string_reader) {
if rdr.pos.to_uint() < (*rdr.src).len() {
rdr.last_pos = rdr.pos;
let current_byte_offset = byte_offset(rdr).to_uint();;
if current_byte_offset < (*rdr.src).len() {
let last_char = rdr.curr;
let next = str::char_range_at(*rdr.src, current_byte_offset);
let byte_offset_diff = next.next - current_byte_offset;
rdr.pos = FilePos {
ch: rdr.pos.ch + CharPos(1u),
byte: rdr.pos.byte + BytePos(byte_offset_diff)
};
rdr.curr = next.ch;
rdr.col += CharPos(1u);
rdr.chpos += CharPos(1u);
if rdr.curr == '\n' {
rdr.filemap.next_line(rdr.chpos, rdr.pos);
if last_char == '\n' {
rdr.filemap.next_line(rdr.last_pos);
rdr.col = CharPos(0u);
}
let next = str::char_range_at(*rdr.src, rdr.pos.to_uint());
rdr.pos = BytePos(next.next);
rdr.curr = next.ch;
} else {
// XXX: What does this accomplish?
if (rdr.curr != -1 as char) {
rdr.chpos += CharPos(1u);
rdr.pos = FilePos {
ch: rdr.pos.ch + CharPos(1u),
byte: rdr.pos.byte + BytePos(1u)
};
rdr.col += CharPos(1u);
rdr.curr = -1 as char;
}
@ -153,8 +170,9 @@ fn is_eof(rdr: string_reader) -> bool {
rdr.curr == -1 as char
}
fn nextch(rdr: string_reader) -> char {
if rdr.pos.to_uint() < (*rdr.src).len() {
return str::char_at(*rdr.src, rdr.pos.to_uint());
let offset = byte_offset(rdr).to_uint();
if offset < (*rdr.src).len() {
return str::char_at(*rdr.src, offset);
} else { return -1 as char; }
}
@ -211,7 +229,7 @@ fn consume_any_line_comment(rdr: string_reader)
bump(rdr);
// line comments starting with "///" or "//!" are doc-comments
if rdr.curr == '/' || rdr.curr == '!' {
let start_chpos = rdr.chpos - CharPos(2u);
let start_chpos = rdr.pos.ch - CharPos(2u);
let mut acc = ~"//";
while rdr.curr != '\n' && !is_eof(rdr) {
str::push_char(&mut acc, rdr.curr);
@ -219,7 +237,7 @@ fn consume_any_line_comment(rdr: string_reader)
}
return Some({
tok: token::DOC_COMMENT(rdr.interner.intern(@acc)),
sp: ast_util::mk_sp(start_chpos, rdr.chpos)
sp: ast_util::mk_sp(start_chpos, rdr.pos.ch)
});
} else {
while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
@ -234,7 +252,7 @@ fn consume_any_line_comment(rdr: string_reader)
if nextch(rdr) == '!' {
let cmap = @CodeMap::new();
(*cmap).files.push(rdr.filemap);
let loc = cmap.lookup_char_pos_adj(rdr.chpos);
let loc = cmap.lookup_char_pos_adj(rdr.last_pos.ch);
if loc.line == 1u && loc.col == CharPos(0u) {
while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
return consume_whitespace_and_comments(rdr);
@ -250,7 +268,7 @@ fn consume_block_comment(rdr: string_reader)
// block comments starting with "/**" or "/*!" are doc-comments
if rdr.curr == '*' || rdr.curr == '!' {
let start_chpos = rdr.chpos - CharPos(2u);
let start_chpos = rdr.pos.ch - CharPos(2u);
let mut acc = ~"/*";
while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
str::push_char(&mut acc, rdr.curr);
@ -264,7 +282,7 @@ fn consume_block_comment(rdr: string_reader)
bump(rdr);
return Some({
tok: token::DOC_COMMENT(rdr.interner.intern(@acc)),
sp: ast_util::mk_sp(start_chpos, rdr.chpos)
sp: ast_util::mk_sp(start_chpos, rdr.pos.ch)
});
}
} else {
@ -584,7 +602,7 @@ fn next_token_inner(rdr: string_reader) -> token::Token {
return token::LIT_INT(c2 as i64, ast::ty_char);
}
'"' => {
let n = rdr.pos;
let n = byte_offset(rdr);
bump(rdr);
while rdr.curr != '"' {
if is_eof(rdr) {