Refactor the lexer to use FilePos types
parent b1dff40bae
commit 4a0f4f5e31
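The refactor below collapses the lexer's two separate position counters (a character index `chpos` and a byte index `pos`) into a single `FilePos` value carrying both, plus a `last_pos` snapshot of the position before the most recent `bump`. A rough sketch of that shape, written in modern Rust syntax rather than the 2012 dialect used in the diff (the names `FilePos`, `CharPos`, `BytePos`, `ch`, and `byte` come from the diff; everything else here is illustrative, not the actual libsyntax code):

    // Sketch only: the FilePos shape implied by the diff, in modern Rust.
    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    struct CharPos(usize); // offset counted in characters

    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    struct BytePos(usize); // offset counted in bytes

    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    struct FilePos {
        ch: CharPos,
        byte: BytePos,
    }

    fn main() {
        // Advancing past one character moves both coordinates together,
        // analogous to what the new `bump` does with `byte_offset_diff`.
        let mut pos = FilePos { ch: CharPos(0), byte: BytePos(0) };
        let c = 'é';                        // one character, two bytes in UTF-8
        pos = FilePos {
            ch: CharPos(pos.ch.0 + 1),
            byte: BytePos(pos.byte.0 + c.len_utf8()),
        };
        assert_eq!(pos, FilePos { ch: CharPos(1), byte: BytePos(2) });
    }

Keeping both coordinates in one value means a multi-byte character advances the byte offset by more than one while the character offset advances by exactly one, which is what the new `bump` computes via `byte_offset_diff`.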
src/libsyntax
@@ -200,11 +200,8 @@ pub impl FileMap {
                               start_pos);
     }

-    fn next_line(@self, +chpos: CharPos, +byte_pos: BytePos) {
-        self.lines.push(FilePos {
-            ch: chpos,
-            byte: byte_pos + self.start_pos.byte
-        });
+    fn next_line(@self, +pos: FilePos) {
+        self.lines.push(pos);
     }

     pub fn get_line(@self, line: int) -> ~str unsafe {
@@ -231,6 +228,18 @@ pub impl CodeMap {
     }

     pub fn add_filemap(@self, filemap: @FileMap) {
+        let expected_byte_pos = if self.files.len() == 0 {
+            0
+        } else {
+            let last_start = self.files.last().start_pos.byte.to_uint();
+            let last_len = self.files.last().src.len();
+            last_start + last_len
+        };
+        let actual_byte_pos = filemap.start_pos.byte.to_uint();
+        debug!("codemap: adding filemap: %s", filemap.name);
+        debug!("codemap: expected offset: %u", expected_byte_pos);
+        debug!("codemap: actual offset: %u", actual_byte_pos);
+        assert expected_byte_pos == actual_byte_pos;
         self.files.push(filemap);
     }

@@ -58,10 +58,13 @@ fn expand_include(cx: ext_ctxt, sp: span, arg: ast::mac_arg,
                   _body: ast::mac_body) -> @ast::expr {
     let args = get_mac_args(cx, sp, arg, 1u, option::Some(1u), ~"include");
     let file = expr_to_str(cx, args[0], ~"#include_str requires a string");
-    let p = parse::new_parser_from_file(cx.parse_sess(), cx.cfg(),
-                                        &res_rel_file(cx, sp, &Path(file)),
-                                        parse::parser::SOURCE_FILE);
-    return p.parse_expr();
+    let (p, rdr) = parse::new_parser_etc_from_file(
+        cx.parse_sess(), cx.cfg(),
+        &res_rel_file(cx, sp, &Path(file)),
+        parse::parser::SOURCE_FILE);
+    let e = p.parse_expr();
+    parse::update_parse_sess_position(&cx.parse_sess(), &rdr);
+    return e;
 }

 fn expand_include_str(cx: ext_ctxt, sp: codemap::span, arg: ast::mac_arg,
@@ -11,6 +11,7 @@ export parse_crate_from_source_str;
 export parse_expr_from_source_str, parse_item_from_source_str;
 export parse_stmt_from_source_str;
 export parse_from_source_str;
+export update_parse_sess_position;

 use parser::Parser;
 use attr::parser_attr;
@@ -76,7 +77,7 @@ fn parse_crate_from_crate_file(input: &Path, cfg: ast::crate_cfg,
     let leading_attrs = p.parse_inner_attrs_and_next();
     let { inner: crate_attrs, next: first_cdir_attr } = leading_attrs;
     let cdirs = p.parse_crate_directives(token::EOF, first_cdir_attr);
-    eval::update_parse_sess_position(&sess, &rdr);
+    update_parse_sess_position(&sess, &rdr);
     let cx = @{sess: sess, cfg: /* FIXME (#2543) */ copy p.cfg};
     let companionmod = input.filestem().map(|s| Path(*s));
     let (m, attrs) = eval::eval_crate_directives_to_mod(
@@ -96,7 +97,7 @@ fn parse_crate_from_source_file(input: &Path, cfg: ast::crate_cfg,
     let (p, rdr) = new_parser_etc_from_file(sess, cfg, input,
                                             parser::SOURCE_FILE);
     let r = p.parse_crate_mod(cfg);
-    eval::update_parse_sess_position(&sess, &rdr);
+    update_parse_sess_position(&sess, &rdr);
     return r;
 }

@@ -106,7 +107,7 @@ fn parse_crate_from_source_str(name: ~str, source: @~str, cfg: ast::crate_cfg,
                                codemap::FssNone, source);
     let r = p.parse_crate_mod(cfg);
     p.abort_if_errors();
-    eval::update_parse_sess_position(&sess, &rdr);
+    update_parse_sess_position(&sess, &rdr);
     return r;
 }

@@ -116,7 +117,7 @@ fn parse_expr_from_source_str(name: ~str, source: @~str, cfg: ast::crate_cfg,
                               codemap::FssNone, source);
     let r = p.parse_expr();
     p.abort_if_errors();
-    eval::update_parse_sess_position(&sess, &rdr);
+    update_parse_sess_position(&sess, &rdr);
     return r;
 }

@@ -127,7 +128,7 @@ fn parse_item_from_source_str(name: ~str, source: @~str, cfg: ast::crate_cfg,
                               codemap::FssNone, source);
     let r = p.parse_item(attrs);
     p.abort_if_errors();
-    eval::update_parse_sess_position(&sess, &rdr);
+    update_parse_sess_position(&sess, &rdr);
     return r;
 }

@@ -138,7 +139,7 @@ fn parse_stmt_from_source_str(name: ~str, source: @~str, cfg: ast::crate_cfg,
                               codemap::FssNone, source);
     let r = p.parse_stmt(attrs);
     p.abort_if_errors();
-    eval::update_parse_sess_position(&sess, &rdr);
+    update_parse_sess_position(&sess, &rdr);
     return r;
 }

@@ -155,7 +156,7 @@ fn parse_from_source_str<T>(f: fn (p: Parser) -> T,
         p.reader.fatal(~"expected end-of-string");
     }
     p.abort_if_errors();
-    eval::update_parse_sess_position(&sess, &rdr);
+    update_parse_sess_position(&sess, &rdr);
     move r
 }

@@ -216,3 +217,10 @@ fn new_parser_from_tt(sess: parse_sess, cfg: ast::crate_cfg,
                       None, tt);
     return Parser(sess, cfg, trdr as reader, parser::SOURCE_FILE)
 }
+
+fn update_parse_sess_position(sess: &parse_sess, r: &lexer::string_reader) {
+    sess.pos = FilePos {
+        ch: r.last_pos.ch,
+        byte: r.last_pos.byte
+    };
+}
@@ -131,7 +131,7 @@ fn consume_non_eol_whitespace(rdr: string_reader) {
 fn push_blank_line_comment(rdr: string_reader, comments: &mut ~[cmnt]) {
     debug!(">>> blank-line comment");
     let v: ~[~str] = ~[];
-    comments.push({style: blank_line, lines: v, pos: rdr.chpos});
+    comments.push({style: blank_line, lines: v, pos: rdr.last_pos.ch});
 }

 fn consume_whitespace_counting_blank_lines(rdr: string_reader,
@@ -148,7 +148,7 @@ fn consume_whitespace_counting_blank_lines(rdr: string_reader,
 fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
                         comments: &mut ~[cmnt]) {
     debug!(">>> shebang comment");
-    let p = rdr.chpos;
+    let p = rdr.last_pos.ch;
     debug!("<<< shebang comment");
     comments.push({
         style: if code_to_the_left { trailing } else { isolated },
@@ -160,7 +160,7 @@ fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
 fn read_line_comments(rdr: string_reader, code_to_the_left: bool,
                       comments: &mut ~[cmnt]) {
     debug!(">>> line comments");
-    let p = rdr.chpos;
+    let p = rdr.last_pos.ch;
     let mut lines: ~[~str] = ~[];
     while rdr.curr == '/' && nextch(rdr) == '/' {
         let line = read_one_line_comment(rdr);
@@ -209,7 +209,7 @@ fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
 fn read_block_comment(rdr: string_reader, code_to_the_left: bool,
                       comments: &mut ~[cmnt]) {
     debug!(">>> block comment");
-    let p = rdr.chpos;
+    let p = rdr.last_pos.ch;
     let mut lines: ~[~str] = ~[];
     let mut col: CharPos = rdr.col;
     bump(rdr);
@@ -319,7 +319,7 @@ fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
         }


-        let bstart = rdr.pos;
+        let bstart = rdr.pos.byte;
         rdr.next_token();
         //discard, and look ahead; we're working with internal state
         let {tok: tok, sp: sp} = rdr.peek();
@@ -1,8 +1,8 @@
+use parse::update_parse_sess_position;
 use parser::{Parser, SOURCE_FILE};
 use attr::parser_attr;

 export eval_crate_directives_to_mod;
-export update_parse_sess_position;

 type ctx =
     @{sess: parse::parse_sess,
@@ -74,13 +74,6 @@ fn parse_companion_mod(cx: ctx, prefix: &Path, suffix: &Option<Path>)
     }
 }

-fn update_parse_sess_position(sess: &parse_sess, r: &lexer::string_reader) {
-    sess.pos = FilePos {
-        ch: r.chpos,
-        byte: sess.pos.byte + r.pos
-    };
-}
-
 fn cdir_path_opt(default: ~str, attrs: ~[ast::attribute]) -> ~str {
     match ::attr::first_attr_value_str_by_name(attrs, ~"path") {
         Some(d) => d,
@@ -1,5 +1,5 @@
 use diagnostic::span_handler;
-use codemap::{span, CodeMap, CharPos, BytePos};
+use codemap::{span, CodeMap, CharPos, BytePos, FilePos};
 use ext::tt::transcribe::{tt_reader, new_tt_reader, dup_tt_reader,
                           tt_next_token};

@@ -21,10 +21,10 @@ trait reader {
 type string_reader = @{
     span_diagnostic: span_handler,
     src: @~str,
+    mut pos: FilePos,
+    mut last_pos: FilePos,
     mut col: CharPos,
-    mut pos: BytePos,
     mut curr: char,
-    mut chpos: CharPos,
     filemap: @codemap::FileMap,
     interner: @token::ident_interner,
     /* cached: */
@@ -48,9 +48,10 @@ fn new_low_level_string_reader(span_diagnostic: span_handler,
     // Force the initial reader bump to start on a fresh line
     let initial_char = '\n';
     let r = @{span_diagnostic: span_diagnostic, src: filemap.src,
-              mut col: CharPos(0), mut pos: BytePos(0),
+              mut pos: filemap.start_pos,
+              mut last_pos: filemap.start_pos,
+              mut col: CharPos(0),
               mut curr: initial_char,
-              mut chpos: filemap.start_pos.ch,
              filemap: filemap, interner: itr,
              /* dummy values; not read */
              mut peek_tok: token::EOF,
@@ -61,7 +62,9 @@ fn new_low_level_string_reader(span_diagnostic: span_handler,

 fn dup_string_reader(&&r: string_reader) -> string_reader {
     @{span_diagnostic: r.span_diagnostic, src: r.src,
-      mut col: r.col, mut pos: r.pos, mut curr: r.curr, mut chpos: r.chpos,
+      mut pos: r.pos,
+      mut last_pos: r.last_pos,
+      mut col: r.col, mut curr: r.curr,
      filemap: r.filemap, interner: r.interner,
      mut peek_tok: r.peek_tok, mut peek_span: r.peek_span}
 }
@@ -116,34 +119,48 @@ fn string_advance_token(&&r: string_reader) {
     if is_eof(r) {
         r.peek_tok = token::EOF;
     } else {
-        let start_chpos = r.chpos;
+        let start_chpos = r.last_pos.ch;
         r.peek_tok = next_token_inner(r);
-        r.peek_span = ast_util::mk_sp(start_chpos, r.chpos);
+        r.peek_span = ast_util::mk_sp(start_chpos, r.last_pos.ch);
     };

 }

+fn byte_offset(rdr: string_reader) -> BytePos {
+    (rdr.pos.byte - rdr.filemap.start_pos.byte)
+}
+
 fn get_str_from(rdr: string_reader, start: BytePos) -> ~str unsafe {
     // I'm pretty skeptical about this subtraction. What if there's a
     // multi-byte character before the mark?
-    return str::slice(*rdr.src, start.to_uint() - 1u, rdr.pos.to_uint() - 1u);
+    return str::slice(*rdr.src, start.to_uint() - 1u,
+                      byte_offset(rdr).to_uint() - 1u);
 }

 fn bump(rdr: string_reader) {
-    if rdr.pos.to_uint() < (*rdr.src).len() {
+    rdr.last_pos = rdr.pos;
+    let current_byte_offset = byte_offset(rdr).to_uint();
+    if current_byte_offset < (*rdr.src).len() {
+        let last_char = rdr.curr;
+        let next = str::char_range_at(*rdr.src, current_byte_offset);
+        let byte_offset_diff = next.next - current_byte_offset;
+        rdr.pos = FilePos {
+            ch: rdr.pos.ch + CharPos(1u),
+            byte: rdr.pos.byte + BytePos(byte_offset_diff)
+        };
+        rdr.curr = next.ch;
         rdr.col += CharPos(1u);
-        rdr.chpos += CharPos(1u);
-        if rdr.curr == '\n' {
-            rdr.filemap.next_line(rdr.chpos, rdr.pos);
+        if last_char == '\n' {
+            rdr.filemap.next_line(rdr.last_pos);
             rdr.col = CharPos(0u);
         }
-        let next = str::char_range_at(*rdr.src, rdr.pos.to_uint());
-        rdr.pos = BytePos(next.next);
-        rdr.curr = next.ch;
     } else {
         // XXX: What does this accomplish?
         if (rdr.curr != -1 as char) {
-            rdr.chpos += CharPos(1u);
+            rdr.pos = FilePos {
+                ch: rdr.pos.ch + CharPos(1u),
+                byte: rdr.pos.byte + BytePos(1u)
+            };
             rdr.col += CharPos(1u);
             rdr.curr = -1 as char;
         }
@@ -153,8 +170,9 @@ fn is_eof(rdr: string_reader) -> bool {
     rdr.curr == -1 as char
 }
 fn nextch(rdr: string_reader) -> char {
-    if rdr.pos.to_uint() < (*rdr.src).len() {
-        return str::char_at(*rdr.src, rdr.pos.to_uint());
+    let offset = byte_offset(rdr).to_uint();
+    if offset < (*rdr.src).len() {
+        return str::char_at(*rdr.src, offset);
     } else { return -1 as char; }
 }

@@ -211,7 +229,7 @@ fn consume_any_line_comment(rdr: string_reader)
         bump(rdr);
         // line comments starting with "///" or "//!" are doc-comments
         if rdr.curr == '/' || rdr.curr == '!' {
-            let start_chpos = rdr.chpos - CharPos(2u);
+            let start_chpos = rdr.pos.ch - CharPos(2u);
             let mut acc = ~"//";
             while rdr.curr != '\n' && !is_eof(rdr) {
                 str::push_char(&mut acc, rdr.curr);
@@ -219,7 +237,7 @@ fn consume_any_line_comment(rdr: string_reader)
             }
             return Some({
                 tok: token::DOC_COMMENT(rdr.interner.intern(@acc)),
-                sp: ast_util::mk_sp(start_chpos, rdr.chpos)
+                sp: ast_util::mk_sp(start_chpos, rdr.pos.ch)
             });
         } else {
             while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
@@ -234,7 +252,7 @@ fn consume_any_line_comment(rdr: string_reader)
         if nextch(rdr) == '!' {
             let cmap = @CodeMap::new();
             (*cmap).files.push(rdr.filemap);
-            let loc = cmap.lookup_char_pos_adj(rdr.chpos);
+            let loc = cmap.lookup_char_pos_adj(rdr.last_pos.ch);
             if loc.line == 1u && loc.col == CharPos(0u) {
                 while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
                 return consume_whitespace_and_comments(rdr);
@@ -250,7 +268,7 @@ fn consume_block_comment(rdr: string_reader)

     // block comments starting with "/**" or "/*!" are doc-comments
     if rdr.curr == '*' || rdr.curr == '!' {
-        let start_chpos = rdr.chpos - CharPos(2u);
+        let start_chpos = rdr.pos.ch - CharPos(2u);
         let mut acc = ~"/*";
         while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
             str::push_char(&mut acc, rdr.curr);
@@ -264,7 +282,7 @@ fn consume_block_comment(rdr: string_reader)
             bump(rdr);
             return Some({
                 tok: token::DOC_COMMENT(rdr.interner.intern(@acc)),
-                sp: ast_util::mk_sp(start_chpos, rdr.chpos)
+                sp: ast_util::mk_sp(start_chpos, rdr.pos.ch)
             });
         }
     } else {
@@ -584,7 +602,7 @@ fn next_token_inner(rdr: string_reader) -> token::Token {
         return token::LIT_INT(c2 as i64, ast::ty_char);
       }
       '"' => {
-        let n = rdr.pos;
+        let n = byte_offset(rdr);
        bump(rdr);
        while rdr.curr != '"' {
            if is_eof(rdr) {