diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 6664bad2859..ce674f53662 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -374,13 +374,10 @@ enum blk_sort { */ #[auto_serialize] -type token_tree = spanned; - -#[auto_serialize] -enum token_tree_ { +enum token_tree { /* for macro invocations; parsing is the macro's job */ - tt_delim(token::token, [token_tree]), - tt_flat(token::token) + tt_delim([token_tree]), + tt_flat(uint, token::token) } #[auto_serialize] diff --git a/src/libsyntax/parse.rs b/src/libsyntax/parse.rs index 6ebccf33542..c08ad9927ad 100644 --- a/src/libsyntax/parse.rs +++ b/src/libsyntax/parse.rs @@ -4,6 +4,7 @@ import dvec::extensions; export parse_sess; export next_node_id; export new_parser_from_file; +export new_parser_etc_from_file; export new_parser_from_source_str; export parse_crate_from_file; export parse_crate_from_crate_file; @@ -17,7 +18,7 @@ import attr::parser_attr; import common::parser_common; import ast::node_id; import util::interner; -import lexer::reader; +import lexer::{string_reader_as_reader, reader, string_reader}; type parse_sess = @{ cm: codemap::codemap, @@ -42,14 +43,15 @@ fn parse_crate_from_file(input: str, cfg: ast::crate_cfg, sess: parse_sess) -> fn parse_crate_from_crate_file(input: str, cfg: ast::crate_cfg, sess: parse_sess) -> @ast::crate { - let p = new_parser_from_file(sess, cfg, input, parser::CRATE_FILE); + let (p, rdr) = new_parser_etc_from_file(sess, cfg, input, + parser::CRATE_FILE); let lo = p.span.lo; - let prefix = path::dirname(p.reader.filemap.name); + let prefix = path::dirname(input); let leading_attrs = p.parse_inner_attrs_and_next(); let { inner: crate_attrs, next: first_cdir_attr } = leading_attrs; let cdirs = p.parse_crate_directives(token::EOF, first_cdir_attr); - sess.chpos = p.reader.chpos; - sess.byte_pos = sess.byte_pos + p.reader.pos; + sess.chpos = rdr.chpos; + sess.byte_pos = sess.byte_pos + rdr.pos; let cx = @{sess: sess, cfg: /* FIXME: bad */ copy p.cfg}; let (companionmod, _) = path::splitext(path::basename(input)); let (m, attrs) = eval::eval_crate_directives_to_mod( @@ -65,41 +67,42 @@ fn parse_crate_from_crate_file(input: str, cfg: ast::crate_cfg, fn parse_crate_from_source_file(input: str, cfg: ast::crate_cfg, sess: parse_sess) -> @ast::crate { - let p = new_parser_from_file(sess, cfg, input, parser::SOURCE_FILE); + let (p, rdr) = new_parser_etc_from_file(sess, cfg, input, + parser::SOURCE_FILE); let r = p.parse_crate_mod(cfg); - sess.chpos = p.reader.chpos; - sess.byte_pos = sess.byte_pos + p.reader.pos; + sess.chpos = rdr.chpos; + sess.byte_pos = sess.byte_pos + rdr.pos; ret r; } fn parse_crate_from_source_str(name: str, source: @str, cfg: ast::crate_cfg, sess: parse_sess) -> @ast::crate { - let p = new_parser_from_source_str( - sess, cfg, name, codemap::fss_none, source); + let (p, rdr) = new_parser_etc_from_source_str(sess, cfg, name, + codemap::fss_none, source); let r = p.parse_crate_mod(cfg); - sess.chpos = p.reader.chpos; - sess.byte_pos = sess.byte_pos + p.reader.pos; + sess.chpos = rdr.chpos; + sess.byte_pos = sess.byte_pos + rdr.pos; ret r; } fn parse_expr_from_source_str(name: str, source: @str, cfg: ast::crate_cfg, sess: parse_sess) -> @ast::expr { - let p = new_parser_from_source_str( - sess, cfg, name, codemap::fss_none, source); + let (p, rdr) = new_parser_etc_from_source_str(sess, cfg, name, + codemap::fss_none, source); let r = p.parse_expr(); - sess.chpos = p.reader.chpos; - sess.byte_pos = sess.byte_pos + p.reader.pos; + sess.chpos = rdr.chpos; + sess.byte_pos = sess.byte_pos + rdr.pos; ret r; } fn parse_item_from_source_str(name: str, source: @str, cfg: ast::crate_cfg, +attrs: [ast::attribute], vis: ast::visibility, sess: parse_sess) -> option<@ast::item> { - let p = new_parser_from_source_str( - sess, cfg, name, codemap::fss_none, source); + let (p, rdr) = new_parser_etc_from_source_str(sess, cfg, name, + codemap::fss_none, source); let r = p.parse_item(attrs, vis); - sess.chpos = p.reader.chpos; - sess.byte_pos = sess.byte_pos + p.reader.pos; + sess.chpos = rdr.chpos; + sess.byte_pos = sess.byte_pos + rdr.pos; ret r; } @@ -109,13 +112,14 @@ fn parse_from_source_str(f: fn (p: parser) -> T, sess: parse_sess) -> T { - let p = new_parser_from_source_str(sess, cfg, name, ss, source); + let (p, rdr) = new_parser_etc_from_source_str(sess, cfg, name, ss, + source); let r = f(p); if !p.reader.is_eof() { p.reader.fatal("expected end-of-string"); } - sess.chpos = p.reader.chpos; - sess.byte_pos = sess.byte_pos + p.reader.pos; + sess.chpos = rdr.chpos; + sess.byte_pos = sess.byte_pos + rdr.pos; ret r; } @@ -127,9 +131,9 @@ fn next_node_id(sess: parse_sess) -> node_id { ret rv; } -fn new_parser_from_source_str(sess: parse_sess, cfg: ast::crate_cfg, - +name: str, +ss: codemap::file_substr, - source: @str) -> parser { +fn new_parser_etc_from_source_str(sess: parse_sess, cfg: ast::crate_cfg, + +name: str, +ss: codemap::file_substr, + source: @str) -> (parser, string_reader) { let ftype = parser::SOURCE_FILE; let filemap = codemap::new_filemap_w_substr (name, ss, source, sess.chpos, sess.byte_pos); @@ -138,14 +142,21 @@ fn new_parser_from_source_str(sess: parse_sess, cfg: ast::crate_cfg, {|x|str::hash(*x)}, {|x,y|str::eq(*x, *y)} ); - let rdr = lexer::new_reader(sess.span_diagnostic, - filemap, itr); - ret parser(sess, cfg, rdr, ftype); + let srdr = lexer::new_string_reader(sess.span_diagnostic, filemap, itr); + ret (parser(sess, cfg, srdr as reader, ftype), srdr); } -fn new_parser_from_file(sess: parse_sess, cfg: ast::crate_cfg, +path: str, - ftype: parser::file_type) -> - parser { +fn new_parser_from_source_str(sess: parse_sess, cfg: ast::crate_cfg, + +name: str, +ss: codemap::file_substr, + source: @str) -> parser { + let (p, _) = new_parser_etc_from_source_str(sess, cfg, name, ss, source); + ret p; +} + + +fn new_parser_etc_from_file(sess: parse_sess, cfg: ast::crate_cfg, +path: str, + ftype: parser::file_type) -> + (parser, string_reader) { let res = io::read_whole_file_str(path); alt res { result::ok(_) { /* Continue. */ } @@ -158,6 +169,12 @@ fn new_parser_from_file(sess: parse_sess, cfg: ast::crate_cfg, +path: str, {|x|str::hash(*x)}, {|x,y|str::eq(*x, *y)} ); - let rdr = lexer::new_reader(sess.span_diagnostic, filemap, itr); - ret parser(sess, cfg, rdr, ftype); + let srdr = lexer::new_string_reader(sess.span_diagnostic, filemap, itr); + ret (parser(sess, cfg, srdr as reader, ftype), srdr); +} + +fn new_parser_from_file(sess: parse_sess, cfg: ast::crate_cfg, +path: str, + ftype: parser::file_type) -> parser { + let (p, _) = new_parser_etc_from_file(sess, cfg, path, ftype); + ret p; } diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs index d37d517e7d3..53a6238d57f 100644 --- a/src/libsyntax/parse/comments.rs +++ b/src/libsyntax/parse/comments.rs @@ -1,7 +1,8 @@ import io::reader_util; import io::println;//XXXXXXXXxxx import util::interner; -import lexer::{ reader, new_reader, next_token, is_whitespace }; +import lexer::{ string_reader, bump, is_eof, nextch, new_string_reader, + is_whitespace, get_str_from, string_reader_as_reader }; export cmnt; export lit; @@ -17,45 +18,46 @@ enum cmnt_style { type cmnt = {style: cmnt_style, lines: [str], pos: uint}; -fn read_to_eol(rdr: reader) -> str { +fn read_to_eol(rdr: string_reader) -> str { let mut val = ""; - while rdr.curr != '\n' && !rdr.is_eof() { + while rdr.curr != '\n' && !is_eof(rdr) { str::push_char(val, rdr.curr); - rdr.bump(); + bump(rdr); } - if rdr.curr == '\n' { rdr.bump(); } + if rdr.curr == '\n' { bump(rdr); } ret val; } -fn read_one_line_comment(rdr: reader) -> str { +fn read_one_line_comment(rdr: string_reader) -> str { let val = read_to_eol(rdr); assert ((val[0] == '/' as u8 && val[1] == '/' as u8) || (val[0] == '#' as u8 && val[1] == '!' as u8)); ret val; } -fn consume_non_eol_whitespace(rdr: reader) { - while is_whitespace(rdr.curr) && rdr.curr != '\n' && !rdr.is_eof() { - rdr.bump(); +fn consume_non_eol_whitespace(rdr: string_reader) { + while is_whitespace(rdr.curr) && rdr.curr != '\n' && !is_eof(rdr) { + bump(rdr); } } -fn push_blank_line_comment(rdr: reader, &comments: [cmnt]) { +fn push_blank_line_comment(rdr: string_reader, &comments: [cmnt]) { #debug(">>> blank-line comment"); let v: [str] = []; comments += [{style: blank_line, lines: v, pos: rdr.chpos}]; } -fn consume_whitespace_counting_blank_lines(rdr: reader, &comments: [cmnt]) { - while is_whitespace(rdr.curr) && !rdr.is_eof() { +fn consume_whitespace_counting_blank_lines(rdr: string_reader, + &comments: [cmnt]) { + while is_whitespace(rdr.curr) && !is_eof(rdr) { if rdr.col == 0u && rdr.curr == '\n' { push_blank_line_comment(rdr, comments); } - rdr.bump(); + bump(rdr); } } -fn read_shebang_comment(rdr: reader, code_to_the_left: bool) -> cmnt { +fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool) -> cmnt { #debug(">>> shebang comment"); let p = rdr.chpos; #debug("<<< shebang comment"); @@ -64,11 +66,11 @@ fn read_shebang_comment(rdr: reader, code_to_the_left: bool) -> cmnt { pos: p}; } -fn read_line_comments(rdr: reader, code_to_the_left: bool) -> cmnt { +fn read_line_comments(rdr: string_reader, code_to_the_left: bool) -> cmnt { #debug(">>> line comments"); let p = rdr.chpos; let mut lines: [str] = []; - while rdr.curr == '/' && rdr.next() == '/' { + while rdr.curr == '/' && nextch(rdr) == '/' { let line = read_one_line_comment(rdr); log(debug, line); lines += [line]; @@ -99,36 +101,36 @@ fn trim_whitespace_prefix_and_push_line(&lines: [str], lines += [s1]; } -fn read_block_comment(rdr: reader, code_to_the_left: bool) -> cmnt { +fn read_block_comment(rdr: string_reader, code_to_the_left: bool) -> cmnt { #debug(">>> block comment"); let p = rdr.chpos; let mut lines: [str] = []; let mut col: uint = rdr.col; - rdr.bump(); - rdr.bump(); + bump(rdr); + bump(rdr); let mut curr_line = "/*"; let mut level: int = 1; while level > 0 { #debug("=== block comment level %d", level); - if rdr.is_eof() { rdr.fatal("unterminated block comment"); } + if is_eof(rdr) {(rdr as reader).fatal("unterminated block comment");} if rdr.curr == '\n' { trim_whitespace_prefix_and_push_line(lines, curr_line, col); curr_line = ""; - rdr.bump(); + bump(rdr); } else { str::push_char(curr_line, rdr.curr); - if rdr.curr == '/' && rdr.next() == '*' { - rdr.bump(); - rdr.bump(); + if rdr.curr == '/' && nextch(rdr) == '*' { + bump(rdr); + bump(rdr); curr_line += "*"; level += 1; } else { - if rdr.curr == '*' && rdr.next() == '/' { - rdr.bump(); - rdr.bump(); + if rdr.curr == '*' && nextch(rdr) == '/' { + bump(rdr); + bump(rdr); curr_line += "/"; level -= 1; - } else { rdr.bump(); } + } else { bump(rdr); } } } } @@ -137,26 +139,27 @@ fn read_block_comment(rdr: reader, code_to_the_left: bool) -> cmnt { } let mut style = if code_to_the_left { trailing } else { isolated }; consume_non_eol_whitespace(rdr); - if !rdr.is_eof() && rdr.curr != '\n' && vec::len(lines) == 1u { + if !is_eof(rdr) && rdr.curr != '\n' && vec::len(lines) == 1u { style = mixed; } #debug("<<< block comment"); ret {style: style, lines: lines, pos: p}; } -fn peeking_at_comment(rdr: reader) -> bool { - ret ((rdr.curr == '/' && rdr.next() == '/') || - (rdr.curr == '/' && rdr.next() == '*')) || - (rdr.curr == '#' && rdr.next() == '!'); +fn peeking_at_comment(rdr: string_reader) -> bool { + ret ((rdr.curr == '/' && nextch(rdr) == '/') || + (rdr.curr == '/' && nextch(rdr) == '*')) || + (rdr.curr == '#' && nextch(rdr) == '!'); } -fn consume_comment(rdr: reader, code_to_the_left: bool, &comments: [cmnt]) { +fn consume_comment(rdr: string_reader, code_to_the_left: bool, + &comments: [cmnt]) { #debug(">>> consume comment"); - if rdr.curr == '/' && rdr.next() == '/' { + if rdr.curr == '/' && nextch(rdr) == '/' { comments += [read_line_comments(rdr, code_to_the_left)]; - } else if rdr.curr == '/' && rdr.next() == '*' { + } else if rdr.curr == '/' && nextch(rdr) == '*' { comments += [read_block_comment(rdr, code_to_the_left)]; - } else if rdr.curr == '#' && rdr.next() == '!' { + } else if rdr.curr == '#' && nextch(rdr) == '!' { comments += [read_shebang_comment(rdr, code_to_the_left)]; } else { fail; } #debug("<<< consume comment"); @@ -173,12 +176,12 @@ fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler, {|x|str::hash(*x)}, {|x,y|str::eq(*x, *y)} ); - let rdr = new_reader(span_diagnostic, - codemap::new_filemap(path, src, 0u, 0u), itr); + let rdr = new_string_reader(span_diagnostic, + codemap::new_filemap(path, src, 0u, 0u), itr); let mut comments: [cmnt] = []; let mut literals: [lit] = []; let mut first_read: bool = true; - while !rdr.is_eof() { + while !is_eof(rdr) { loop { let mut code_to_the_left = !first_read; consume_non_eol_whitespace(rdr); @@ -192,9 +195,10 @@ fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler, } break; } - let tok = next_token(rdr); + let bpos = rdr.pos; + let tok = rdr.next_token(); if token::is_lit(tok.tok) { - let s = rdr.get_str_from(tok.bpos); + let s = get_str_from(rdr, bpos); literals += [{lit: s, pos: tok.chpos}]; log(debug, "tok lit: " + s); } else { diff --git a/src/libsyntax/parse/common.rs b/src/libsyntax/parse/common.rs index c8e62a2245d..f8292be51fe 100644 --- a/src/libsyntax/parse/common.rs +++ b/src/libsyntax/parse/common.rs @@ -18,7 +18,7 @@ fn seq_sep_none() -> seq_sep { } fn token_to_str(reader: reader, ++token: token::token) -> str { - token::to_str(*reader.interner, token) + token::to_str(*reader.interner(), token) } // This should be done with traits, once traits work diff --git a/src/libsyntax/parse/eval.rs b/src/libsyntax/parse/eval.rs index 4b5632124d8..5ca9b22524b 100644 --- a/src/libsyntax/parse/eval.rs +++ b/src/libsyntax/parse/eval.rs @@ -64,11 +64,12 @@ fn parse_companion_mod(cx: ctx, prefix: str, suffix: option) #debug("looking for companion mod %s", modpath); if file_exists(modpath) { #debug("found companion mod"); - let p0 = new_parser_from_file(cx.sess, cx.cfg, modpath, SOURCE_FILE); + let (p0, r0) = new_parser_etc_from_file(cx.sess, cx.cfg, + modpath, SOURCE_FILE); let inner_attrs = p0.parse_inner_attrs_and_next(); let m0 = p0.parse_mod_items(token::EOF, inner_attrs.next); - cx.sess.chpos = p0.reader.chpos; - cx.sess.byte_pos = cx.sess.byte_pos + p0.reader.pos; + cx.sess.chpos = p0.reader.chpos(); + cx.sess.byte_pos = cx.sess.byte_pos + r0.pos; ret (m0.view_items, m0.items, inner_attrs.inner); } else { ret ([], [], []); @@ -94,8 +95,8 @@ fn eval_crate_directive(cx: ctx, cdir: @ast::crate_directive, prefix: str, if path::path_is_absolute(*file_path) { *file_path } else { prefix + path::path_sep() + *file_path }; - let p0 = - new_parser_from_file(cx.sess, cx.cfg, full_path, SOURCE_FILE); + let (p0, r0) = + new_parser_etc_from_file(cx.sess, cx.cfg, full_path, SOURCE_FILE); let inner_attrs = p0.parse_inner_attrs_and_next(); let mod_attrs = attrs + inner_attrs.inner; let first_item_outer_attrs = inner_attrs.next; @@ -105,8 +106,8 @@ fn eval_crate_directive(cx: ctx, cdir: @ast::crate_directive, prefix: str, /* FIXME: bad */ copy id, ast::item_mod(m0), ast::public, mod_attrs); // Thread defids, chpos and byte_pos through the parsers - cx.sess.chpos = p0.reader.chpos; - cx.sess.byte_pos = cx.sess.byte_pos + p0.reader.pos; + cx.sess.chpos = p0.reader.chpos(); + cx.sess.byte_pos = cx.sess.byte_pos + r0.pos; items += [i]; } ast::cdir_dir_mod(id, cdirs, attrs) { diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index 72b2462feb1..e5be37dc4f0 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -2,9 +2,19 @@ import util::interner; import util::interner::intern; import diagnostic; -export reader, new_reader, next_token, is_whitespace; +export reader, string_reader, new_string_reader, is_whitespace; +export nextch, is_eof, bump, get_str_from; +export string_reader_as_reader; -type reader = @{ +iface reader { + fn is_eof() -> bool; + fn next_token() -> {tok: token::token, chpos: uint}; + fn fatal(str) -> !; + fn chpos() -> uint; + fn interner() -> @interner::interner<@str>; +} + +type string_reader = @{ span_diagnostic: diagnostic::span_handler, src: @str, mut col: uint, @@ -15,47 +25,64 @@ type reader = @{ interner: @interner::interner<@str> }; -impl reader for reader { - fn is_eof() -> bool { self.curr == -1 as char } - fn get_str_from(start: uint) -> str unsafe { - // I'm pretty skeptical about this subtraction. What if there's a - // multi-byte character before the mark? - ret str::slice(*self.src, start - 1u, self.pos - 1u); - } - fn next() -> char { - if self.pos < (*self.src).len() { - ret str::char_at(*self.src, self.pos); - } else { ret -1 as char; } - } - fn bump() { - if self.pos < (*self.src).len() { - self.col += 1u; - self.chpos += 1u; - if self.curr == '\n' { - codemap::next_line(self.filemap, self.chpos, self.pos); - self.col = 0u; - } - let next = str::char_range_at(*self.src, self.pos); - self.pos = next.next; - self.curr = next.ch; +impl string_reader_as_reader of reader for string_reader { + fn is_eof() -> bool { is_eof(self) } + fn next_token() -> {tok: token::token, chpos: uint} { + consume_whitespace_and_comments(self); + let start_chpos = self.chpos; + let tok = if is_eof(self) { + token::EOF } else { - if (self.curr != -1 as char) { - self.col += 1u; - self.chpos += 1u; - self.curr = -1 as char; - } - } + next_token_inner(self) + }; + ret {tok: tok, chpos: start_chpos}; } fn fatal(m: str) -> ! { self.span_diagnostic.span_fatal( ast_util::mk_sp(self.chpos, self.chpos), m) } + fn chpos() -> uint { self.chpos } + fn interner() -> @interner::interner<@str> { self.interner } } -fn new_reader(span_diagnostic: diagnostic::span_handler, - filemap: codemap::filemap, - itr: @interner::interner<@str>) -> reader { +fn get_str_from(rdr: string_reader, start: uint) -> str unsafe { + // I'm pretty skeptical about this subtraction. What if there's a + // multi-byte character before the mark? + ret str::slice(*rdr.src, start - 1u, rdr.pos - 1u); +} + +fn bump(rdr: string_reader) { + if rdr.pos < (*rdr.src).len() { + rdr.col += 1u; + rdr.chpos += 1u; + if rdr.curr == '\n' { + codemap::next_line(rdr.filemap, rdr.chpos, rdr.pos); + rdr.col = 0u; + } + let next = str::char_range_at(*rdr.src, rdr.pos); + rdr.pos = next.next; + rdr.curr = next.ch; + } else { + if (rdr.curr != -1 as char) { + rdr.col += 1u; + rdr.chpos += 1u; + rdr.curr = -1 as char; + } + } +} +fn is_eof(rdr: string_reader) -> bool { + rdr.curr == -1 as char +} +fn nextch(rdr: string_reader) -> char { + if rdr.pos < (*rdr.src).len() { + ret str::char_at(*rdr.src, rdr.pos); + } else { ret -1 as char; } +} + +fn new_string_reader(span_diagnostic: diagnostic::span_handler, + filemap: codemap::filemap, + itr: @interner::interner<@str>) -> string_reader { let r = @{span_diagnostic: span_diagnostic, src: filemap.src, mut col: 0u, mut pos: 0u, mut curr: -1 as char, mut chpos: filemap.start_pos.ch, @@ -102,50 +129,50 @@ fn is_hex_digit(c: char) -> bool { fn is_bin_digit(c: char) -> bool { ret c == '0' || c == '1'; } -fn consume_whitespace_and_comments(rdr: reader) { - while is_whitespace(rdr.curr) { rdr.bump(); } +fn consume_whitespace_and_comments(rdr: string_reader) { + while is_whitespace(rdr.curr) { bump(rdr); } ret consume_any_line_comment(rdr); } -fn consume_any_line_comment(rdr: reader) { +fn consume_any_line_comment(rdr: string_reader) { if rdr.curr == '/' { - alt rdr.next() { + alt nextch(rdr) { '/' { - while rdr.curr != '\n' && !rdr.is_eof() { rdr.bump(); } + while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); } // Restart whitespace munch. ret consume_whitespace_and_comments(rdr); } - '*' { rdr.bump(); rdr.bump(); ret consume_block_comment(rdr); } + '*' { bump(rdr); bump(rdr); ret consume_block_comment(rdr); } _ { ret; } } } else if rdr.curr == '#' { - if rdr.next() == '!' { + if nextch(rdr) == '!' { let cmap = codemap::new_codemap(); (*cmap).files.push(rdr.filemap); let loc = codemap::lookup_char_pos_adj(cmap, rdr.chpos); if loc.line == 1u && loc.col == 0u { - while rdr.curr != '\n' && !rdr.is_eof() { rdr.bump(); } + while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); } ret consume_whitespace_and_comments(rdr); } } } } -fn consume_block_comment(rdr: reader) { +fn consume_block_comment(rdr: string_reader) { let mut level: int = 1; while level > 0 { - if rdr.is_eof() { rdr.fatal("unterminated block comment"); } - if rdr.curr == '/' && rdr.next() == '*' { - rdr.bump(); - rdr.bump(); + if is_eof(rdr) { rdr.fatal("unterminated block comment"); } + if rdr.curr == '/' && nextch(rdr) == '*' { + bump(rdr); + bump(rdr); level += 1; } else { - if rdr.curr == '*' && rdr.next() == '/' { - rdr.bump(); - rdr.bump(); + if rdr.curr == '*' && nextch(rdr) == '/' { + bump(rdr); + bump(rdr); level -= 1; - } else { rdr.bump(); } + } else { bump(rdr); } } } // restart whitespace munch. @@ -153,16 +180,16 @@ fn consume_block_comment(rdr: reader) { ret consume_whitespace_and_comments(rdr); } -fn scan_exponent(rdr: reader) -> option { +fn scan_exponent(rdr: string_reader) -> option { let mut c = rdr.curr; let mut rslt = ""; if c == 'e' || c == 'E' { str::push_char(rslt, c); - rdr.bump(); + bump(rdr); c = rdr.curr; if c == '-' || c == '+' { str::push_char(rslt, c); - rdr.bump(); + bump(rdr); } let exponent = scan_digits(rdr, 10u); if str::len(exponent) > 0u { @@ -171,62 +198,62 @@ fn scan_exponent(rdr: reader) -> option { } else { ret none::; } } -fn scan_digits(rdr: reader, radix: uint) -> str { +fn scan_digits(rdr: string_reader, radix: uint) -> str { let mut rslt = ""; loop { let c = rdr.curr; - if c == '_' { rdr.bump(); cont; } + if c == '_' { bump(rdr); cont; } alt char::to_digit(c, radix) { some(d) { str::push_char(rslt, c); - rdr.bump(); + bump(rdr); } _ { ret rslt; } } }; } -fn scan_number(c: char, rdr: reader) -> token::token { - let mut num_str, base = 10u, c = c, n = rdr.next(); +fn scan_number(c: char, rdr: string_reader) -> token::token { + let mut num_str, base = 10u, c = c, n = nextch(rdr); if c == '0' && n == 'x' { - rdr.bump(); - rdr.bump(); + bump(rdr); + bump(rdr); base = 16u; } else if c == '0' && n == 'b' { - rdr.bump(); - rdr.bump(); + bump(rdr); + bump(rdr); base = 2u; } num_str = scan_digits(rdr, base); c = rdr.curr; - rdr.next(); + nextch(rdr); if c == 'u' || c == 'i' { let signed = c == 'i'; let mut tp = { if signed { either::left(ast::ty_i) } else { either::right(ast::ty_u) } }; - rdr.bump(); + bump(rdr); c = rdr.curr; if c == '8' { - rdr.bump(); + bump(rdr); tp = if signed { either::left(ast::ty_i8) } else { either::right(ast::ty_u8) }; } - n = rdr.next(); + n = nextch(rdr); if c == '1' && n == '6' { - rdr.bump(); - rdr.bump(); + bump(rdr); + bump(rdr); tp = if signed { either::left(ast::ty_i16) } else { either::right(ast::ty_u16) }; } else if c == '3' && n == '2' { - rdr.bump(); - rdr.bump(); + bump(rdr); + bump(rdr); tp = if signed { either::left(ast::ty_i32) } else { either::right(ast::ty_u32) }; } else if c == '6' && n == '4' { - rdr.bump(); - rdr.bump(); + bump(rdr); + bump(rdr); tp = if signed { either::left(ast::ty_i64) } else { either::right(ast::ty_u64) }; } @@ -240,9 +267,9 @@ fn scan_number(c: char, rdr: reader) -> token::token { } } let mut is_float = false; - if rdr.curr == '.' && !(is_alpha(rdr.next()) || rdr.next() == '_') { + if rdr.curr == '.' && !(is_alpha(nextch(rdr)) || nextch(rdr) == '_') { is_float = true; - rdr.bump(); + bump(rdr); let dec_part = scan_digits(rdr, 10u); num_str += "." + dec_part; } @@ -254,17 +281,17 @@ fn scan_number(c: char, rdr: reader) -> token::token { none {} } if rdr.curr == 'f' { - rdr.bump(); + bump(rdr); c = rdr.curr; - n = rdr.next(); + n = nextch(rdr); if c == '3' && n == '2' { - rdr.bump(); - rdr.bump(); + bump(rdr); + bump(rdr); ret token::LIT_FLOAT(intern(*rdr.interner, @num_str), ast::ty_f32); } else if c == '6' && n == '4' { - rdr.bump(); - rdr.bump(); + bump(rdr); + bump(rdr); ret token::LIT_FLOAT(intern(*rdr.interner, @num_str), ast::ty_f64); /* FIXME: if this is out of range for either a 32-bit or @@ -289,11 +316,11 @@ fn scan_number(c: char, rdr: reader) -> token::token { } } -fn scan_numeric_escape(rdr: reader, n_hex_digits: uint) -> char { +fn scan_numeric_escape(rdr: string_reader, n_hex_digits: uint) -> char { let mut accum_int = 0, i = n_hex_digits; while i != 0u { let n = rdr.curr; - rdr.bump(); + bump(rdr); if !is_hex_digit(n) { rdr.fatal(#fmt["illegal numeric character escape: %d", n as int]); } @@ -304,15 +331,7 @@ fn scan_numeric_escape(rdr: reader, n_hex_digits: uint) -> char { ret accum_int as char; } -fn next_token(rdr: reader) -> {tok: token::token, chpos: uint, bpos: uint} { - consume_whitespace_and_comments(rdr); - let start_chpos = rdr.chpos; - let start_bpos = rdr.pos; - let tok = if rdr.is_eof() { token::EOF } else { next_token_inner(rdr) }; - ret {tok: tok, chpos: start_chpos, bpos: start_bpos}; -} - -fn next_token_inner(rdr: reader) -> token::token { +fn next_token_inner(rdr: string_reader) -> token::token { let mut accum_str = ""; let mut c = rdr.curr; if (c >= 'a' && c <= 'z') @@ -325,11 +344,11 @@ fn next_token_inner(rdr: reader) -> token::token { || c == '_' || (c > 'z' && char::is_XID_continue(c)) { str::push_char(accum_str, c); - rdr.bump(); + bump(rdr); c = rdr.curr; } if str::eq(accum_str, "_") { ret token::UNDERSCORE; } - let is_mod_name = c == ':' && rdr.next() == ':'; + let is_mod_name = c == ':' && nextch(rdr) == ':'; // FIXME: perform NFKC normalization here. (Issue #2253) ret token::IDENT(interner::intern(*rdr.interner, @@ -338,10 +357,10 @@ fn next_token_inner(rdr: reader) -> token::token { if is_dec_digit(c) { ret scan_number(c, rdr); } - fn binop(rdr: reader, op: token::binop) -> token::token { - rdr.bump(); + fn binop(rdr: string_reader, op: token::binop) -> token::token { + bump(rdr); if rdr.curr == '=' { - rdr.bump(); + bump(rdr); ret token::BINOPEQ(op); } else { ret token::BINOP(op); } } @@ -352,35 +371,35 @@ fn next_token_inner(rdr: reader) -> token::token { // One-byte tokens. - ';' { rdr.bump(); ret token::SEMI; } - ',' { rdr.bump(); ret token::COMMA; } + ';' { bump(rdr); ret token::SEMI; } + ',' { bump(rdr); ret token::COMMA; } '.' { - rdr.bump(); - if rdr.curr == '.' && rdr.next() == '.' { - rdr.bump(); - rdr.bump(); + bump(rdr); + if rdr.curr == '.' && nextch(rdr) == '.' { + bump(rdr); + bump(rdr); ret token::ELLIPSIS; } ret token::DOT; } - '(' { rdr.bump(); ret token::LPAREN; } - ')' { rdr.bump(); ret token::RPAREN; } - '{' { rdr.bump(); ret token::LBRACE; } - '}' { rdr.bump(); ret token::RBRACE; } - '[' { rdr.bump(); ret token::LBRACKET; } - ']' { rdr.bump(); ret token::RBRACKET; } - '@' { rdr.bump(); ret token::AT; } - '#' { rdr.bump(); ret token::POUND; } - '~' { rdr.bump(); ret token::TILDE; } + '(' { bump(rdr); ret token::LPAREN; } + ')' { bump(rdr); ret token::RPAREN; } + '{' { bump(rdr); ret token::LBRACE; } + '}' { bump(rdr); ret token::RBRACE; } + '[' { bump(rdr); ret token::LBRACKET; } + ']' { bump(rdr); ret token::RBRACKET; } + '@' { bump(rdr); ret token::AT; } + '#' { bump(rdr); ret token::POUND; } + '~' { bump(rdr); ret token::TILDE; } ':' { - rdr.bump(); + bump(rdr); if rdr.curr == ':' { - rdr.bump(); + bump(rdr); ret token::MOD_SEP; } else { ret token::COLON; } } - '$' { rdr.bump(); ret token::DOLLAR; } + '$' { bump(rdr); ret token::DOLLAR; } @@ -388,33 +407,33 @@ fn next_token_inner(rdr: reader) -> token::token { // Multi-byte tokens. '=' { - rdr.bump(); + bump(rdr); if rdr.curr == '=' { - rdr.bump(); + bump(rdr); ret token::EQEQ; } else if rdr.curr == '>' { - rdr.bump(); + bump(rdr); ret token::FAT_ARROW; } else { ret token::EQ; } } '!' { - rdr.bump(); + bump(rdr); if rdr.curr == '=' { - rdr.bump(); + bump(rdr); ret token::NE; } else { ret token::NOT; } } '<' { - rdr.bump(); + bump(rdr); alt rdr.curr { - '=' { rdr.bump(); ret token::LE; } + '=' { bump(rdr); ret token::LE; } '<' { ret binop(rdr, token::SHL); } '-' { - rdr.bump(); + bump(rdr); alt rdr.curr { - '>' { rdr.bump(); ret token::DARROW; } + '>' { bump(rdr); ret token::DARROW; } _ { ret token::LARROW; } } } @@ -422,20 +441,20 @@ fn next_token_inner(rdr: reader) -> token::token { } } '>' { - rdr.bump(); + bump(rdr); alt rdr.curr { - '=' { rdr.bump(); ret token::GE; } + '=' { bump(rdr); ret token::GE; } '>' { ret binop(rdr, token::SHR); } _ { ret token::GT; } } } '\'' { - rdr.bump(); + bump(rdr); let mut c2 = rdr.curr; - rdr.bump(); + bump(rdr); if c2 == '\\' { let escaped = rdr.curr; - rdr.bump(); + bump(rdr); alt escaped { 'n' { c2 = '\n'; } 'r' { c2 = '\r'; } @@ -454,24 +473,24 @@ fn next_token_inner(rdr: reader) -> token::token { if rdr.curr != '\'' { rdr.fatal("unterminated character constant"); } - rdr.bump(); // advance curr past token + bump(rdr); // advance curr past token ret token::LIT_INT(c2 as i64, ast::ty_char); } '"' { let n = rdr.chpos; - rdr.bump(); + bump(rdr); while rdr.curr != '"' { - if rdr.is_eof() { + if is_eof(rdr) { rdr.fatal(#fmt["unterminated double quote string: %s", - rdr.get_str_from(n)]); + get_str_from(rdr, n)]); } let ch = rdr.curr; - rdr.bump(); + bump(rdr); alt ch { '\\' { let escaped = rdr.curr; - rdr.bump(); + bump(rdr); alt escaped { 'n' { str::push_char(accum_str, '\n'); } 'r' { str::push_char(accum_str, '\r'); } @@ -496,27 +515,27 @@ fn next_token_inner(rdr: reader) -> token::token { _ { str::push_char(accum_str, ch); } } } - rdr.bump(); + bump(rdr); ret token::LIT_STR(interner::intern(*rdr.interner, @accum_str)); } '-' { - if rdr.next() == '>' { - rdr.bump(); - rdr.bump(); + if nextch(rdr) == '>' { + bump(rdr); + bump(rdr); ret token::RARROW; } else { ret binop(rdr, token::MINUS); } } '&' { - if rdr.next() == '&' { - rdr.bump(); - rdr.bump(); + if nextch(rdr) == '&' { + bump(rdr); + bump(rdr); ret token::ANDAND; } else { ret binop(rdr, token::AND); } } '|' { - alt rdr.next() { - '|' { rdr.bump(); rdr.bump(); ret token::OROR; } + alt nextch(rdr) { + '|' { bump(rdr); bump(rdr); ret token::OROR; } _ { ret binop(rdr, token::OR); } } } @@ -529,8 +548,8 @@ fn next_token_inner(rdr: reader) -> token::token { } } -fn consume_whitespace(rdr: reader) { - while is_whitespace(rdr.curr) && !rdr.is_eof() { rdr.bump(); } +fn consume_whitespace(rdr: string_reader) { + while is_whitespace(rdr.curr) && !is_eof(rdr) { bump(rdr); } } diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index c85cbb93855..1bf407f31c0 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -71,10 +71,11 @@ class parser { let keywords: hashmap; let restricted_keywords: hashmap; - new(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, - ftype: file_type) { - let tok0 = lexer::next_token(rdr); - let span0 = ast_util::mk_sp(tok0.chpos, rdr.chpos); + new(sess: parse_sess, cfg: ast::crate_cfg, +rdr: reader, ftype: file_type) + { + self.reader <- rdr; + let tok0 = self.reader.next_token(); + let span0 = ast_util::mk_sp(tok0.chpos, self.reader.chpos()); self.sess = sess; self.cfg = cfg; self.file_type = ftype; @@ -90,7 +91,6 @@ class parser { self.buffer_start = 0; self.buffer_end = 0; self.restriction = UNRESTRICTED; - self.reader = rdr; self.keywords = token::keyword_table(); self.restricted_keywords = token::restricted_keyword_table(); } @@ -101,9 +101,9 @@ class parser { fn bump() { self.last_span = self.span; if self.buffer_start == self.buffer_end { - let next = lexer::next_token(self.reader); + let next = self.reader.next_token(); self.token = next.tok; - self.span = mk_sp(next.chpos, self.reader.chpos); + self.span = mk_sp(next.chpos, self.reader.chpos()); } else { let next = self.buffer[self.buffer_start]; self.buffer_start = (self.buffer_start + 1) & 3; @@ -124,8 +124,8 @@ class parser { fn look_ahead(distance: uint) -> token::token { let dist = distance as int; while self.buffer_length() < dist { - let next = lexer::next_token(self.reader); - let sp = mk_sp(next.chpos, self.reader.chpos); + let next = self.reader.next_token(); + let sp = mk_sp(next.chpos, self.reader.chpos()); self.buffer[self.buffer_end] = {tok: next.tok, span: sp}; self.buffer_end = (self.buffer_end + 1) & 3; } @@ -144,7 +144,7 @@ class parser { self.sess.span_diagnostic.span_warn(copy self.span, m) } fn get_str(i: token::str_num) -> @str { - interner::get(*self.reader.interner, i) + interner::get(*self.reader.interner(), i) } fn get_id() -> node_id { next_node_id(self.sess) } @@ -1060,7 +1060,7 @@ class parser { fn parse_token_tree() -> token_tree { #[doc="what's the opposite delimiter?"] - fn flip(t: token::token) -> token::token { + fn flip(&t: token::token) -> token::token { alt t { token::LPAREN { token::RPAREN } token::LBRACE { token::RBRACE }