Convert lexer to istrs. Issue #855

This commit is contained in:
Brian Anderson 2011-08-27 00:07:03 -07:00
parent d2ae28fc99
commit 58dedcd090
5 changed files with 134 additions and 102 deletions

View File

@ -3,6 +3,7 @@
import std::int;
import std::vec;
import std::str;
import std::istr;
import std::map;
import std::map::hashmap;
import std::option;
@ -19,44 +20,44 @@
fn next() -> char;
fn init();
fn bump();
fn get_str_from(uint) -> str;
fn get_interner() -> @interner::interner<str>;
fn get_str_from(uint) -> istr;
fn get_interner() -> @interner::interner<istr>;
fn get_chpos() -> uint;
fn get_byte_pos() -> uint;
fn get_col() -> uint;
fn get_filemap() -> codemap::filemap;
fn err(str);
fn err(&istr);
};
fn new_reader(cm: &codemap::codemap, src: str, filemap: codemap::filemap,
itr: @interner::interner<str>) -> reader {
fn new_reader(cm: &codemap::codemap, src: &istr, filemap: codemap::filemap,
itr: @interner::interner<istr>) -> reader {
obj reader(cm: codemap::codemap,
src: str,
src: istr,
len: uint,
mutable col: uint,
mutable pos: uint,
mutable ch: char,
mutable chpos: uint,
mutable strs: [str],
mutable strs: [istr],
fm: codemap::filemap,
itr: @interner::interner<str>) {
itr: @interner::interner<istr>) {
fn is_eof() -> bool { ret ch == -1 as char; }
fn get_str_from(start: uint) -> str {
fn get_str_from(start: uint) -> istr {
// I'm pretty skeptical about this subtraction. What if there's a
// multi-byte character before the mark?
ret str::slice(src, start - 1u, pos - 1u);
ret istr::slice(src, start - 1u, pos - 1u);
}
fn get_chpos() -> uint { ret chpos; }
fn get_byte_pos() -> uint { ret pos; }
fn curr() -> char { ret ch; }
fn next() -> char {
if pos < len {
ret str::char_at(src, pos);
ret istr::char_at(src, pos);
} else { ret -1 as char; }
}
fn init() {
if pos < len {
let next = str::char_range_at(src, pos);
let next = istr::char_range_at(src, pos);
pos = next.next;
ch = next.ch;
}
@ -69,21 +70,23 @@ fn bump() {
codemap::next_line(fm, chpos, pos + fm.start_pos.byte);
col = 0u;
}
let next = str::char_range_at(src, pos);
let next = istr::char_range_at(src, pos);
pos = next.next;
ch = next.ch;
} else { ch = -1 as char; }
}
fn get_interner() -> @interner::interner<str> { ret itr; }
fn get_interner() -> @interner::interner<istr> { ret itr; }
fn get_col() -> uint { ret col; }
fn get_filemap() -> codemap::filemap { ret fm; }
fn err(m: str) {
codemap::emit_error(some(ast_util::mk_sp(chpos, chpos)), m, cm);
fn err(m: &istr) {
codemap::emit_error(
some(ast_util::mk_sp(chpos, chpos)),
istr::to_estr(m), cm);
}
}
let strs: [str] = [];
let strs: [istr] = [];
let rd =
reader(cm, src, str::byte_len(src), 0u, 0u, -1 as char,
reader(cm, src, istr::byte_len(src), 0u, 0u, -1 as char,
filemap.start_pos.ch, strs, filemap, itr);
rd.init();
ret rd;
@ -146,7 +149,9 @@ fn consume_any_line_comment(rdr: &reader) {
fn consume_block_comment(rdr: &reader) {
let level: int = 1;
while level > 0 {
if rdr.is_eof() { rdr.err("unterminated block comment"); fail; }
if rdr.is_eof() {
rdr.err(~"unterminated block comment"); fail;
}
if rdr.curr() == '/' && rdr.next() == '*' {
rdr.bump();
rdr.bump();
@ -164,35 +169,35 @@ fn consume_block_comment(rdr: &reader) {
be consume_whitespace_and_comments(rdr);
}
// Folds the bytes of `s` into a base-10 int: for every byte the accumulator
// is multiplied by 10 and dec_digit_val of that byte is added. Despite the
// name, the result is an int, not a string.
// The two signature lines are this commit's before/after pair (the parameter
// changes from str to &istr); the body below is shared by both.
// NOTE(review): no digit validation is visible here; what dec_digit_val does
// with a non-digit byte is defined elsewhere -- confirm before relying on it.
fn digits_to_string(s: str) -> int {
fn digits_to_string(s: &istr) -> int {
let accum_int: int = 0;
for c: u8 in s { accum_int *= 10; accum_int += dec_digit_val(c as char); }
ret accum_int;
}
// Scans an optional exponent suffix of a floating-point literal at the
// reader's current position.
//
// Returns none when the current character is not 'e' or 'E' (nothing is
// consumed in that case). Otherwise returns some(text), where text is the
// 'e'/'E', an optional '+' or '-' sign, and the digits produced by
// scan_dec_digits. If no digits follow, reports
// "scan_exponent: bad fp literal" through rdr.err and fails.
//
// Old (str) and new (istr) variants of each changed line appear side by
// side; they differ only in the string type used.
fn scan_exponent(rdr: &reader) -> option::t<str> {
fn scan_exponent(rdr: &reader) -> option::t<istr> {
let c = rdr.curr();
let rslt = "";
let rslt = ~"";
if c == 'e' || c == 'E' {
// Keep the exponent marker exactly as written ('e' or 'E').
rslt += str::unsafe_from_bytes([c as u8]);
rslt += istr::unsafe_from_bytes([c as u8]);
rdr.bump();
c = rdr.curr();
// An explicit sign is optional and is preserved in the result.
if c == '-' || c == '+' {
rslt += str::unsafe_from_bytes([c as u8]);
rslt += istr::unsafe_from_bytes([c as u8]);
rdr.bump();
}
let exponent = scan_dec_digits(rdr);
// An exponent marker with no digits after it is an error.
if str::byte_len(exponent) > 0u {
if istr::byte_len(exponent) > 0u {
ret some(rslt + exponent);
} else { rdr.err("scan_exponent: bad fp literal"); fail; }
} else { ret none::<str>; }
} else { rdr.err(~"scan_exponent: bad fp literal"); fail; }
} else { ret none::<istr>; }
}
fn scan_dec_digits(rdr: &reader) -> str {
fn scan_dec_digits(rdr: &reader) -> istr {
let c = rdr.curr();
let rslt: str = "";
let rslt: istr = ~"";
while is_dec_digit(c) || c == '_' {
if c != '_' { rslt += str::unsafe_from_bytes([c as u8]); }
if c != '_' { rslt += istr::unsafe_from_bytes([c as u8]); }
rdr.bump();
c = rdr.curr();
}
@ -201,7 +206,7 @@ fn scan_dec_digits(rdr: &reader) -> str {
fn scan_number(c: char, rdr: &reader) -> token::token {
let accum_int = 0;
let dec_str: str = "";
let dec_str: istr = ~"";
let is_dec_integer: bool = false;
let n = rdr.next();
if c == '0' && n == 'x' {
@ -272,7 +277,7 @@ fn scan_number(c: char, rdr: &reader) -> token::token {
rdr.bump();
let dec_part = scan_dec_digits(rdr);
let float_str = dec_str + "." + dec_part;
let float_str = dec_str + ~"." + dec_part;
c = rdr.curr();
let exponent_str = scan_exponent(rdr);
alt exponent_str { some(s) { float_str += s; } none. { } }
@ -298,15 +303,17 @@ fn scan_number(c: char, rdr: &reader) -> token::token {
}
} else {
ret token::LIT_FLOAT(interner::intern::<str>(*rdr.get_interner(),
float_str));
ret token::LIT_FLOAT(interner::intern::<istr>(
*rdr.get_interner(),
float_str));
}
}
let maybe_exponent = scan_exponent(rdr);
alt maybe_exponent {
some(s) {
ret token::LIT_FLOAT(interner::intern::<str>(*rdr.get_interner(),
dec_str + s));
ret token::LIT_FLOAT(interner::intern::<istr>(
*rdr.get_interner(),
dec_str + s));
}
none. { ret token::LIT_INT(accum_int); }
}
@ -318,7 +325,9 @@ fn scan_numeric_escape(rdr: &reader, n_hex_digits: uint) -> char {
let n = rdr.curr();
rdr.bump();
if !is_hex_digit(n) {
rdr.err(#fmt["illegal numeric character escape: %d", n as int]);
rdr.err(
istr::from_estr(
#fmt["illegal numeric character escape: %d", n as int]));
fail;
}
accum_int *= 16;
@ -337,18 +346,19 @@ fn next_token(rdr: &reader) -> {tok: token::token, chpos: uint, bpos: uint} {
}
fn next_token_inner(rdr: &reader) -> token::token {
let accum_str = "";
let accum_str = ~"";
let c = rdr.curr();
if is_alpha(c) || c == '_' {
while is_alnum(c) || c == '_' {
str::push_char(accum_str, c);
istr::push_char(accum_str, c);
rdr.bump();
c = rdr.curr();
}
if str::eq(accum_str, "_") { ret token::UNDERSCORE; }
if istr::eq(accum_str, ~"_") { ret token::UNDERSCORE; }
let is_mod_name = c == ':' && rdr.next() == ':';
ret token::IDENT(interner::intern::<str>(*rdr.get_interner(),
accum_str), is_mod_name);
ret token::IDENT(interner::intern::<istr>(
*rdr.get_interner(),
accum_str), is_mod_name);
}
if is_dec_digit(c) { ret scan_number(c, rdr); }
fn binop(rdr: &reader, op: token::binop) -> token::token {
@ -460,13 +470,15 @@ fn binop(rdr: &reader, op: token::binop) -> token::token {
'u' { c2 = scan_numeric_escape(rdr, 4u); }
'U' { c2 = scan_numeric_escape(rdr, 8u); }
c2 {
rdr.err(#fmt["unknown character escape: %d", c2 as int]);
rdr.err(
istr::from_estr(#fmt["unknown character escape: %d",
c2 as int]));
fail;
}
}
}
if rdr.curr() != '\'' {
rdr.err("unterminated character constant");
rdr.err(~"unterminated character constant");
fail;
}
rdr.bump(); // advance curr past token
@ -483,33 +495,36 @@ fn binop(rdr: &reader, op: token::binop) -> token::token {
let escaped = rdr.curr();
rdr.bump();
alt escaped {
'n' { str::push_byte(accum_str, '\n' as u8); }
'r' { str::push_byte(accum_str, '\r' as u8); }
't' { str::push_byte(accum_str, '\t' as u8); }
'\\' { str::push_byte(accum_str, '\\' as u8); }
'"' { str::push_byte(accum_str, '"' as u8); }
'n' { istr::push_byte(accum_str, '\n' as u8); }
'r' { istr::push_byte(accum_str, '\r' as u8); }
't' { istr::push_byte(accum_str, '\t' as u8); }
'\\' { istr::push_byte(accum_str, '\\' as u8); }
'"' { istr::push_byte(accum_str, '"' as u8); }
'\n' { consume_whitespace(rdr); }
'x' {
str::push_char(accum_str, scan_numeric_escape(rdr, 2u));
istr::push_char(accum_str, scan_numeric_escape(rdr, 2u));
}
'u' {
str::push_char(accum_str, scan_numeric_escape(rdr, 4u));
istr::push_char(accum_str, scan_numeric_escape(rdr, 4u));
}
'U' {
str::push_char(accum_str, scan_numeric_escape(rdr, 8u));
istr::push_char(accum_str, scan_numeric_escape(rdr, 8u));
}
c2 {
rdr.err(#fmt["unknown string escape: %d", c2 as int]);
rdr.err(
istr::from_estr(#fmt["unknown string escape: %d",
c2 as int]));
fail;
}
}
}
_ { str::push_char(accum_str, ch); }
_ { istr::push_char(accum_str, ch); }
}
}
rdr.bump();
ret token::LIT_STR(interner::intern::<str>(*rdr.get_interner(),
accum_str));
ret token::LIT_STR(interner::intern::<istr>(
*rdr.get_interner(),
accum_str));
}
'-' {
if rdr.next() == '>' {
@ -536,7 +551,11 @@ fn binop(rdr: &reader, op: token::binop) -> token::token {
'/' { ret binop(rdr, token::SLASH); }
'^' { ret binop(rdr, token::CARET); }
'%' { ret binop(rdr, token::PERCENT); }
c { rdr.err(#fmt["unkown start of token: %d", c as int]); fail; }
c {
rdr.err(
istr::from_estr(#fmt["unkown start of token: %d", c as int]));
fail;
}
}
}
@ -547,19 +566,19 @@ fn binop(rdr: &reader, op: token::binop) -> token::token {
blank_line; // Just a manual blank line "\n\n", for layout
}
// A comment gathered from the source: its style, its text split into lines,
// and a position (the constructors in this file fill pos from
// rdr.get_chpos()). This commit changes the line type from str to istr.
type cmnt = {style: cmnt_style, lines: [str], pos: uint};
type cmnt = {style: cmnt_style, lines: [istr], pos: uint};
// Reads characters from the reader up to, but not including, the next '\n'
// or end of input, and returns them. A terminating '\n' is consumed but not
// added to the result.
// Old (str) and new (istr) variants of each changed line appear side by side.
fn read_to_eol(rdr: &reader) -> str {
let val = "";
fn read_to_eol(rdr: &reader) -> istr {
let val = ~"";
while rdr.curr() != '\n' && !rdr.is_eof() {
str::push_char(val, rdr.curr());
istr::push_char(val, rdr.curr());
rdr.bump();
}
// Step over the newline so the next read starts on the following line.
if rdr.curr() == '\n' { rdr.bump(); }
ret val;
}
fn read_one_line_comment(rdr: &reader) -> str {
fn read_one_line_comment(rdr: &reader) -> istr {
let val = read_to_eol(rdr);
assert (val[0] == '/' as u8 && val[1] == '/' as u8);
ret val;
@ -577,7 +596,7 @@ fn consume_non_eol_whitespace(rdr: &reader) {
// Appends a blank_line-style cmnt to `comments`. The entry carries an empty
// lines vector and the reader's current character position
// (rdr.get_chpos()). Also logs ">>> blank-line comment".
fn push_blank_line_comment(rdr: &reader, comments: &mutable [cmnt]) {
log ">>> blank-line comment";
let v: [str] = [];
let v: [istr] = [];
comments += [{style: blank_line, lines: v, pos: rdr.get_chpos()}];
}
@ -594,7 +613,7 @@ fn consume_whitespace_counting_blank_lines(rdr: &reader,
fn read_line_comments(rdr: &reader, code_to_the_left: bool) -> cmnt {
log ">>> line comments";
let p = rdr.get_chpos();
let lines: [str] = [];
let lines: [istr] = [];
while rdr.curr() == '/' && rdr.next() == '/' {
let line = read_one_line_comment(rdr);
log line;
@ -607,58 +626,58 @@ fn read_line_comments(rdr: &reader, code_to_the_left: bool) -> cmnt {
pos: p};
}
// Returns true when every byte of `s` at indices begin..end (end exclusive)
// satisfies is_whitespace, and false at the first byte that does not.
// Returns true immediately when begin == end.
// NOTE(review): the loop condition is `i != end`, not `i < end`, so this
// presumably relies on callers passing begin <= end and end within the
// string -- confirm against callers.
fn all_whitespace(s: &str, begin: uint, end: uint) -> bool {
fn all_whitespace(s: &istr, begin: uint, end: uint) -> bool {
let i: uint = begin;
while i != end { if !is_whitespace(s[i] as char) { ret false; } i += 1u; }
ret true;
}
// Appends one comment line to `lines`, dropping its first `col` bytes when
// those bytes are all whitespace.
//
//  - first `col` bytes all whitespace and col < byte length: push s[col..]
//  - first `col` bytes all whitespace and col >= byte length: push ""
//  - otherwise: push `s` unchanged
//
// Also logs "pushing line: " followed by the pushed text.
// NOTE(review): all_whitespace(s, 0u, col) runs before col is compared with
// the byte length, so a line shorter than `col` is indexed past its end
// inside all_whitespace; whether that is trapped is not visible here.
// Old (str) and new (istr) variants of each changed line appear side by side.
fn trim_whitespace_prefix_and_push_line(lines: &mutable [str], s: &str,
fn trim_whitespace_prefix_and_push_line(lines: &mutable [istr], s: &istr,
col: uint) {
let s1;
if all_whitespace(s, 0u, col) {
if col < str::byte_len(s) {
s1 = str::slice(s, col, str::byte_len(s));
} else { s1 = ""; }
if col < istr::byte_len(s) {
s1 = istr::slice(s, col, istr::byte_len(s));
} else { s1 = ~""; }
} else { s1 = s; }
log "pushing line: " + s1;
log ~"pushing line: " + s1;
lines += [s1];
}
fn read_block_comment(rdr: &reader, code_to_the_left: bool) -> cmnt {
log ">>> block comment";
let p = rdr.get_chpos();
let lines: [str] = [];
let lines: [istr] = [];
let col: uint = rdr.get_col();
rdr.bump();
rdr.bump();
let curr_line = "/*";
let curr_line = ~"/*";
let level: int = 1;
while level > 0 {
log #fmt["=== block comment level %d", level];
if rdr.is_eof() { rdr.err("unterminated block comment"); fail; }
if rdr.is_eof() { rdr.err(~"unterminated block comment"); fail; }
if rdr.curr() == '\n' {
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
curr_line = "";
curr_line = ~"";
rdr.bump();
} else {
str::push_char(curr_line, rdr.curr());
istr::push_char(curr_line, rdr.curr());
if rdr.curr() == '/' && rdr.next() == '*' {
rdr.bump();
rdr.bump();
curr_line += "*";
curr_line += ~"*";
level += 1;
} else {
if rdr.curr() == '*' && rdr.next() == '/' {
rdr.bump();
rdr.bump();
curr_line += "/";
curr_line += ~"/";
level -= 1;
} else { rdr.bump(); }
}
}
}
if str::byte_len(curr_line) != 0u {
if istr::byte_len(curr_line) != 0u {
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
}
let style = if code_to_the_left { trailing } else { isolated };
@ -700,14 +719,16 @@ fn is_lit(t: &token::token) -> bool {
}
}
// A literal's text paired with a position. This commit changes the text
// field from str to istr.
// NOTE(review): presumably `lit` is the literal's source text and `pos` a
// character position like cmnt.pos -- the code that fills it in is outside
// this view; confirm.
type lit = {lit: str, pos: uint};
type lit = {lit: istr, pos: uint};
fn gather_comments_and_literals(cm: &codemap::codemap, path: str,
fn gather_comments_and_literals(cm: &codemap::codemap, path: &istr,
srdr: io::reader) ->
{cmnts: [cmnt], lits: [lit]} {
let src = str::unsafe_from_bytes(srdr.read_whole_stream());
let itr = @interner::mk::<str>(str::hash, str::eq);
let rdr = new_reader(cm, src, codemap::new_filemap(path, 0u, 0u), itr);
let src = istr::unsafe_from_bytes(srdr.read_whole_stream());
let itr = @interner::mk::<istr>(istr::hash, istr::eq);
let rdr = new_reader(cm, src,
codemap::new_filemap(
istr::to_estr(path), 0u, 0u), itr);
let comments: [cmnt] = [];
let literals: [lit] = [];
let first_read: bool = true;

View File

@ -63,10 +63,10 @@ fn next_node_id(sess: &parse_sess) -> node_id {
fn new_parser_from_file(sess: parse_sess, cfg: ast::crate_cfg, path: str,
chpos: uint, byte_pos: uint, ftype: file_type) ->
parser {
let src = istr::to_estr(io::read_whole_file_str(istr::from_estr(path)));
let src = io::read_whole_file_str(istr::from_estr(path));
let filemap = codemap::new_filemap(path, chpos, byte_pos);
sess.cm.files += [filemap];
let itr = @interner::mk(str::hash, str::eq);
let itr = @interner::mk(istr::hash, istr::eq);
let rdr = lexer::new_reader(sess.cm, src, filemap, itr);
ret new_parser(sess, cfg, rdr, ftype);
@ -128,7 +128,7 @@ fn warn(m: str) {
fn get_cfg() -> ast::crate_cfg { ret cfg; }
fn get_prec_table() -> @[op_spec] { ret precs; }
// Looks up entry `i` in the reader's interner and returns it as a str.
// After this commit the interner holds istr values, so the new variant of
// the return line converts the result with istr::to_estr; the first `ret`
// line is the pre-commit version.
fn get_str(i: token::str_num) -> str {
ret interner::get(*rdr.get_interner(), i);
ret istr::to_estr(interner::get(*rdr.get_interner(), i));
}
fn get_reader() -> lexer::reader { ret rdr; }
fn get_filemap() -> codemap::filemap { ret rdr.get_filemap(); }
@ -2434,8 +2434,9 @@ fn parse_crate_from_source_str(name: &str, source: &str, cfg: &ast::crate_cfg,
let ftype = SOURCE_FILE;
let filemap = codemap::new_filemap(name, 0u, 0u);
sess.cm.files += [filemap];
let itr = @interner::mk(str::hash, str::eq);
let rdr = lexer::new_reader(sess.cm, source, filemap, itr);
let itr = @interner::mk(istr::hash, istr::eq);
let rdr = lexer::new_reader(sess.cm, istr::from_estr(source),
filemap, itr);
let p = new_parser(sess, cfg, rdr, ftype);
ret parse_crate_mod(p, cfg);
}

View File

@ -152,12 +152,17 @@ fn to_str(r: lexer::reader, t: token) -> str {
ret istr::to_estr(int::to_str(i, 10u)) + "_" + ty_mach_to_str(tm);
}
LIT_MACH_FLOAT(tm, s) {
ret interner::get::<str>(*r.get_interner(), s) + "_" +
ty_mach_to_str(tm);
ret istr::to_estr(interner::get::<istr>(
*r.get_interner(), s) + ~"_") +
ty_mach_to_str(tm);
}
LIT_FLOAT(s) {
ret istr::to_estr(interner::get::<istr>(*r.get_interner(), s));
}
LIT_FLOAT(s) { ret interner::get::<str>(*r.get_interner(), s); }
LIT_STR(s) { // FIXME: escape.
ret "\"" + interner::get::<str>(*r.get_interner(), s) + "\"";
ret "\"" +
istr::to_estr(interner::get::<istr>(*r.get_interner(), s))
+ "\"";
}
LIT_CHAR(c) {
// FIXME: escape.
@ -171,7 +176,7 @@ fn to_str(r: lexer::reader, t: token) -> str {
/* Name components */
IDENT(s, _) {
ret interner::get::<str>(*r.get_interner(), s);
ret istr::to_estr(interner::get::<istr>(*r.get_interner(), s));
}
IDX(i) { ret istr::to_estr(~"_" + int::to_str(i, 10u)); }
UNDERSCORE. { ret "_"; }

View File

@ -78,7 +78,8 @@ fn rust_printer(writer: io::writer) -> ps {
fn print_crate(cm: &codemap, crate: @ast::crate, filename: str,
in: io::reader, out: io::writer, ann: &pp_ann) {
let boxes: [pp::breaks] = [];
let r = lexer::gather_comments_and_literals(cm, filename, in);
let r = lexer::gather_comments_and_literals(
cm, istr::from_estr(filename), in);
let s =
@{s: pp::mk_printer(out, default_columns),
cm: some(cm),
@ -1499,7 +1500,7 @@ fn print_literal(s: &ps, lit: &@ast::lit) {
ast::lit_str(_, ast::sk_unique.) { word(s.s, "~"); }
_ { }
}
word(s.s, lt.lit);
word(s.s, istr::to_estr(lt.lit));
s.cur_lit += 1u;
ret;
}
@ -1567,27 +1568,31 @@ fn print_comment(s: &ps, cmnt: lexer::cmnt) {
lexer::mixed. {
assert (vec::len(cmnt.lines) == 1u);
zerobreak(s.s);
word(s.s, cmnt.lines[0]);
word(s.s, istr::to_estr(cmnt.lines[0]));
zerobreak(s.s);
}
lexer::isolated. {
pprust::hardbreak_if_not_bol(s);
for line: str in cmnt.lines {
for line: istr in cmnt.lines {
// Don't print empty lines because they will end up as trailing
// whitespace
if str::is_not_empty(line) { word(s.s, line); }
if istr::is_not_empty(line) {
word(s.s, istr::to_estr(line));
}
hardbreak(s.s);
}
}
lexer::trailing. {
word(s.s, " ");
if vec::len(cmnt.lines) == 1u {
word(s.s, cmnt.lines[0]);
word(s.s, istr::to_estr(cmnt.lines[0]));
hardbreak(s.s);
} else {
ibox(s, 0u);
for line: str in cmnt.lines {
if str::is_not_empty(line) { word(s.s, line); }
for line: istr in cmnt.lines {
if istr::is_not_empty(line) {
word(s.s, istr::to_estr(line));
}
hardbreak(s.s);
}
end(s);

View File

@ -4,7 +4,7 @@
unshift_char, shift_char, pop_char, push_char, is_utf8, from_chars, to_chars,
char_len, char_at, bytes, is_ascii, shift_byte, pop_byte, unsafe_from_byte,
unsafe_from_bytes, from_char, char_range_at, str_from_cstr, sbuf,
as_buf;
as_buf, push_byte;
export from_estr, to_estr, from_estrs, to_estrs;