diff --git a/src/comp/syntax/parse/lexer.rs b/src/comp/syntax/parse/lexer.rs index c39cd75db3b..a1a7d205b54 100644 --- a/src/comp/syntax/parse/lexer.rs +++ b/src/comp/syntax/parse/lexer.rs @@ -3,6 +3,7 @@ import std::int; import std::vec; import std::str; +import std::istr; import std::map; import std::map::hashmap; import std::option; @@ -19,44 +20,44 @@ fn next() -> char; fn init(); fn bump(); - fn get_str_from(uint) -> str; - fn get_interner() -> @interner::interner; + fn get_str_from(uint) -> istr; + fn get_interner() -> @interner::interner; fn get_chpos() -> uint; fn get_byte_pos() -> uint; fn get_col() -> uint; fn get_filemap() -> codemap::filemap; - fn err(str); + fn err(&istr); }; -fn new_reader(cm: &codemap::codemap, src: str, filemap: codemap::filemap, - itr: @interner::interner) -> reader { +fn new_reader(cm: &codemap::codemap, src: &istr, filemap: codemap::filemap, + itr: @interner::interner) -> reader { obj reader(cm: codemap::codemap, - src: str, + src: istr, len: uint, mutable col: uint, mutable pos: uint, mutable ch: char, mutable chpos: uint, - mutable strs: [str], + mutable strs: [istr], fm: codemap::filemap, - itr: @interner::interner) { + itr: @interner::interner) { fn is_eof() -> bool { ret ch == -1 as char; } - fn get_str_from(start: uint) -> str { + fn get_str_from(start: uint) -> istr { // I'm pretty skeptical about this subtraction. What if there's a // multi-byte character before the mark? - ret str::slice(src, start - 1u, pos - 1u); + ret istr::slice(src, start - 1u, pos - 1u); } fn get_chpos() -> uint { ret chpos; } fn get_byte_pos() -> uint { ret pos; } fn curr() -> char { ret ch; } fn next() -> char { if pos < len { - ret str::char_at(src, pos); + ret istr::char_at(src, pos); } else { ret -1 as char; } } fn init() { if pos < len { - let next = str::char_range_at(src, pos); + let next = istr::char_range_at(src, pos); pos = next.next; ch = next.ch; } @@ -69,21 +70,23 @@ fn bump() { codemap::next_line(fm, chpos, pos + fm.start_pos.byte); col = 0u; } - let next = str::char_range_at(src, pos); + let next = istr::char_range_at(src, pos); pos = next.next; ch = next.ch; } else { ch = -1 as char; } } - fn get_interner() -> @interner::interner { ret itr; } + fn get_interner() -> @interner::interner { ret itr; } fn get_col() -> uint { ret col; } fn get_filemap() -> codemap::filemap { ret fm; } - fn err(m: str) { - codemap::emit_error(some(ast_util::mk_sp(chpos, chpos)), m, cm); + fn err(m: &istr) { + codemap::emit_error( + some(ast_util::mk_sp(chpos, chpos)), + istr::to_estr(m), cm); } } - let strs: [str] = []; + let strs: [istr] = []; let rd = - reader(cm, src, str::byte_len(src), 0u, 0u, -1 as char, + reader(cm, src, istr::byte_len(src), 0u, 0u, -1 as char, filemap.start_pos.ch, strs, filemap, itr); rd.init(); ret rd; @@ -146,7 +149,9 @@ fn consume_any_line_comment(rdr: &reader) { fn consume_block_comment(rdr: &reader) { let level: int = 1; while level > 0 { - if rdr.is_eof() { rdr.err("unterminated block comment"); fail; } + if rdr.is_eof() { + rdr.err(~"unterminated block comment"); fail; + } if rdr.curr() == '/' && rdr.next() == '*' { rdr.bump(); rdr.bump(); @@ -164,35 +169,35 @@ fn consume_block_comment(rdr: &reader) { be consume_whitespace_and_comments(rdr); } -fn digits_to_string(s: str) -> int { +fn digits_to_string(s: &istr) -> int { let accum_int: int = 0; for c: u8 in s { accum_int *= 10; accum_int += dec_digit_val(c as char); } ret accum_int; } -fn scan_exponent(rdr: &reader) -> option::t { +fn scan_exponent(rdr: &reader) -> option::t { let c = rdr.curr(); - let rslt = ""; + let rslt = ~""; if c == 'e' || c == 'E' { - rslt += str::unsafe_from_bytes([c as u8]); + rslt += istr::unsafe_from_bytes([c as u8]); rdr.bump(); c = rdr.curr(); if c == '-' || c == '+' { - rslt += str::unsafe_from_bytes([c as u8]); + rslt += istr::unsafe_from_bytes([c as u8]); rdr.bump(); } let exponent = scan_dec_digits(rdr); - if str::byte_len(exponent) > 0u { + if istr::byte_len(exponent) > 0u { ret some(rslt + exponent); - } else { rdr.err("scan_exponent: bad fp literal"); fail; } - } else { ret none::; } + } else { rdr.err(~"scan_exponent: bad fp literal"); fail; } + } else { ret none::; } } -fn scan_dec_digits(rdr: &reader) -> str { +fn scan_dec_digits(rdr: &reader) -> istr { let c = rdr.curr(); - let rslt: str = ""; + let rslt: istr = ~""; while is_dec_digit(c) || c == '_' { - if c != '_' { rslt += str::unsafe_from_bytes([c as u8]); } + if c != '_' { rslt += istr::unsafe_from_bytes([c as u8]); } rdr.bump(); c = rdr.curr(); } @@ -201,7 +206,7 @@ fn scan_dec_digits(rdr: &reader) -> str { fn scan_number(c: char, rdr: &reader) -> token::token { let accum_int = 0; - let dec_str: str = ""; + let dec_str: istr = ~""; let is_dec_integer: bool = false; let n = rdr.next(); if c == '0' && n == 'x' { @@ -272,7 +277,7 @@ fn scan_number(c: char, rdr: &reader) -> token::token { rdr.bump(); let dec_part = scan_dec_digits(rdr); - let float_str = dec_str + "." + dec_part; + let float_str = dec_str + ~"." + dec_part; c = rdr.curr(); let exponent_str = scan_exponent(rdr); alt exponent_str { some(s) { float_str += s; } none. { } } @@ -298,15 +303,17 @@ fn scan_number(c: char, rdr: &reader) -> token::token { } } else { - ret token::LIT_FLOAT(interner::intern::(*rdr.get_interner(), - float_str)); + ret token::LIT_FLOAT(interner::intern::( + *rdr.get_interner(), + float_str)); } } let maybe_exponent = scan_exponent(rdr); alt maybe_exponent { some(s) { - ret token::LIT_FLOAT(interner::intern::(*rdr.get_interner(), - dec_str + s)); + ret token::LIT_FLOAT(interner::intern::( + *rdr.get_interner(), + dec_str + s)); } none. { ret token::LIT_INT(accum_int); } } @@ -318,7 +325,9 @@ fn scan_numeric_escape(rdr: &reader, n_hex_digits: uint) -> char { let n = rdr.curr(); rdr.bump(); if !is_hex_digit(n) { - rdr.err(#fmt["illegal numeric character escape: %d", n as int]); + rdr.err( + istr::from_estr( + #fmt["illegal numeric character escape: %d", n as int])); fail; } accum_int *= 16; @@ -337,18 +346,19 @@ fn next_token(rdr: &reader) -> {tok: token::token, chpos: uint, bpos: uint} { } fn next_token_inner(rdr: &reader) -> token::token { - let accum_str = ""; + let accum_str = ~""; let c = rdr.curr(); if is_alpha(c) || c == '_' { while is_alnum(c) || c == '_' { - str::push_char(accum_str, c); + istr::push_char(accum_str, c); rdr.bump(); c = rdr.curr(); } - if str::eq(accum_str, "_") { ret token::UNDERSCORE; } + if istr::eq(accum_str, ~"_") { ret token::UNDERSCORE; } let is_mod_name = c == ':' && rdr.next() == ':'; - ret token::IDENT(interner::intern::(*rdr.get_interner(), - accum_str), is_mod_name); + ret token::IDENT(interner::intern::( + *rdr.get_interner(), + accum_str), is_mod_name); } if is_dec_digit(c) { ret scan_number(c, rdr); } fn binop(rdr: &reader, op: token::binop) -> token::token { @@ -460,13 +470,15 @@ fn binop(rdr: &reader, op: token::binop) -> token::token { 'u' { c2 = scan_numeric_escape(rdr, 4u); } 'U' { c2 = scan_numeric_escape(rdr, 8u); } c2 { - rdr.err(#fmt["unknown character escape: %d", c2 as int]); + rdr.err( + istr::from_estr(#fmt["unknown character escape: %d", + c2 as int])); fail; } } } if rdr.curr() != '\'' { - rdr.err("unterminated character constant"); + rdr.err(~"unterminated character constant"); fail; } rdr.bump(); // advance curr past token @@ -483,33 +495,36 @@ fn binop(rdr: &reader, op: token::binop) -> token::token { let escaped = rdr.curr(); rdr.bump(); alt escaped { - 'n' { str::push_byte(accum_str, '\n' as u8); } - 'r' { str::push_byte(accum_str, '\r' as u8); } - 't' { str::push_byte(accum_str, '\t' as u8); } - '\\' { str::push_byte(accum_str, '\\' as u8); } - '"' { str::push_byte(accum_str, '"' as u8); } + 'n' { istr::push_byte(accum_str, '\n' as u8); } + 'r' { istr::push_byte(accum_str, '\r' as u8); } + 't' { istr::push_byte(accum_str, '\t' as u8); } + '\\' { istr::push_byte(accum_str, '\\' as u8); } + '"' { istr::push_byte(accum_str, '"' as u8); } '\n' { consume_whitespace(rdr); } 'x' { - str::push_char(accum_str, scan_numeric_escape(rdr, 2u)); + istr::push_char(accum_str, scan_numeric_escape(rdr, 2u)); } 'u' { - str::push_char(accum_str, scan_numeric_escape(rdr, 4u)); + istr::push_char(accum_str, scan_numeric_escape(rdr, 4u)); } 'U' { - str::push_char(accum_str, scan_numeric_escape(rdr, 8u)); + istr::push_char(accum_str, scan_numeric_escape(rdr, 8u)); } c2 { - rdr.err(#fmt["unknown string escape: %d", c2 as int]); + rdr.err( + istr::from_estr(#fmt["unknown string escape: %d", + c2 as int])); fail; } } } - _ { str::push_char(accum_str, ch); } + _ { istr::push_char(accum_str, ch); } } } rdr.bump(); - ret token::LIT_STR(interner::intern::(*rdr.get_interner(), - accum_str)); + ret token::LIT_STR(interner::intern::( + *rdr.get_interner(), + accum_str)); } '-' { if rdr.next() == '>' { @@ -536,7 +551,11 @@ fn binop(rdr: &reader, op: token::binop) -> token::token { '/' { ret binop(rdr, token::SLASH); } '^' { ret binop(rdr, token::CARET); } '%' { ret binop(rdr, token::PERCENT); } - c { rdr.err(#fmt["unkown start of token: %d", c as int]); fail; } + c { + rdr.err( + istr::from_estr(#fmt["unkown start of token: %d", c as int])); + fail; + } } } @@ -547,19 +566,19 @@ fn binop(rdr: &reader, op: token::binop) -> token::token { blank_line; // Just a manual blank line "\n\n", for layout } -type cmnt = {style: cmnt_style, lines: [str], pos: uint}; +type cmnt = {style: cmnt_style, lines: [istr], pos: uint}; -fn read_to_eol(rdr: &reader) -> str { - let val = ""; +fn read_to_eol(rdr: &reader) -> istr { + let val = ~""; while rdr.curr() != '\n' && !rdr.is_eof() { - str::push_char(val, rdr.curr()); + istr::push_char(val, rdr.curr()); rdr.bump(); } if rdr.curr() == '\n' { rdr.bump(); } ret val; } -fn read_one_line_comment(rdr: &reader) -> str { +fn read_one_line_comment(rdr: &reader) -> istr { let val = read_to_eol(rdr); assert (val[0] == '/' as u8 && val[1] == '/' as u8); ret val; @@ -577,7 +596,7 @@ fn consume_non_eol_whitespace(rdr: &reader) { fn push_blank_line_comment(rdr: &reader, comments: &mutable [cmnt]) { log ">>> blank-line comment"; - let v: [str] = []; + let v: [istr] = []; comments += [{style: blank_line, lines: v, pos: rdr.get_chpos()}]; } @@ -594,7 +613,7 @@ fn consume_whitespace_counting_blank_lines(rdr: &reader, fn read_line_comments(rdr: &reader, code_to_the_left: bool) -> cmnt { log ">>> line comments"; let p = rdr.get_chpos(); - let lines: [str] = []; + let lines: [istr] = []; while rdr.curr() == '/' && rdr.next() == '/' { let line = read_one_line_comment(rdr); log line; @@ -607,58 +626,58 @@ fn read_line_comments(rdr: &reader, code_to_the_left: bool) -> cmnt { pos: p}; } -fn all_whitespace(s: &str, begin: uint, end: uint) -> bool { +fn all_whitespace(s: &istr, begin: uint, end: uint) -> bool { let i: uint = begin; while i != end { if !is_whitespace(s[i] as char) { ret false; } i += 1u; } ret true; } -fn trim_whitespace_prefix_and_push_line(lines: &mutable [str], s: &str, +fn trim_whitespace_prefix_and_push_line(lines: &mutable [istr], s: &istr, col: uint) { let s1; if all_whitespace(s, 0u, col) { - if col < str::byte_len(s) { - s1 = str::slice(s, col, str::byte_len(s)); - } else { s1 = ""; } + if col < istr::byte_len(s) { + s1 = istr::slice(s, col, istr::byte_len(s)); + } else { s1 = ~""; } } else { s1 = s; } - log "pushing line: " + s1; + log ~"pushing line: " + s1; lines += [s1]; } fn read_block_comment(rdr: &reader, code_to_the_left: bool) -> cmnt { log ">>> block comment"; let p = rdr.get_chpos(); - let lines: [str] = []; + let lines: [istr] = []; let col: uint = rdr.get_col(); rdr.bump(); rdr.bump(); - let curr_line = "/*"; + let curr_line = ~"/*"; let level: int = 1; while level > 0 { log #fmt["=== block comment level %d", level]; - if rdr.is_eof() { rdr.err("unterminated block comment"); fail; } + if rdr.is_eof() { rdr.err(~"unterminated block comment"); fail; } if rdr.curr() == '\n' { trim_whitespace_prefix_and_push_line(lines, curr_line, col); - curr_line = ""; + curr_line = ~""; rdr.bump(); } else { - str::push_char(curr_line, rdr.curr()); + istr::push_char(curr_line, rdr.curr()); if rdr.curr() == '/' && rdr.next() == '*' { rdr.bump(); rdr.bump(); - curr_line += "*"; + curr_line += ~"*"; level += 1; } else { if rdr.curr() == '*' && rdr.next() == '/' { rdr.bump(); rdr.bump(); - curr_line += "/"; + curr_line += ~"/"; level -= 1; } else { rdr.bump(); } } } } - if str::byte_len(curr_line) != 0u { + if istr::byte_len(curr_line) != 0u { trim_whitespace_prefix_and_push_line(lines, curr_line, col); } let style = if code_to_the_left { trailing } else { isolated }; @@ -700,14 +719,16 @@ fn is_lit(t: &token::token) -> bool { } } -type lit = {lit: str, pos: uint}; +type lit = {lit: istr, pos: uint}; -fn gather_comments_and_literals(cm: &codemap::codemap, path: str, +fn gather_comments_and_literals(cm: &codemap::codemap, path: &istr, srdr: io::reader) -> {cmnts: [cmnt], lits: [lit]} { - let src = str::unsafe_from_bytes(srdr.read_whole_stream()); - let itr = @interner::mk::(str::hash, str::eq); - let rdr = new_reader(cm, src, codemap::new_filemap(path, 0u, 0u), itr); + let src = istr::unsafe_from_bytes(srdr.read_whole_stream()); + let itr = @interner::mk::(istr::hash, istr::eq); + let rdr = new_reader(cm, src, + codemap::new_filemap( + istr::to_estr(path), 0u, 0u), itr); let comments: [cmnt] = []; let literals: [lit] = []; let first_read: bool = true; diff --git a/src/comp/syntax/parse/parser.rs b/src/comp/syntax/parse/parser.rs index ce3b917118e..7e163751cd1 100644 --- a/src/comp/syntax/parse/parser.rs +++ b/src/comp/syntax/parse/parser.rs @@ -63,10 +63,10 @@ fn next_node_id(sess: &parse_sess) -> node_id { fn new_parser_from_file(sess: parse_sess, cfg: ast::crate_cfg, path: str, chpos: uint, byte_pos: uint, ftype: file_type) -> parser { - let src = istr::to_estr(io::read_whole_file_str(istr::from_estr(path))); + let src = io::read_whole_file_str(istr::from_estr(path)); let filemap = codemap::new_filemap(path, chpos, byte_pos); sess.cm.files += [filemap]; - let itr = @interner::mk(str::hash, str::eq); + let itr = @interner::mk(istr::hash, istr::eq); let rdr = lexer::new_reader(sess.cm, src, filemap, itr); ret new_parser(sess, cfg, rdr, ftype); @@ -128,7 +128,7 @@ fn warn(m: str) { fn get_cfg() -> ast::crate_cfg { ret cfg; } fn get_prec_table() -> @[op_spec] { ret precs; } fn get_str(i: token::str_num) -> str { - ret interner::get(*rdr.get_interner(), i); + ret istr::to_estr(interner::get(*rdr.get_interner(), i)); } fn get_reader() -> lexer::reader { ret rdr; } fn get_filemap() -> codemap::filemap { ret rdr.get_filemap(); } @@ -2434,8 +2434,9 @@ fn parse_crate_from_source_str(name: &str, source: &str, cfg: &ast::crate_cfg, let ftype = SOURCE_FILE; let filemap = codemap::new_filemap(name, 0u, 0u); sess.cm.files += [filemap]; - let itr = @interner::mk(str::hash, str::eq); - let rdr = lexer::new_reader(sess.cm, source, filemap, itr); + let itr = @interner::mk(istr::hash, istr::eq); + let rdr = lexer::new_reader(sess.cm, istr::from_estr(source), + filemap, itr); let p = new_parser(sess, cfg, rdr, ftype); ret parse_crate_mod(p, cfg); } diff --git a/src/comp/syntax/parse/token.rs b/src/comp/syntax/parse/token.rs index 4fd5aa4453d..90303542123 100644 --- a/src/comp/syntax/parse/token.rs +++ b/src/comp/syntax/parse/token.rs @@ -152,12 +152,17 @@ fn to_str(r: lexer::reader, t: token) -> str { ret istr::to_estr(int::to_str(i, 10u)) + "_" + ty_mach_to_str(tm); } LIT_MACH_FLOAT(tm, s) { - ret interner::get::(*r.get_interner(), s) + "_" + - ty_mach_to_str(tm); + ret istr::to_estr(interner::get::( + *r.get_interner(), s) + ~"_") + + ty_mach_to_str(tm); + } + LIT_FLOAT(s) { + ret istr::to_estr(interner::get::(*r.get_interner(), s)); } - LIT_FLOAT(s) { ret interner::get::(*r.get_interner(), s); } LIT_STR(s) { // FIXME: escape. - ret "\"" + interner::get::(*r.get_interner(), s) + "\""; + ret "\"" + + istr::to_estr(interner::get::(*r.get_interner(), s)) + + "\""; } LIT_CHAR(c) { // FIXME: escape. @@ -171,7 +176,7 @@ fn to_str(r: lexer::reader, t: token) -> str { /* Name components */ IDENT(s, _) { - ret interner::get::(*r.get_interner(), s); + ret istr::to_estr(interner::get::(*r.get_interner(), s)); } IDX(i) { ret istr::to_estr(~"_" + int::to_str(i, 10u)); } UNDERSCORE. { ret "_"; } diff --git a/src/comp/syntax/print/pprust.rs b/src/comp/syntax/print/pprust.rs index 9fff1a8be80..7def22f8bd3 100644 --- a/src/comp/syntax/print/pprust.rs +++ b/src/comp/syntax/print/pprust.rs @@ -78,7 +78,8 @@ fn rust_printer(writer: io::writer) -> ps { fn print_crate(cm: &codemap, crate: @ast::crate, filename: str, in: io::reader, out: io::writer, ann: &pp_ann) { let boxes: [pp::breaks] = []; - let r = lexer::gather_comments_and_literals(cm, filename, in); + let r = lexer::gather_comments_and_literals( + cm, istr::from_estr(filename), in); let s = @{s: pp::mk_printer(out, default_columns), cm: some(cm), @@ -1499,7 +1500,7 @@ fn print_literal(s: &ps, lit: &@ast::lit) { ast::lit_str(_, ast::sk_unique.) { word(s.s, "~"); } _ { } } - word(s.s, lt.lit); + word(s.s, istr::to_estr(lt.lit)); s.cur_lit += 1u; ret; } @@ -1567,27 +1568,31 @@ fn print_comment(s: &ps, cmnt: lexer::cmnt) { lexer::mixed. { assert (vec::len(cmnt.lines) == 1u); zerobreak(s.s); - word(s.s, cmnt.lines[0]); + word(s.s, istr::to_estr(cmnt.lines[0])); zerobreak(s.s); } lexer::isolated. { pprust::hardbreak_if_not_bol(s); - for line: str in cmnt.lines { + for line: istr in cmnt.lines { // Don't print empty lines because they will end up as trailing // whitespace - if str::is_not_empty(line) { word(s.s, line); } + if istr::is_not_empty(line) { + word(s.s, istr::to_estr(line)); + } hardbreak(s.s); } } lexer::trailing. { word(s.s, " "); if vec::len(cmnt.lines) == 1u { - word(s.s, cmnt.lines[0]); + word(s.s, istr::to_estr(cmnt.lines[0])); hardbreak(s.s); } else { ibox(s, 0u); - for line: str in cmnt.lines { - if str::is_not_empty(line) { word(s.s, line); } + for line: istr in cmnt.lines { + if istr::is_not_empty(line) { + word(s.s, istr::to_estr(line)); + } hardbreak(s.s); } end(s); diff --git a/src/lib/istr.rs b/src/lib/istr.rs index 256e76d0041..808ce458eb2 100644 --- a/src/lib/istr.rs +++ b/src/lib/istr.rs @@ -4,7 +4,7 @@ unshift_char, shift_char, pop_char, push_char, is_utf8, from_chars, to_chars, char_len, char_at, bytes, is_ascii, shift_byte, pop_byte, unsafe_from_byte, unsafe_from_bytes, from_char, char_range_at, str_from_cstr, sbuf, -as_buf; +as_buf, push_byte; export from_estr, to_estr, from_estrs, to_estrs;