2011-06-15 11:19:50 -07:00
|
|
|
|
2011-07-12 10:59:18 -07:00
|
|
|
import std::ioivec;
|
2011-05-17 20:41:41 +02:00
|
|
|
import std::int;
|
2011-07-14 17:26:10 -07:00
|
|
|
import std::ivec;
|
|
|
|
import std::str;
|
2011-05-12 17:24:54 +02:00
|
|
|
import std::map;
|
|
|
|
import std::map::hashmap;
|
|
|
|
import std::option;
|
|
|
|
import std::option::some;
|
|
|
|
import std::option::none;
|
2011-07-05 11:48:19 +02:00
|
|
|
import util::interner;
|
|
|
|
import util::interner::intern;
|
|
|
|
import codemap;
|
2011-06-15 11:19:50 -07:00
|
|
|
|
|
|
|
type reader =
|
|
|
|
obj {
|
|
|
|
fn is_eof() -> bool ;
|
|
|
|
fn curr() -> char ;
|
|
|
|
fn next() -> char ;
|
|
|
|
fn init() ;
|
|
|
|
fn bump() ;
|
|
|
|
fn mark() ;
|
|
|
|
fn get_mark_chpos() -> uint ;
|
|
|
|
fn get_mark_str() -> str ;
|
|
|
|
fn get_interner() -> @interner::interner[str] ;
|
|
|
|
fn get_chpos() -> uint ;
|
2011-07-16 02:01:10 -04:00
|
|
|
fn get_byte_pos() -> uint ;
|
2011-06-15 11:19:50 -07:00
|
|
|
fn get_col() -> uint ;
|
|
|
|
fn get_filemap() -> codemap::filemap ;
|
|
|
|
fn err(str) ;
|
|
|
|
};
|
|
|
|
|
2011-07-11 16:43:51 -07:00
|
|
|
fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap,
|
2011-05-16 14:53:00 -07:00
|
|
|
@interner::interner[str] itr) -> reader {
|
2011-07-05 11:48:19 +02:00
|
|
|
obj reader(codemap::codemap cm,
|
2011-07-11 16:43:51 -07:00
|
|
|
str src,
|
2011-06-15 11:19:50 -07:00
|
|
|
uint len,
|
|
|
|
mutable uint col,
|
|
|
|
mutable uint pos,
|
|
|
|
mutable char ch,
|
2011-07-11 14:38:55 -07:00
|
|
|
mutable uint mark_pos,
|
2011-06-15 11:19:50 -07:00
|
|
|
mutable uint mark_chpos,
|
|
|
|
mutable uint chpos,
|
2011-07-14 17:26:10 -07:00
|
|
|
mutable str[] strs,
|
2011-06-15 11:19:50 -07:00
|
|
|
codemap::filemap fm,
|
|
|
|
@interner::interner[str] itr) {
|
|
|
|
fn is_eof() -> bool { ret ch == -1 as char; }
|
2011-07-11 14:38:55 -07:00
|
|
|
fn mark() { mark_pos = pos; mark_chpos = chpos; }
|
2011-07-11 21:30:33 -07:00
|
|
|
fn get_mark_str() -> str {
|
|
|
|
// I'm pretty skeptical about this subtraction. What if there's a
|
|
|
|
// multi-byte character before the mark?
|
|
|
|
ret str::slice(src, mark_pos - 1u,
|
|
|
|
pos - 1u);
|
|
|
|
}
|
2011-04-08 18:44:20 +02:00
|
|
|
fn get_mark_chpos() -> uint { ret mark_chpos; }
|
|
|
|
fn get_chpos() -> uint { ret chpos; }
|
2011-07-16 02:01:10 -04:00
|
|
|
fn get_byte_pos() -> uint { ret pos; }
|
2011-06-15 11:19:50 -07:00
|
|
|
fn curr() -> char { ret ch; }
|
2011-04-08 16:48:17 +02:00
|
|
|
fn next() -> char {
|
2011-06-15 11:19:50 -07:00
|
|
|
if (pos < len) {
|
2011-07-11 16:43:51 -07:00
|
|
|
ret str::char_at(src, pos);
|
2011-06-15 11:19:50 -07:00
|
|
|
} else { ret -1 as char; }
|
2011-04-08 16:48:17 +02:00
|
|
|
}
|
2011-04-19 13:35:49 -07:00
|
|
|
fn init() {
|
2011-04-08 16:48:17 +02:00
|
|
|
if (pos < len) {
|
2011-07-11 16:43:51 -07:00
|
|
|
auto next = str::char_range_at(src, pos);
|
2011-04-08 16:48:17 +02:00
|
|
|
pos = next._1;
|
|
|
|
ch = next._0;
|
2010-08-27 12:36:57 -07:00
|
|
|
}
|
2011-04-08 16:48:17 +02:00
|
|
|
}
|
2011-04-19 13:35:49 -07:00
|
|
|
fn bump() {
|
2011-04-08 16:48:17 +02:00
|
|
|
if (pos < len) {
|
2011-05-30 14:10:54 -07:00
|
|
|
col += 1u;
|
2011-04-08 18:44:20 +02:00
|
|
|
chpos += 1u;
|
2011-07-16 02:01:10 -04:00
|
|
|
if (ch == '\n') {
|
|
|
|
codemap::next_line(fm, chpos, pos + fm.start_pos.byte);
|
|
|
|
col = 0u;
|
|
|
|
}
|
2011-07-11 16:43:51 -07:00
|
|
|
auto next = str::char_range_at(src, pos);
|
2011-04-08 16:48:17 +02:00
|
|
|
pos = next._1;
|
|
|
|
ch = next._0;
|
2011-06-15 11:19:50 -07:00
|
|
|
} else { ch = -1 as char; }
|
2011-04-08 16:48:17 +02:00
|
|
|
}
|
2011-05-16 14:53:00 -07:00
|
|
|
fn get_interner() -> @interner::interner[str] { ret itr; }
|
2011-06-15 11:19:50 -07:00
|
|
|
fn get_col() -> uint { ret col; }
|
|
|
|
fn get_filemap() -> codemap::filemap { ret fm; }
|
2011-07-05 11:48:19 +02:00
|
|
|
fn err(str m) {
|
|
|
|
codemap::emit_error(some(rec(lo=chpos, hi=chpos)), m, cm);
|
|
|
|
}
|
2011-04-08 16:48:17 +02:00
|
|
|
}
|
2011-07-14 17:26:10 -07:00
|
|
|
let str[] strs = ~[];
|
2011-06-15 11:19:50 -07:00
|
|
|
auto rd =
|
2011-07-11 16:43:51 -07:00
|
|
|
reader(cm, src, str::byte_len(src), 0u, 0u, -1 as char, 0u,
|
2011-07-16 02:01:10 -04:00
|
|
|
filemap.start_pos.ch, filemap.start_pos.ch, strs, filemap,
|
|
|
|
itr);
|
2011-04-08 18:44:20 +02:00
|
|
|
rd.init();
|
|
|
|
ret rd;
|
|
|
|
}
|
2010-08-24 09:59:41 -07:00
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
fn dec_digit_val(char c) -> int { ret (c as int) - ('0' as int); }
|
2010-08-31 11:36:36 -07:00
|
|
|
|
|
|
|
fn hex_digit_val(char c) -> int {
|
2011-06-15 11:19:50 -07:00
|
|
|
if (in_range(c, '0', '9')) { ret (c as int) - ('0' as int); }
|
|
|
|
if (in_range(c, 'a', 'f')) { ret (c as int) - ('a' as int) + 10; }
|
|
|
|
if (in_range(c, 'A', 'F')) { ret (c as int) - ('A' as int) + 10; }
|
2010-08-31 11:36:36 -07:00
|
|
|
fail;
|
|
|
|
}
|
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
// Value of the binary digit `c`: 0 for '0', 1 for anything else. The
// caller is expected to have checked `c` with is_bin_digit.
fn bin_digit_value(char c) -> int {
    if (c == '0') { ret 0; } else { ret 1; }
}
|
2010-08-31 11:36:36 -07:00
|
|
|
|
2010-08-19 18:42:17 -07:00
|
|
|
fn is_whitespace(char c) -> bool {
|
2010-08-20 15:36:48 -07:00
|
|
|
ret c == ' ' || c == '\t' || c == '\r' || c == '\n';
|
2010-08-19 18:42:17 -07:00
|
|
|
}
|
|
|
|
|
2011-07-05 11:48:19 +02:00
|
|
|
fn may_begin_ident(char c) -> bool { ret is_alpha(c) || c == '_'; }
|
|
|
|
|
|
|
|
fn in_range(char c, char lo, char hi) -> bool { ret lo <= c && c <= hi; }
|
|
|
|
|
|
|
|
fn is_alpha(char c) -> bool {
|
|
|
|
ret in_range(c, 'a', 'z') || in_range(c, 'A', 'Z');
|
|
|
|
}
|
|
|
|
|
|
|
|
fn is_dec_digit(char c) -> bool { ret in_range(c, '0', '9'); }
|
|
|
|
|
|
|
|
fn is_alnum(char c) -> bool { ret is_alpha(c) || is_dec_digit(c); }
|
|
|
|
|
|
|
|
fn is_hex_digit(char c) -> bool {
|
|
|
|
ret in_range(c, '0', '9') || in_range(c, 'a', 'f') ||
|
|
|
|
in_range(c, 'A', 'F');
|
|
|
|
}
|
|
|
|
|
|
|
|
fn is_bin_digit(char c) -> bool { ret c == '0' || c == '1'; }
|
|
|
|
|
2011-05-30 14:10:54 -07:00
|
|
|
fn consume_whitespace_and_comments(&reader rdr) {
|
2011-06-15 11:19:50 -07:00
|
|
|
while (is_whitespace(rdr.curr())) { rdr.bump(); }
|
2010-08-27 12:36:57 -07:00
|
|
|
be consume_any_line_comment(rdr);
|
2010-08-23 19:17:04 -07:00
|
|
|
}
|
|
|
|
|
2011-05-17 20:45:49 +02:00
|
|
|
fn consume_any_line_comment(&reader rdr) {
|
2010-08-27 12:36:57 -07:00
|
|
|
if (rdr.curr() == '/') {
|
2010-08-31 13:29:00 -07:00
|
|
|
alt (rdr.next()) {
|
|
|
|
case ('/') {
|
2011-06-15 11:19:50 -07:00
|
|
|
while (rdr.curr() != '\n' && !rdr.is_eof()) { rdr.bump(); }
|
2010-08-31 13:29:00 -07:00
|
|
|
// Restart whitespace munch.
|
2011-06-15 11:19:50 -07:00
|
|
|
|
2011-05-30 14:10:54 -07:00
|
|
|
be consume_whitespace_and_comments(rdr);
|
2010-08-31 13:29:00 -07:00
|
|
|
}
|
|
|
|
case ('*') {
|
2010-08-27 12:36:57 -07:00
|
|
|
rdr.bump();
|
2010-08-31 13:29:00 -07:00
|
|
|
rdr.bump();
|
|
|
|
be consume_block_comment(rdr);
|
|
|
|
}
|
2011-06-15 11:19:50 -07:00
|
|
|
case (_) { ret; }
|
2010-08-23 19:17:04 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-05-17 20:45:49 +02:00
|
|
|
fn consume_block_comment(&reader rdr) {
|
2010-08-31 13:29:00 -07:00
|
|
|
let int level = 1;
|
|
|
|
while (level > 0) {
|
2011-06-15 11:19:50 -07:00
|
|
|
if (rdr.is_eof()) { rdr.err("unterminated block comment"); fail; }
|
2010-08-31 13:29:00 -07:00
|
|
|
if (rdr.curr() == '/' && rdr.next() == '*') {
|
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
|
|
|
level += 1;
|
|
|
|
} else {
|
|
|
|
if (rdr.curr() == '*' && rdr.next() == '/') {
|
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
|
|
|
level -= 1;
|
2011-06-15 11:19:50 -07:00
|
|
|
} else { rdr.bump(); }
|
2010-08-31 13:29:00 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// restart whitespace munch.
|
2011-06-15 11:19:50 -07:00
|
|
|
|
2011-05-30 14:10:54 -07:00
|
|
|
be consume_whitespace_and_comments(rdr);
|
2010-08-31 13:29:00 -07:00
|
|
|
}
|
|
|
|
|
2011-03-24 17:12:24 -07:00
|
|
|
fn digits_to_string(str s) -> int {
|
2011-03-21 17:12:05 -07:00
|
|
|
let int accum_int = 0;
|
2011-03-24 17:12:24 -07:00
|
|
|
for (u8 c in s) {
|
|
|
|
accum_int *= 10;
|
|
|
|
accum_int += dec_digit_val(c as char);
|
2011-03-21 17:12:05 -07:00
|
|
|
}
|
|
|
|
ret accum_int;
|
|
|
|
}
|
|
|
|
|
2011-05-17 20:45:49 +02:00
|
|
|
// Scans an optional exponent: 'e' or 'E', an optional sign, then decimal
// digits. Returns none without consuming anything when the reader is not
// at an exponent marker; otherwise returns the exponent text. A marker
// with no digits after it is reported as an error and fails.
fn scan_exponent(&reader rdr) -> option::t[str] {
    auto c = rdr.curr();
    if (c != 'e' && c != 'E') { ret none[str]; }
    auto text = str::from_bytes([c as u8]);
    rdr.bump();
    c = rdr.curr();
    if (c == '-' || c == '+') {
        text += str::from_bytes([c as u8]);
        rdr.bump();
    }
    auto digits = scan_dec_digits(rdr);
    if (str::byte_len(digits) == 0u) {
        rdr.err("scan_exponent: bad fp literal");
        fail;
    }
    ret some(text + digits);
}
|
|
|
|
|
2011-05-17 20:45:49 +02:00
|
|
|
fn scan_dec_digits(&reader rdr) -> str {
|
2011-03-24 17:12:24 -07:00
|
|
|
auto c = rdr.curr();
|
2011-06-24 19:04:08 +02:00
|
|
|
let str rslt = "";
|
2011-06-15 11:19:50 -07:00
|
|
|
while (is_dec_digit(c) || c == '_') {
|
2011-06-24 19:04:08 +02:00
|
|
|
if (c != '_') { rslt += str::from_bytes([c as u8]); }
|
2011-03-24 17:12:24 -07:00
|
|
|
rdr.bump();
|
|
|
|
c = rdr.curr();
|
2011-03-22 17:25:40 -07:00
|
|
|
}
|
2011-06-24 19:04:08 +02:00
|
|
|
ret rslt;
|
2011-03-22 17:25:40 -07:00
|
|
|
}
|
|
|
|
|
2011-05-17 20:45:49 +02:00
|
|
|
fn scan_number(char c, &reader rdr) -> token::token {
|
2010-11-22 17:41:26 -08:00
|
|
|
auto accum_int = 0;
|
2011-03-24 17:12:24 -07:00
|
|
|
let str dec_str = "";
|
|
|
|
let bool is_dec_integer = false;
|
2010-11-22 17:41:26 -08:00
|
|
|
auto n = rdr.next();
|
|
|
|
if (c == '0' && n == 'x') {
|
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
|
|
|
c = rdr.curr();
|
|
|
|
while (is_hex_digit(c) || c == '_') {
|
2011-06-15 11:19:50 -07:00
|
|
|
if (c != '_') { accum_int *= 16; accum_int += hex_digit_val(c); }
|
2010-11-22 17:41:26 -08:00
|
|
|
rdr.bump();
|
|
|
|
c = rdr.curr();
|
|
|
|
}
|
2011-03-21 17:12:05 -07:00
|
|
|
} else if (c == '0' && n == 'b') {
|
2010-11-22 17:41:26 -08:00
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
|
|
|
c = rdr.curr();
|
|
|
|
while (is_bin_digit(c) || c == '_') {
|
2011-06-15 11:19:50 -07:00
|
|
|
if (c != '_') { accum_int *= 2; accum_int += bin_digit_value(c); }
|
2010-11-22 17:41:26 -08:00
|
|
|
rdr.bump();
|
|
|
|
c = rdr.curr();
|
|
|
|
}
|
2011-06-15 11:19:50 -07:00
|
|
|
} else { dec_str = scan_dec_digits(rdr); is_dec_integer = true; }
|
|
|
|
if (is_dec_integer) { accum_int = digits_to_string(dec_str); }
|
2011-03-21 17:12:05 -07:00
|
|
|
c = rdr.curr();
|
|
|
|
n = rdr.next();
|
2010-11-22 17:41:26 -08:00
|
|
|
if (c == 'u' || c == 'i') {
|
2011-06-15 11:19:50 -07:00
|
|
|
let bool signed = c == 'i';
|
2010-11-22 17:41:26 -08:00
|
|
|
rdr.bump();
|
|
|
|
c = rdr.curr();
|
|
|
|
if (c == '8') {
|
|
|
|
rdr.bump();
|
|
|
|
if (signed) {
|
2011-07-05 11:48:19 +02:00
|
|
|
ret token::LIT_MACH_INT(ast::ty_i8, accum_int);
|
|
|
|
} else { ret token::LIT_MACH_INT(ast::ty_u8, accum_int); }
|
2010-11-22 17:41:26 -08:00
|
|
|
}
|
|
|
|
n = rdr.next();
|
|
|
|
if (c == '1' && n == '6') {
|
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
|
|
|
if (signed) {
|
2011-07-05 11:48:19 +02:00
|
|
|
ret token::LIT_MACH_INT(ast::ty_i16, accum_int);
|
|
|
|
} else { ret token::LIT_MACH_INT(ast::ty_u16, accum_int); }
|
2010-11-22 17:41:26 -08:00
|
|
|
}
|
|
|
|
if (c == '3' && n == '2') {
|
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
|
|
|
if (signed) {
|
2011-07-05 11:48:19 +02:00
|
|
|
ret token::LIT_MACH_INT(ast::ty_i32, accum_int);
|
|
|
|
} else { ret token::LIT_MACH_INT(ast::ty_u32, accum_int); }
|
2010-11-22 17:41:26 -08:00
|
|
|
}
|
|
|
|
if (c == '6' && n == '4') {
|
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
|
|
|
if (signed) {
|
2011-07-05 11:48:19 +02:00
|
|
|
ret token::LIT_MACH_INT(ast::ty_i64, accum_int);
|
|
|
|
} else { ret token::LIT_MACH_INT(ast::ty_u64, accum_int); }
|
2010-11-22 17:41:26 -08:00
|
|
|
}
|
|
|
|
if (signed) {
|
2011-05-12 17:24:54 +02:00
|
|
|
ret token::LIT_INT(accum_int);
|
2010-11-22 17:41:26 -08:00
|
|
|
} else {
|
|
|
|
// FIXME: should cast in the target bit-width.
|
2011-06-15 11:19:50 -07:00
|
|
|
|
2011-05-12 17:24:54 +02:00
|
|
|
ret token::LIT_UINT(accum_int as uint);
|
2010-11-22 17:41:26 -08:00
|
|
|
}
|
|
|
|
}
|
2011-03-22 17:25:40 -07:00
|
|
|
c = rdr.curr();
|
|
|
|
if (c == '.') {
|
2011-03-21 17:12:05 -07:00
|
|
|
// Parse a floating-point number.
|
2011-06-15 11:19:50 -07:00
|
|
|
|
2011-03-21 17:12:05 -07:00
|
|
|
rdr.bump();
|
2011-03-24 17:12:24 -07:00
|
|
|
auto dec_part = scan_dec_digits(rdr);
|
|
|
|
auto float_str = dec_str + "." + dec_part;
|
2011-03-22 17:25:40 -07:00
|
|
|
c = rdr.curr();
|
2011-03-24 17:12:24 -07:00
|
|
|
auto exponent_str = scan_exponent(rdr);
|
|
|
|
alt (exponent_str) {
|
2011-06-15 11:19:50 -07:00
|
|
|
case (some(?s)) { float_str += s; }
|
|
|
|
case (none) { }
|
2011-03-22 17:25:40 -07:00
|
|
|
}
|
|
|
|
c = rdr.curr();
|
|
|
|
if (c == 'f') {
|
|
|
|
rdr.bump();
|
|
|
|
c = rdr.curr();
|
|
|
|
n = rdr.next();
|
|
|
|
if (c == '3' && n == '2') {
|
2011-06-15 11:19:50 -07:00
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
2011-07-05 11:48:19 +02:00
|
|
|
ret token::LIT_MACH_FLOAT(ast::ty_f32,
|
2011-06-15 11:19:50 -07:00
|
|
|
intern(*rdr.get_interner(),
|
|
|
|
float_str));
|
|
|
|
} else if (c == '6' && n == '4') {
|
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
2011-07-05 11:48:19 +02:00
|
|
|
ret token::LIT_MACH_FLOAT(ast::ty_f64,
|
2011-06-15 11:19:50 -07:00
|
|
|
intern(*rdr.get_interner(),
|
2011-06-16 16:55:46 -07:00
|
|
|
float_str));
|
2011-03-24 17:12:24 -07:00
|
|
|
/* FIXME: if this is out of range for either a 32-bit or
|
|
|
|
64-bit float, it won't be noticed till the back-end */
|
2011-06-15 11:19:50 -07:00
|
|
|
|
2011-03-22 17:25:40 -07:00
|
|
|
}
|
2011-06-15 11:19:50 -07:00
|
|
|
} else {
|
2011-05-16 14:53:00 -07:00
|
|
|
ret token::LIT_FLOAT(interner::intern[str](*rdr.get_interner(),
|
|
|
|
float_str));
|
2011-03-22 17:25:40 -07:00
|
|
|
}
|
2011-03-21 17:12:05 -07:00
|
|
|
}
|
2011-03-22 17:25:40 -07:00
|
|
|
auto maybe_exponent = scan_exponent(rdr);
|
2011-06-15 11:19:50 -07:00
|
|
|
alt (maybe_exponent) {
|
|
|
|
case (some(?s)) {
|
2011-05-16 14:53:00 -07:00
|
|
|
ret token::LIT_FLOAT(interner::intern[str](*rdr.get_interner(),
|
|
|
|
dec_str + s));
|
2011-03-22 17:25:40 -07:00
|
|
|
}
|
2011-06-15 11:19:50 -07:00
|
|
|
case (none) { ret token::LIT_INT(accum_int); }
|
2011-03-21 17:12:05 -07:00
|
|
|
}
|
2010-11-22 17:41:26 -08:00
|
|
|
}
|
|
|
|
|
2011-05-23 22:46:39 +02:00
|
|
|
fn scan_numeric_escape(&reader rdr, uint n_hex_digits) -> char {
|
2011-04-13 11:25:01 -07:00
|
|
|
auto accum_int = 0;
|
2011-05-23 22:46:39 +02:00
|
|
|
while (n_hex_digits != 0u) {
|
|
|
|
auto n = rdr.curr();
|
|
|
|
rdr.bump();
|
2011-04-13 11:25:01 -07:00
|
|
|
if (!is_hex_digit(n)) {
|
2011-05-11 23:04:16 -04:00
|
|
|
rdr.err(#fmt("illegal numeric character escape: %d", n as int));
|
2011-04-13 11:25:01 -07:00
|
|
|
fail;
|
|
|
|
}
|
|
|
|
accum_int *= 16;
|
|
|
|
accum_int += hex_digit_val(n);
|
2011-05-23 22:46:39 +02:00
|
|
|
n_hex_digits -= 1u;
|
2011-04-13 11:25:01 -07:00
|
|
|
}
|
|
|
|
ret accum_int as char;
|
|
|
|
}
|
|
|
|
|
2011-05-17 20:45:49 +02:00
|
|
|
fn next_token(&reader rdr) -> token::token {
|
2010-08-20 15:36:48 -07:00
|
|
|
auto accum_str = "";
|
2011-05-30 14:10:54 -07:00
|
|
|
consume_whitespace_and_comments(rdr);
|
2011-05-12 17:24:54 +02:00
|
|
|
if (rdr.is_eof()) { ret token::EOF; }
|
2010-12-09 17:11:05 -08:00
|
|
|
rdr.mark();
|
2010-08-27 12:36:57 -07:00
|
|
|
auto c = rdr.curr();
|
2010-10-13 11:02:56 -07:00
|
|
|
if (is_alpha(c) || c == '_') {
|
|
|
|
while (is_alnum(c) || c == '_') {
|
2011-05-17 20:41:41 +02:00
|
|
|
str::push_char(accum_str, c);
|
2010-08-27 12:36:57 -07:00
|
|
|
rdr.bump();
|
2010-08-31 11:36:36 -07:00
|
|
|
c = rdr.curr();
|
2010-08-20 11:41:34 -07:00
|
|
|
}
|
2011-06-15 11:19:50 -07:00
|
|
|
if (str::eq(accum_str, "_")) { ret token::UNDERSCORE; }
|
2011-05-14 12:54:31 +02:00
|
|
|
auto is_mod_name = c == ':' && rdr.next() == ':';
|
2011-05-16 14:53:00 -07:00
|
|
|
ret token::IDENT(interner::intern[str](*rdr.get_interner(),
|
|
|
|
accum_str), is_mod_name);
|
2010-08-20 10:03:31 -07:00
|
|
|
}
|
2011-06-15 11:19:50 -07:00
|
|
|
if (is_dec_digit(c)) { ret scan_number(c, rdr); }
|
2011-05-17 20:45:49 +02:00
|
|
|
fn binop(&reader rdr, token::binop op) -> token::token {
|
2010-08-27 12:36:57 -07:00
|
|
|
rdr.bump();
|
2010-12-08 14:50:47 -08:00
|
|
|
if (rdr.curr() == '=') {
|
2010-08-27 12:36:57 -07:00
|
|
|
rdr.bump();
|
2011-05-12 17:24:54 +02:00
|
|
|
ret token::BINOPEQ(op);
|
2011-06-15 11:19:50 -07:00
|
|
|
} else { ret token::BINOP(op); }
|
2010-08-23 19:17:04 -07:00
|
|
|
}
|
2010-08-20 14:34:48 -07:00
|
|
|
alt (c) {
|
2011-06-29 18:07:04 -07:00
|
|
|
// One-byte tokens.
|
|
|
|
case ('?') { rdr.bump(); ret token::QUES; }
|
2011-05-12 17:24:54 +02:00
|
|
|
case (';') { rdr.bump(); ret token::SEMI; }
|
|
|
|
case (',') { rdr.bump(); ret token::COMMA; }
|
2011-07-13 15:44:09 -07:00
|
|
|
case ('.') {
|
|
|
|
rdr.bump();
|
2011-07-08 16:35:09 -07:00
|
|
|
if (rdr.curr() == '.' && rdr.next() == '.') {
|
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
2011-07-11 16:13:17 -07:00
|
|
|
ret token::ELLIPSIS;
|
2011-07-08 16:35:09 -07:00
|
|
|
}
|
|
|
|
ret token::DOT;
|
|
|
|
}
|
2011-05-12 17:24:54 +02:00
|
|
|
case ('(') { rdr.bump(); ret token::LPAREN; }
|
|
|
|
case (')') { rdr.bump(); ret token::RPAREN; }
|
|
|
|
case ('{') { rdr.bump(); ret token::LBRACE; }
|
|
|
|
case ('}') { rdr.bump(); ret token::RBRACE; }
|
|
|
|
case ('[') { rdr.bump(); ret token::LBRACKET; }
|
|
|
|
case (']') { rdr.bump(); ret token::RBRACKET; }
|
|
|
|
case ('@') { rdr.bump(); ret token::AT; }
|
2011-07-13 15:44:09 -07:00
|
|
|
case ('#') {
|
|
|
|
rdr.bump();
|
2011-06-29 18:07:04 -07:00
|
|
|
if (rdr.curr() == '<') {
|
|
|
|
rdr.bump();
|
|
|
|
ret token::POUND_LT;
|
|
|
|
}
|
|
|
|
if (rdr.curr() == '{') {
|
|
|
|
rdr.bump();
|
|
|
|
ret token::POUND_LBRACE;
|
|
|
|
}
|
|
|
|
ret token::POUND;
|
|
|
|
}
|
2011-05-12 17:24:54 +02:00
|
|
|
case ('~') { rdr.bump(); ret token::TILDE; }
|
2011-05-12 17:20:07 +02:00
|
|
|
case (':') {
|
|
|
|
rdr.bump();
|
|
|
|
if (rdr.curr() == ':') {
|
|
|
|
rdr.bump();
|
2011-05-12 17:24:54 +02:00
|
|
|
ret token::MOD_SEP;
|
2011-06-15 11:19:50 -07:00
|
|
|
} else { ret token::COLON; }
|
2011-05-12 17:20:07 +02:00
|
|
|
}
|
2011-06-29 18:07:04 -07:00
|
|
|
// Multi-byte tokens.
|
|
|
|
case ('=') {
|
2010-08-31 13:29:00 -07:00
|
|
|
rdr.bump();
|
|
|
|
if (rdr.curr() == '=') {
|
2010-08-27 12:36:57 -07:00
|
|
|
rdr.bump();
|
2011-05-12 17:24:54 +02:00
|
|
|
ret token::EQEQ;
|
2011-06-15 11:19:50 -07:00
|
|
|
} else { ret token::EQ; }
|
2010-08-31 13:29:00 -07:00
|
|
|
}
|
|
|
|
case ('!') {
|
|
|
|
rdr.bump();
|
|
|
|
if (rdr.curr() == '=') {
|
2010-08-27 12:36:57 -07:00
|
|
|
rdr.bump();
|
2011-05-12 17:24:54 +02:00
|
|
|
ret token::NE;
|
2011-06-15 11:19:50 -07:00
|
|
|
} else { ret token::NOT; }
|
2010-08-31 13:29:00 -07:00
|
|
|
}
|
|
|
|
case ('<') {
|
|
|
|
rdr.bump();
|
|
|
|
alt (rdr.curr()) {
|
2011-06-15 11:19:50 -07:00
|
|
|
case ('=') { rdr.bump(); ret token::LE; }
|
|
|
|
case ('<') { ret binop(rdr, token::LSL); }
|
|
|
|
case ('|') { rdr.bump(); ret token::SEND; }
|
2011-06-13 15:48:55 -07:00
|
|
|
case ('-') {
|
|
|
|
rdr.bump();
|
|
|
|
alt (rdr.curr()) {
|
|
|
|
case ('>') { rdr.bump(); ret token::DARROW; }
|
|
|
|
case (_) { ret token::LARROW; }
|
|
|
|
}
|
|
|
|
}
|
2011-06-15 11:19:50 -07:00
|
|
|
case (_) { ret token::LT; }
|
2010-08-31 13:29:00 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
case ('>') {
|
|
|
|
rdr.bump();
|
|
|
|
alt (rdr.curr()) {
|
2011-06-15 11:19:50 -07:00
|
|
|
case ('=') { rdr.bump(); ret token::GE; }
|
2010-08-31 13:29:00 -07:00
|
|
|
case ('>') {
|
|
|
|
if (rdr.next() == '>') {
|
|
|
|
rdr.bump();
|
2011-05-12 17:24:54 +02:00
|
|
|
ret binop(rdr, token::ASR);
|
2011-06-15 11:19:50 -07:00
|
|
|
} else { ret binop(rdr, token::LSR); }
|
2010-08-31 13:29:00 -07:00
|
|
|
}
|
2011-06-15 11:19:50 -07:00
|
|
|
case (_) { ret token::GT; }
|
2010-08-20 15:36:48 -07:00
|
|
|
}
|
|
|
|
}
|
2010-08-24 08:56:42 -07:00
|
|
|
case ('\'') {
|
2010-08-27 12:36:57 -07:00
|
|
|
rdr.bump();
|
|
|
|
auto c2 = rdr.curr();
|
2011-05-23 22:46:39 +02:00
|
|
|
rdr.bump();
|
2010-08-24 08:56:42 -07:00
|
|
|
if (c2 == '\\') {
|
2011-05-23 22:46:39 +02:00
|
|
|
auto escaped = rdr.curr();
|
|
|
|
rdr.bump();
|
|
|
|
alt (escaped) {
|
2011-04-13 11:25:01 -07:00
|
|
|
case ('n') { c2 = '\n'; }
|
|
|
|
case ('r') { c2 = '\r'; }
|
|
|
|
case ('t') { c2 = '\t'; }
|
|
|
|
case ('\\') { c2 = '\\'; }
|
|
|
|
case ('\'') { c2 = '\''; }
|
2011-05-23 22:46:39 +02:00
|
|
|
case ('x') { c2 = scan_numeric_escape(rdr, 2u); }
|
|
|
|
case ('u') { c2 = scan_numeric_escape(rdr, 4u); }
|
|
|
|
case ('U') { c2 = scan_numeric_escape(rdr, 8u); }
|
2010-09-20 23:56:43 -07:00
|
|
|
case (?c2) {
|
2011-05-11 23:04:16 -04:00
|
|
|
rdr.err(#fmt("unknown character escape: %d",
|
|
|
|
c2 as int));
|
2010-08-24 08:56:42 -07:00
|
|
|
fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2011-05-23 22:46:39 +02:00
|
|
|
if (rdr.curr() != '\'') {
|
2011-05-11 23:04:16 -04:00
|
|
|
rdr.err("unterminated character constant");
|
2010-08-24 08:56:42 -07:00
|
|
|
fail;
|
|
|
|
}
|
2011-04-13 11:25:01 -07:00
|
|
|
rdr.bump(); // advance curr past token
|
2011-06-15 11:19:50 -07:00
|
|
|
|
2011-05-12 17:24:54 +02:00
|
|
|
ret token::LIT_CHAR(c2);
|
2010-08-24 08:56:42 -07:00
|
|
|
}
|
|
|
|
case ('"') {
|
2010-08-27 12:36:57 -07:00
|
|
|
rdr.bump();
|
|
|
|
while (rdr.curr() != '"') {
|
2011-05-23 22:46:39 +02:00
|
|
|
auto ch = rdr.curr();
|
|
|
|
rdr.bump();
|
|
|
|
alt (ch) {
|
2010-08-24 08:56:42 -07:00
|
|
|
case ('\\') {
|
2011-05-23 22:46:39 +02:00
|
|
|
auto escaped = rdr.curr();
|
|
|
|
rdr.bump();
|
|
|
|
alt (escaped) {
|
2010-08-27 13:36:18 -07:00
|
|
|
case ('n') {
|
2011-05-17 20:41:41 +02:00
|
|
|
str::push_byte(accum_str, '\n' as u8);
|
2010-08-27 13:36:18 -07:00
|
|
|
}
|
|
|
|
case ('r') {
|
2011-05-17 20:41:41 +02:00
|
|
|
str::push_byte(accum_str, '\r' as u8);
|
2010-08-27 13:36:18 -07:00
|
|
|
}
|
|
|
|
case ('t') {
|
2011-05-17 20:41:41 +02:00
|
|
|
str::push_byte(accum_str, '\t' as u8);
|
2010-08-27 13:36:18 -07:00
|
|
|
}
|
|
|
|
case ('\\') {
|
2011-05-17 20:41:41 +02:00
|
|
|
str::push_byte(accum_str, '\\' as u8);
|
2010-08-27 13:36:18 -07:00
|
|
|
}
|
|
|
|
case ('"') {
|
2011-05-17 20:41:41 +02:00
|
|
|
str::push_byte(accum_str, '"' as u8);
|
2010-08-27 13:36:18 -07:00
|
|
|
}
|
2011-06-15 11:19:50 -07:00
|
|
|
case ('\n') { consume_whitespace(rdr); }
|
2011-04-13 11:25:01 -07:00
|
|
|
case ('x') {
|
2011-05-17 20:41:41 +02:00
|
|
|
str::push_char(accum_str,
|
2011-05-23 22:46:39 +02:00
|
|
|
scan_numeric_escape(rdr, 2u));
|
2011-04-13 11:25:01 -07:00
|
|
|
}
|
|
|
|
case ('u') {
|
2011-05-17 20:41:41 +02:00
|
|
|
str::push_char(accum_str,
|
2011-05-23 22:46:39 +02:00
|
|
|
scan_numeric_escape(rdr, 4u));
|
2011-04-13 11:25:01 -07:00
|
|
|
}
|
|
|
|
case ('U') {
|
2011-05-17 20:41:41 +02:00
|
|
|
str::push_char(accum_str,
|
2011-05-23 22:46:39 +02:00
|
|
|
scan_numeric_escape(rdr, 8u));
|
2011-04-13 11:25:01 -07:00
|
|
|
}
|
2010-09-20 23:56:43 -07:00
|
|
|
case (?c2) {
|
2011-05-11 23:04:16 -04:00
|
|
|
rdr.err(#fmt("unknown string escape: %d",
|
|
|
|
c2 as int));
|
2010-08-24 08:56:42 -07:00
|
|
|
fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2011-06-15 11:19:50 -07:00
|
|
|
case (_) { str::push_char(accum_str, ch); }
|
2010-08-24 08:56:42 -07:00
|
|
|
}
|
|
|
|
}
|
2010-08-27 12:36:57 -07:00
|
|
|
rdr.bump();
|
2011-05-16 14:53:00 -07:00
|
|
|
ret token::LIT_STR(interner::intern[str](*rdr.get_interner(),
|
|
|
|
accum_str));
|
2010-08-24 08:56:42 -07:00
|
|
|
}
|
2010-08-23 19:17:04 -07:00
|
|
|
case ('-') {
|
2010-08-27 12:36:57 -07:00
|
|
|
if (rdr.next() == '>') {
|
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
2011-05-12 17:24:54 +02:00
|
|
|
ret token::RARROW;
|
2011-06-15 11:19:50 -07:00
|
|
|
} else { ret binop(rdr, token::MINUS); }
|
2010-08-23 19:17:04 -07:00
|
|
|
}
|
|
|
|
case ('&') {
|
2010-08-27 12:36:57 -07:00
|
|
|
if (rdr.next() == '&') {
|
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
2011-05-12 17:24:54 +02:00
|
|
|
ret token::ANDAND;
|
2011-06-15 11:19:50 -07:00
|
|
|
} else { ret binop(rdr, token::AND); }
|
2010-08-31 13:29:00 -07:00
|
|
|
}
|
|
|
|
case ('|') {
|
2011-05-26 18:16:24 -07:00
|
|
|
alt (rdr.next()) {
|
2011-06-15 11:19:50 -07:00
|
|
|
case ('|') { rdr.bump(); rdr.bump(); ret token::OROR; }
|
|
|
|
case ('>') { rdr.bump(); rdr.bump(); ret token::RECV; }
|
|
|
|
case (_) { ret binop(rdr, token::OR); }
|
2010-08-23 19:17:04 -07:00
|
|
|
}
|
|
|
|
}
|
2011-06-15 11:19:50 -07:00
|
|
|
case ('+') { ret binop(rdr, token::PLUS); }
|
|
|
|
case ('*') { ret binop(rdr, token::STAR); }
|
|
|
|
case ('/') { ret binop(rdr, token::SLASH); }
|
|
|
|
case ('^') { ret binop(rdr, token::CARET); }
|
|
|
|
case ('%') { ret binop(rdr, token::PERCENT); }
|
2011-04-13 11:25:01 -07:00
|
|
|
case (?c) {
|
2011-05-11 23:04:16 -04:00
|
|
|
rdr.err(#fmt("unkown start of token: %d", c as int));
|
2011-04-13 11:25:01 -07:00
|
|
|
fail;
|
|
|
|
}
|
2010-08-20 14:34:48 -07:00
|
|
|
}
|
2011-03-05 20:05:02 +00:00
|
|
|
fail;
|
2010-07-14 09:41:36 -07:00
|
|
|
}
|
|
|
|
|
2011-05-30 14:10:54 -07:00
|
|
|
// How a gathered comment sits relative to the surrounding code.
tag cmnt_style {
    isolated; // No code on either side of each line of the comment
    trailing; // Code exists to the left of the comment
    mixed; // Code before /* foo */ and after the comment
    blank_line; // Just a manual blank line "\n\n", for layout
}
|
2011-04-08 18:44:20 +02:00
|
|
|
|
2011-07-14 17:26:10 -07:00
|
|
|
// A comment gathered from the source: its layout style, its text split
// into lines, and the character position at which it started.
type cmnt = rec(cmnt_style style, str[] lines, uint pos);
|
2011-03-24 16:33:20 +01:00
|
|
|
|
2011-05-30 14:10:54 -07:00
|
|
|
fn read_to_eol(&reader rdr) -> str {
|
|
|
|
auto val = "";
|
|
|
|
while (rdr.curr() != '\n' && !rdr.is_eof()) {
|
|
|
|
str::push_char(val, rdr.curr());
|
|
|
|
rdr.bump();
|
|
|
|
}
|
|
|
|
ret val;
|
2011-03-24 16:33:20 +01:00
|
|
|
}
|
|
|
|
|
2011-05-30 14:10:54 -07:00
|
|
|
fn read_one_line_comment(&reader rdr) -> str {
|
|
|
|
auto val = read_to_eol(rdr);
|
2011-06-15 11:19:50 -07:00
|
|
|
assert (val.(0) == '/' as u8 && val.(1) == '/' as u8);
|
2011-05-30 14:10:54 -07:00
|
|
|
ret val;
|
|
|
|
}
|
|
|
|
|
|
|
|
fn consume_whitespace(&reader rdr) {
|
2011-06-15 11:19:50 -07:00
|
|
|
while (is_whitespace(rdr.curr()) && !rdr.is_eof()) { rdr.bump(); }
|
2011-03-24 16:33:20 +01:00
|
|
|
}
|
|
|
|
|
2011-05-30 14:10:54 -07:00
|
|
|
fn consume_non_eol_whitespace(&reader rdr) {
|
2011-06-15 11:19:50 -07:00
|
|
|
while (is_whitespace(rdr.curr()) && rdr.curr() != '\n' && !rdr.is_eof()) {
|
2011-05-30 14:10:54 -07:00
|
|
|
rdr.bump();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-06-30 17:25:13 -07:00
|
|
|
fn consume_whitespace_counting_blank_lines(&reader rdr,
|
2011-07-12 16:13:30 -07:00
|
|
|
&mutable cmnt[] comments) {
|
2011-06-30 17:25:13 -07:00
|
|
|
while (is_whitespace(rdr.curr()) && !rdr.is_eof()) {
|
|
|
|
if (rdr.curr() == '\n' && rdr.next() == '\n') {
|
|
|
|
log ">>> blank-line comment";
|
2011-07-14 17:26:10 -07:00
|
|
|
let str[] v = ~[];
|
2011-07-12 16:13:30 -07:00
|
|
|
comments += ~[rec(style=blank_line, lines=v,
|
|
|
|
pos=rdr.get_chpos())];
|
2011-06-30 17:25:13 -07:00
|
|
|
}
|
|
|
|
rdr.bump();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-05-30 14:10:54 -07:00
|
|
|
// Reads "//" comments starting at the current position into one cmnt.
// The style is `trailing` when code precedes the comment on its line and
// `isolated` otherwise; `pos` is the character position where reading
// began.
fn read_line_comments(&reader rdr, bool code_to_the_left) -> cmnt {
    log ">>> line comments";
    auto start = rdr.get_chpos();
    let str[] collected = ~[];
    while (rdr.curr() == '/' && rdr.next() == '/') {
        auto text = read_one_line_comment(rdr);
        log text;
        collected += ~[text];
        consume_non_eol_whitespace(rdr);
    }
    log "<<< line comments";
    auto style = if (code_to_the_left) { trailing } else { isolated };
    ret rec(style=style, lines=collected, pos=start);
}
|
|
|
|
|
|
|
|
fn all_whitespace(&str s, uint begin, uint end) -> bool {
|
|
|
|
let uint i = begin;
|
|
|
|
while (i != end) {
|
2011-06-15 11:19:50 -07:00
|
|
|
if (!is_whitespace(s.(i) as char)) { ret false; }
|
2011-05-30 14:10:54 -07:00
|
|
|
i += 1u;
|
|
|
|
}
|
|
|
|
ret true;
|
|
|
|
}
|
|
|
|
|
2011-07-14 17:26:10 -07:00
|
|
|
fn trim_whitespace_prefix_and_push_line(&mutable str[] lines, &str s,
|
2011-06-15 11:19:50 -07:00
|
|
|
uint col) {
|
2011-05-30 14:10:54 -07:00
|
|
|
auto s1;
|
|
|
|
if (all_whitespace(s, 0u, col)) {
|
|
|
|
if (col < str::byte_len(s)) {
|
|
|
|
s1 = str::slice(s, col, str::byte_len(s));
|
2011-06-15 11:19:50 -07:00
|
|
|
} else { s1 = ""; }
|
|
|
|
} else { s1 = s; }
|
2011-05-30 14:10:54 -07:00
|
|
|
log "pushing line: " + s1;
|
2011-07-14 17:26:10 -07:00
|
|
|
lines += ~[s1];
|
2011-05-30 14:10:54 -07:00
|
|
|
}
|
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
fn read_block_comment(&reader rdr, bool code_to_the_left) -> cmnt {
|
2011-05-30 14:10:54 -07:00
|
|
|
log ">>> block comment";
|
|
|
|
auto p = rdr.get_chpos();
|
2011-07-14 17:26:10 -07:00
|
|
|
let str[] lines = ~[];
|
2011-05-30 14:10:54 -07:00
|
|
|
let uint col = rdr.get_col();
|
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
|
|
|
auto curr_line = "/*";
|
|
|
|
let int level = 1;
|
|
|
|
while (level > 0) {
|
|
|
|
log #fmt("=== block comment level %d", level);
|
2011-06-15 11:19:50 -07:00
|
|
|
if (rdr.is_eof()) { rdr.err("unterminated block comment"); fail; }
|
2011-05-30 14:10:54 -07:00
|
|
|
if (rdr.curr() == '\n') {
|
|
|
|
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
|
|
|
|
curr_line = "";
|
|
|
|
rdr.bump();
|
|
|
|
} else {
|
|
|
|
str::push_char(curr_line, rdr.curr());
|
|
|
|
if (rdr.curr() == '/' && rdr.next() == '*') {
|
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
|
|
|
curr_line += "*";
|
|
|
|
level += 1;
|
|
|
|
} else {
|
|
|
|
if (rdr.curr() == '*' && rdr.next() == '/') {
|
|
|
|
rdr.bump();
|
|
|
|
rdr.bump();
|
|
|
|
curr_line += "/";
|
|
|
|
level -= 1;
|
2011-06-15 11:19:50 -07:00
|
|
|
} else { rdr.bump(); }
|
2011-05-30 14:10:54 -07:00
|
|
|
}
|
|
|
|
}
|
2011-03-24 16:33:20 +01:00
|
|
|
}
|
2011-05-30 14:10:54 -07:00
|
|
|
if (str::byte_len(curr_line) != 0u) {
|
|
|
|
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
|
|
|
|
}
|
|
|
|
auto style = if (code_to_the_left) { trailing } else { isolated };
|
|
|
|
consume_non_eol_whitespace(rdr);
|
2011-07-14 17:26:10 -07:00
|
|
|
if (!rdr.is_eof() && rdr.curr() != '\n' && ivec::len(lines) == 1u) {
|
2011-05-30 14:10:54 -07:00
|
|
|
style = mixed;
|
|
|
|
}
|
|
|
|
log "<<< block comment";
|
2011-06-15 11:19:50 -07:00
|
|
|
ret rec(style=style, lines=lines, pos=p);
|
2011-05-30 14:10:54 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn peeking_at_comment(&reader rdr) -> bool {
|
2011-06-15 11:19:50 -07:00
|
|
|
ret rdr.curr() == '/' && rdr.next() == '/' ||
|
|
|
|
rdr.curr() == '/' && rdr.next() == '*';
|
2011-05-30 14:10:54 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn consume_comment(&reader rdr, bool code_to_the_left,
|
2011-07-12 16:13:30 -07:00
|
|
|
&mutable cmnt[] comments) {
|
2011-05-30 14:10:54 -07:00
|
|
|
log ">>> consume comment";
|
|
|
|
if (rdr.curr() == '/' && rdr.next() == '/') {
|
2011-07-12 16:13:30 -07:00
|
|
|
comments += ~[read_line_comments(rdr, code_to_the_left)];
|
2011-05-30 14:10:54 -07:00
|
|
|
} else if (rdr.curr() == '/' && rdr.next() == '*') {
|
2011-07-12 16:13:30 -07:00
|
|
|
comments += ~[read_block_comment(rdr, code_to_the_left)];
|
2011-05-30 14:10:54 -07:00
|
|
|
} else { fail; }
|
|
|
|
log "<<< consume comment";
|
2011-03-24 16:33:20 +01:00
|
|
|
}
|
|
|
|
|
2011-06-02 17:18:43 -07:00
|
|
|
fn is_lit(&token::token t) -> bool {
|
|
|
|
ret alt (t) {
|
2011-06-15 11:19:50 -07:00
|
|
|
case (token::LIT_INT(_)) { true }
|
|
|
|
case (token::LIT_UINT(_)) { true }
|
|
|
|
case (token::LIT_MACH_INT(_, _)) { true }
|
|
|
|
case (token::LIT_FLOAT(_)) { true }
|
|
|
|
case (token::LIT_MACH_FLOAT(_, _)) { true }
|
|
|
|
case (token::LIT_STR(_)) { true }
|
|
|
|
case (token::LIT_CHAR(_)) { true }
|
|
|
|
case (token::LIT_BOOL(_)) { true }
|
|
|
|
case (_) { false }
|
|
|
|
}
|
2011-06-02 17:18:43 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// A literal token found while gathering: its source text as returned by
// the reader's get_mark_str(), and the character position of its start.
type lit = rec(str lit, uint pos);
|
|
|
|
|
2011-07-12 16:13:30 -07:00
|
|
|
fn gather_comments_and_literals(&codemap::codemap cm, str path)
|
|
|
|
-> rec(cmnt[] cmnts, lit[] lits) {
|
2011-07-12 10:59:18 -07:00
|
|
|
auto srdr = ioivec::file_reader(path);
|
|
|
|
auto src = str::unsafe_from_bytes_ivec(srdr.read_whole_stream());
|
2011-05-20 13:57:09 -07:00
|
|
|
auto itr = @interner::mk[str](str::hash, str::eq);
|
2011-07-16 02:01:10 -04:00
|
|
|
auto rdr = new_reader(cm, src, codemap::new_filemap(path, 0u, 0u), itr);
|
2011-07-12 16:13:30 -07:00
|
|
|
let cmnt[] comments = ~[];
|
|
|
|
let lit[] literals = ~[];
|
2011-06-14 11:11:22 -07:00
|
|
|
let bool first_read = true;
|
2011-03-24 16:33:20 +01:00
|
|
|
while (!rdr.is_eof()) {
|
|
|
|
while (true) {
|
2011-06-14 11:11:22 -07:00
|
|
|
auto code_to_the_left = !first_read;
|
2011-05-30 14:10:54 -07:00
|
|
|
consume_non_eol_whitespace(rdr);
|
2011-05-31 15:31:50 -07:00
|
|
|
if (rdr.curr() == '\n') {
|
2011-05-30 14:10:54 -07:00
|
|
|
code_to_the_left = false;
|
2011-06-30 17:25:13 -07:00
|
|
|
consume_whitespace_counting_blank_lines(rdr, comments);
|
2011-05-30 14:10:54 -07:00
|
|
|
}
|
|
|
|
while (peeking_at_comment(rdr)) {
|
|
|
|
consume_comment(rdr, code_to_the_left, comments);
|
2011-06-30 17:25:13 -07:00
|
|
|
consume_whitespace_counting_blank_lines(rdr, comments);
|
2011-05-30 14:10:54 -07:00
|
|
|
}
|
|
|
|
break;
|
2011-03-24 16:33:20 +01:00
|
|
|
}
|
2011-06-30 17:25:13 -07:00
|
|
|
auto tok = next_token(rdr);
|
|
|
|
if (is_lit(tok)) {
|
2011-07-12 16:13:30 -07:00
|
|
|
literals += ~[rec(lit=rdr.get_mark_str(),
|
|
|
|
pos=rdr.get_mark_chpos())];
|
2011-06-02 17:18:43 -07:00
|
|
|
}
|
2011-06-30 17:25:13 -07:00
|
|
|
log "tok: " + token::to_str(rdr, tok);
|
2011-06-14 11:11:22 -07:00
|
|
|
first_read = false;
|
2011-03-24 16:33:20 +01:00
|
|
|
}
|
2011-06-02 17:18:43 -07:00
|
|
|
ret rec(cmnts=comments, lits=literals);
|
2011-06-15 11:19:50 -07:00
|
|
|
}
|
2010-08-12 10:29:23 -07:00
|
|
|
//
|
|
|
|
// Local Variables:
|
|
|
|
// mode: rust
|
|
|
|
// fill-column: 78;
|
|
|
|
// indent-tabs-mode: nil
|
|
|
|
// c-basic-offset: 4
|
|
|
|
// buffer-file-coding-system: utf-8-unix
|
2011-03-25 15:07:27 -07:00
|
|
|
// compile-command: "make -k -C $RBUILD 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
|
2010-08-12 10:29:23 -07:00
|
|
|
// End:
|
|
|
|
//
|