2010-08-18 15:41:13 -07:00
|
|
|
import std._io.stdio_reader;
|
2010-07-14 09:41:36 -07:00
|
|
|
|
2010-08-19 18:42:17 -07:00
|
|
|
fn in_range(char c, char lo, char hi) -> bool {
|
2010-08-20 12:12:37 -07:00
|
|
|
ret lo <= c && c <= hi;
|
2010-08-19 18:42:17 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn is_alpha(char c) -> bool {
|
|
|
|
ret in_range(c, 'a', 'z') ||
|
|
|
|
in_range(c, 'A', 'Z');
|
|
|
|
}
|
|
|
|
|
|
|
|
fn is_dec_digit(char c) -> bool {
|
|
|
|
ret in_range(c, '0', '9');
|
|
|
|
}
|
|
|
|
|
|
|
|
fn is_hex_digit(char c) -> bool {
|
|
|
|
ret in_range(c, '0', '9') ||
|
|
|
|
in_range(c, 'a', 'f') ||
|
|
|
|
in_range(c, 'A', 'F');
|
|
|
|
}
|
|
|
|
|
|
|
|
fn is_bin_digit(char c) -> bool {
|
|
|
|
ret c == '0' || c == '1';
|
|
|
|
}
|
|
|
|
|
|
|
|
fn is_whitespace(char c) -> bool {
|
2010-08-20 15:36:48 -07:00
|
|
|
ret c == ' ' || c == '\t' || c == '\r' || c == '\n';
|
2010-08-19 18:42:17 -07:00
|
|
|
}
|
|
|
|
|
2010-08-23 19:17:04 -07:00
|
|
|
fn consume_any_whitespace(stdio_reader rdr, char c) -> char {
|
|
|
|
auto c1 = c;
|
|
|
|
while (is_whitespace(c1)) {
|
|
|
|
c1 = rdr.getc() as char;
|
|
|
|
}
|
|
|
|
be consume_any_line_comment(rdr, c1);
|
|
|
|
}
|
|
|
|
|
|
|
|
fn consume_any_line_comment(stdio_reader rdr, char c) -> char {
|
|
|
|
auto c1 = c;
|
|
|
|
if (c1 == '/') {
|
|
|
|
auto c2 = rdr.getc() as char;
|
|
|
|
if (c2 == '/') {
|
|
|
|
while (c1 != '\n') {
|
|
|
|
c1 = rdr.getc() as char;
|
|
|
|
}
|
|
|
|
// Restart whitespace munch.
|
|
|
|
be consume_any_whitespace(rdr, c1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ret c;
|
|
|
|
}
|
|
|
|
|
2010-08-18 15:41:13 -07:00
|
|
|
// Reads and returns the next token from rdr.
//
// Leading whitespace and "//" line comments are skipped first. Then, in
// order:
//   - end of input yields token.EOF;
//   - a run of ASCII letters yields token.IDENT;
//   - a run of decimal digits yields token.LIT_INT;
//   - punctuation, operators, character and string literals are handled
//     by the alt below.
// Any other character is logged and token.EOF is returned. Malformed
// character or string literals log a message and fail.
fn next_token(stdio_reader rdr) -> token.token {
    // End-of-input marker: getc()'s -1 cast to char.
    auto eof = (-1) as char;
    auto c = rdr.getc() as char;
    auto accum_str = "";
    auto accum_int = 0;

    // Reads one character from the reader.
    fn next(stdio_reader rdr) -> char {
        ret rdr.getc() as char;
    }

    // Pushes a lookahead character back onto the reader.
    fn forget(stdio_reader rdr, char c) {
        rdr.ungetc(c as int);
    }

    c = consume_any_whitespace(rdr, c);

    if (c == eof) { ret token.EOF(); }

    if (is_alpha(c)) {
        while (is_alpha(c)) {
            accum_str += (c as u8);
            c = next(rdr);
        }
        // The character that ended the identifier belongs to the next
        // token.
        forget(rdr, c);
        ret token.IDENT(accum_str);
    }

    if (is_dec_digit(c)) {
        // FIXME: hex ("0x") and binary ("0b") prefixes are not lexed
        // yet; a leading '0' is treated as an ordinary decimal digit so
        // that a literal such as "0" still produces a token.
        while (is_dec_digit(c)) {
            accum_int *= 10;
            accum_int += (c as int) - ('0' as int);
            c = next(rdr);
        }
        forget(rdr, c);
        ret token.LIT_INT(accum_int);
    }

    // Given the character c2 following an operator, returns OPEQ(op) if
    // c2 is '=' (e.g. "+="), otherwise pushes c2 back and returns OP(op).
    fn op_or_opeq(stdio_reader rdr, char c2,
                  token.op op) -> token.token {
        if (c2 == '=') {
            ret token.OPEQ(op);
        } else {
            forget(rdr, c2);
            ret token.OP(op);
        }
    }

    alt (c) {
        // One-byte tokens.
        case (';') { ret token.SEMI(); }
        case (',') { ret token.COMMA(); }
        case ('.') { ret token.DOT(); }
        case ('(') { ret token.LPAREN(); }
        case (')') { ret token.RPAREN(); }
        case ('{') { ret token.LBRACE(); }
        case ('}') { ret token.RBRACE(); }
        case ('[') { ret token.LBRACKET(); }
        case (']') { ret token.RBRACKET(); }
        case ('@') { ret token.AT(); }
        case ('#') { ret token.POUND(); }

        // Multi-byte tokens.
        case ('=') {
            auto c2 = next(rdr);
            if (c2 == '=') {
                ret token.OP(token.EQEQ());
            } else {
                forget(rdr, c2);
                ret token.OP(token.EQ());
            }
        }

        case ('\'') {
            // FIXME: general utf8-consumption support.
            auto c2 = next(rdr);
            if (c2 == '\\') {
                c2 = next(rdr);
                alt (c2) {
                    case ('n') { c2 = '\n'; }
                    case ('r') { c2 = '\r'; }
                    case ('t') { c2 = '\t'; }
                    case ('\\') { c2 = '\\'; }
                    case ('\'') { c2 = '\''; }
                    // FIXME: unicode numeric escapes.
                    case (_) {
                        log "unknown character escape";
                        log c2;
                        fail;
                    }
                }
            }
            // Exactly one (possibly escaped) character, then the
            // closing quote.
            if (next(rdr) != '\'') {
                log "unterminated character constant";
                fail;
            }
            ret token.LIT_CHAR(c2);
        }

        case ('"') {
            // FIXME: general utf8-consumption support.
            auto c2 = next(rdr);
            while (c2 != '"') {
                // Without this check a missing closing quote would
                // append the end-of-input marker forever.
                if (c2 == eof) {
                    log "unterminated string constant";
                    fail;
                }
                alt (c2) {
                    case ('\\') {
                        c2 = next(rdr);
                        alt (c2) {
                            case ('n') { accum_str += '\n' as u8; }
                            case ('r') { accum_str += '\r' as u8; }
                            case ('t') { accum_str += '\t' as u8; }
                            case ('\\') { accum_str += '\\' as u8; }
                            case ('"') { accum_str += '"' as u8; }
                            // FIXME: unicode numeric escapes.
                            case (_) {
                                log "unknown string escape";
                                log c2;
                                fail;
                            }
                        }
                    }
                    case (_) {
                        accum_str += c2 as u8;
                    }
                }
                c2 = next(rdr);
            }
            ret token.LIT_STR(accum_str);
        }

        case ('-') {
            auto c2 = next(rdr);
            if (c2 == '>') {
                ret token.RARROW();
            } else {
                ret op_or_opeq(rdr, c2, token.MINUS());
            }
        }

        case ('&') {
            auto c2 = next(rdr);
            if (c2 == '&') {
                ret token.OP(token.ANDAND());
            } else {
                ret op_or_opeq(rdr, c2, token.AND());
            }
        }

        case ('+') {
            ret op_or_opeq(rdr, next(rdr), token.PLUS());
        }

        case ('*') {
            ret op_or_opeq(rdr, next(rdr), token.STAR());
        }

        case ('/') {
            // Division must not be reported as STAR (multiplication).
            // NOTE(review): assumes token.op has a SLASH variant --
            // confirm against the token module.
            ret op_or_opeq(rdr, next(rdr), token.SLASH());
        }

        case ('!') {
            ret op_or_opeq(rdr, next(rdr), token.NOT());
        }

        case ('^') {
            ret op_or_opeq(rdr, next(rdr), token.CARET());
        }

        case ('%') {
            ret op_or_opeq(rdr, next(rdr), token.PERCENT());
        }
    }

    // No rule matched: report the offending character and stop lexing.
    log "lexer stopping at ";
    log c;
    ret token.EOF();
}
|
|
|
|
|
2010-08-18 11:35:34 -07:00
|
|
|
|
2010-08-12 10:29:23 -07:00
|
|
|
//
|
|
|
|
// Local Variables:
|
|
|
|
// mode: rust
|
|
|
|
// fill-column: 78;
|
|
|
|
// indent-tabs-mode: nil
|
|
|
|
// c-basic-offset: 4
|
|
|
|
// buffer-file-coding-system: utf-8-unix
|
|
|
|
// compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
|
|
|
|
// End:
|
|
|
|
//
|