diff --git a/src/comp/fe/lexer.rs b/src/comp/fe/lexer.rs index 0deb5401259..95aac8c519e 100644 --- a/src/comp/fe/lexer.rs +++ b/src/comp/fe/lexer.rs @@ -238,16 +238,48 @@ fn consume_any_whitespace(reader rdr) { fn consume_any_line_comment(reader rdr) { if (rdr.curr() == '/') { - if (rdr.next() == '/') { - while (rdr.curr() != '\n') { - rdr.bump(); + alt (rdr.next()) { + case ('/') { + while (rdr.curr() != '\n') { + rdr.bump(); + } + // Restart whitespace munch. + be consume_any_whitespace(rdr); + } + case ('*') { + rdr.bump(); + rdr.bump(); + be consume_block_comment(rdr); + } + case (_) { + ret; } - // Restart whitespace munch. - be consume_any_whitespace(rdr); } } } + +fn consume_block_comment(reader rdr) { + let int level = 1; + while (level > 0) { + if (rdr.curr() == '/' && rdr.next() == '*') { + rdr.bump(); + rdr.bump(); + level += 1; + } else { + if (rdr.curr() == '*' && rdr.next() == '/') { + rdr.bump(); + rdr.bump(); + level -= 1; + } else { + rdr.bump(); + } + } + } + // restart whitespace munch. + be consume_any_whitespace(rdr); +} + fn next_token(reader rdr) -> token.token { auto accum_str = ""; auto accum_int = 0; @@ -310,18 +342,19 @@ fn next_token(reader rdr) -> token.token { } - fn op_or_opeq(reader rdr, token.op op) -> token.token { + fn binop(reader rdr, token.binop op) -> token.token { rdr.bump(); if (rdr.next() == '=') { rdr.bump(); - ret token.OPEQ(op); + ret token.BINOPEQ(op); } else { - ret token.OP(op); + ret token.BINOP(op); } } alt (c) { // One-byte tokens. + case (':') { rdr.bump(); ret token.COLON(); } case (';') { rdr.bump(); ret token.SEMI(); } case (',') { rdr.bump(); ret token.COMMA(); } case ('.') { rdr.bump(); ret token.DOT(); } @@ -334,16 +367,74 @@ fn next_token(reader rdr) -> token.token { case ('@') { rdr.bump(); ret token.AT(); } case ('#') { rdr.bump(); ret token.POUND(); } case ('_') { rdr.bump(); ret token.UNDERSCORE(); } + case ('~') { rdr.bump(); ret token.TILDE(); } + // Multi-byte tokens. case ('=') { - if (rdr.next() == '=') { + rdr.bump(); + if (rdr.curr() == '=') { rdr.bump(); - rdr.bump(); - ret token.OP(token.EQEQ()); + ret token.EQEQ(); } else { + ret token.EQ(); + } + } + + case ('!') { + rdr.bump(); + if (rdr.curr() == '=') { rdr.bump(); - ret token.OP(token.EQ()); + ret token.NE(); + } else { + ret token.NOT(); + } + } + + case ('<') { + rdr.bump(); + alt (rdr.curr()) { + case ('=') { + rdr.bump(); + ret token.LE(); + } + case ('<') { + ret binop(rdr, token.LSL()); + } + case ('-') { + rdr.bump(); + ret token.LARROW(); + } + case ('|') { + rdr.bump(); + ret token.SEND(); + } + case (_) { + ret token.LT(); + } + } + } + + case ('>') { + rdr.bump(); + alt (rdr.curr()) { + case ('=') { + rdr.bump(); + ret token.GE(); + } + + case ('>') { + if (rdr.next() == '>') { + rdr.bump(); + ret binop(rdr, token.ASR()); + } else { + ret binop(rdr, token.LSR()); + } + } + + case (_) { + ret token.GT(); + } } } @@ -426,7 +517,7 @@ fn next_token(reader rdr) -> token.token { rdr.bump(); ret token.RARROW(); } else { - ret op_or_opeq(rdr, token.MINUS()); + ret binop(rdr, token.MINUS()); } } @@ -434,34 +525,40 @@ fn next_token(reader rdr) -> token.token { if (rdr.next() == '&') { rdr.bump(); rdr.bump(); - ret token.OP(token.ANDAND()); + ret token.ANDAND(); } else { - ret op_or_opeq(rdr, token.AND()); + ret binop(rdr, token.AND()); + } + } + + case ('|') { + if (rdr.next() == '|') { + rdr.bump(); + rdr.bump(); + ret token.OROR(); + } else { + ret binop(rdr, token.OR()); } } case ('+') { - ret op_or_opeq(rdr, token.PLUS()); + ret binop(rdr, token.PLUS()); } case ('*') { - ret op_or_opeq(rdr, token.STAR()); + ret binop(rdr, token.STAR()); } case ('/') { - ret op_or_opeq(rdr, token.STAR()); - } - - case ('!') { - ret op_or_opeq(rdr, token.NOT()); + ret binop(rdr, token.STAR()); } case ('^') { - ret op_or_opeq(rdr, token.CARET()); + ret binop(rdr, token.CARET()); } case ('%') { - ret op_or_opeq(rdr, token.PERCENT()); + ret binop(rdr, token.PERCENT()); } } diff --git a/src/comp/fe/token.rs b/src/comp/fe/token.rs index 2ec492fabec..e9e6f2228d8 100644 --- a/src/comp/fe/token.rs +++ b/src/comp/fe/token.rs @@ -3,12 +3,21 @@ import util.common.ty_mach_to_str; import std._int; import std._uint; -type op = tag +type binop = tag (PLUS(), MINUS(), STAR(), SLASH(), PERCENT(), + CARET(), + AND(), + OR(), + LSL(), + LSR(), + ASR()); + +type token = tag + (/* Expression-operator symbols. */ EQ(), LT(), LE(), @@ -16,20 +25,14 @@ type op = tag NE(), GE(), GT(), + ANDAND(), + OROR(), NOT(), TILDE(), - CARET(), - AND(), - ANDAND(), - OR(), - OROR(), - LSL(), - LSR(), - ASR()); -type token = tag - (OP(op), - OPEQ(op), + BINOP(binop), + BINOPEQ(binop), + AS(), WITH(), @@ -152,40 +155,44 @@ type token = tag BRACEQUOTE(str), EOF()); -fn op_to_str(op o) -> str { +fn binop_to_str(binop o) -> str { alt (o) { - case (PLUS()) { ret "+"; } - case (MINUS()) { ret "-"; } - case (STAR()) { ret "*"; } - case (SLASH()) { ret "/"; } - case (PERCENT()) { ret "%"; } - case (EQ()) { ret "="; } - case (LT()) { ret "<"; } - case (LE()) { ret "<="; } - case (EQEQ()) { ret "=="; } - case (NE()) { ret "!="; } - case (GE()) { ret ">="; } - case (GT()) { ret ">"; } - case (NOT()) { ret "!"; } - case (TILDE()) { ret "~"; } - case (CARET()) { ret "^"; } - case (AND()) { ret "&"; } - case (ANDAND()) { ret "&&"; } - case (OR()) { ret "|"; } - case (OROR()) { ret "||"; } - case (LSL()) { ret "<<"; } - case (LSR()) { ret ">>"; } - case (ASR()) { ret ">>>"; } + case (PLUS()) { ret "+"; } + case (MINUS()) { ret "-"; } + case (STAR()) { ret "*"; } + case (SLASH()) { ret "/"; } + case (PERCENT()) { ret "%"; } + case (CARET()) { ret "^"; } + case (AND()) { ret "&"; } + case (OR()) { ret "|"; } + case (LSL()) { ret "<<"; } + case (LSR()) { ret ">>"; } + case (ASR()) { ret ">>>"; } } } fn to_str(token t) -> str { alt (t) { - case (OP(op)) { ret op_to_str(op); } - case (OPEQ(op)) { ret op_to_str(op) + "="; } + + case (EQ()) { ret "="; } + case (LT()) { ret "<"; } + case (LE()) { ret "<="; } + case (EQEQ()) { ret "=="; } + case (NE()) { ret "!="; } + case (GE()) { ret ">="; } + case (GT()) { ret ">"; } + case (NOT()) { ret "!"; } + case (TILDE()) { ret "~"; } + case (OROR()) { ret "||"; } + case (ANDAND()) { ret "&&"; } + + case (BINOP(op)) { ret binop_to_str(op); } + case (BINOPEQ(op)) { ret binop_to_str(op) + "="; } + case (AS()) { ret "as"; } case (WITH()) { ret "with"; } + /* Structural symbols */ case (AT()) { ret "@"; } case (DOT()) { ret "."; }