rust/src/librustsyntax/parse/token.rs

277 lines
5.9 KiB
Rust

import util::interner;
import util::interner::interner;
import std::map::{hashmap, str_hash};
type str_num = uint;
enum binop {
PLUS,
MINUS,
STAR,
SLASH,
PERCENT,
CARET,
AND,
OR,
LSL,
LSR,
ASR,
}
enum token {
/* Expression-operator symbols. */
EQ,
LT,
LE,
EQEQ,
NE,
GE,
GT,
ANDAND,
OROR,
NOT,
TILDE,
BINOP(binop),
BINOPEQ(binop),
/* Structural symbols */
AT,
DOT,
ELLIPSIS,
COMMA,
SEMI,
COLON,
MOD_SEP,
RARROW,
LARROW,
DARROW,
LPAREN,
RPAREN,
LBRACKET,
RBRACKET,
LBRACE,
RBRACE,
POUND,
POUND_LBRACE,
POUND_LT,
DOLLAR_LPAREN,
DOLLAR_NUM(uint),
/* Literals */
LIT_INT(i64, ast::int_ty),
LIT_UINT(u64, ast::uint_ty),
LIT_FLOAT(str_num, ast::float_ty),
LIT_STR(str_num),
LIT_BOOL(bool),
/* Name components */
IDENT(str_num, bool),
IDX(int),
UNDERSCORE,
BRACEQUOTE(str_num),
EOF,
}
fn binop_to_str(o: binop) -> str {
alt o {
PLUS { ret "+"; }
MINUS { ret "-"; }
STAR { ret "*"; }
SLASH { ret "/"; }
PERCENT { ret "%"; }
CARET { ret "^"; }
AND { ret "&"; }
OR { ret "|"; }
LSL { ret "<<"; }
LSR { ret ">>"; }
ASR { ret ">>>"; }
}
}
fn to_str(in: interner<str>, t: token) -> str {
alt t {
EQ { ret "="; }
LT { ret "<"; }
LE { ret "<="; }
EQEQ { ret "=="; }
NE { ret "!="; }
GE { ret ">="; }
GT { ret ">"; }
NOT { ret "!"; }
TILDE { ret "~"; }
OROR { ret "||"; }
ANDAND { ret "&&"; }
BINOP(op) { ret binop_to_str(op); }
BINOPEQ(op) { ret binop_to_str(op) + "="; }
/* Structural symbols */
AT {
ret "@";
}
DOT { ret "."; }
ELLIPSIS { ret "..."; }
COMMA { ret ","; }
SEMI { ret ";"; }
COLON { ret ":"; }
MOD_SEP { ret "::"; }
RARROW { ret "->"; }
LARROW { ret "<-"; }
DARROW { ret "<->"; }
LPAREN { ret "("; }
RPAREN { ret ")"; }
LBRACKET { ret "["; }
RBRACKET { ret "]"; }
LBRACE { ret "{"; }
RBRACE { ret "}"; }
POUND { ret "#"; }
POUND_LBRACE { ret "#{"; }
POUND_LT { ret "#<"; }
DOLLAR_LPAREN { ret "$("; }
DOLLAR_NUM(u) {
ret "$" + uint::to_str(u as uint, 10u);
}
/* Literals */
LIT_INT(c, ast::ty_char) {
// FIXME: escape.
let mut tmp = "'";
str::push_char(tmp, c as char);
str::push_char(tmp, '\'');
ret tmp;
}
LIT_INT(i, t) {
ret int::to_str(i as int, 10u) + ast_util::int_ty_to_str(t);
}
LIT_UINT(u, t) {
ret uint::to_str(u as uint, 10u) + ast_util::uint_ty_to_str(t);
}
LIT_FLOAT(s, t) {
ret interner::get::<str>(in, s) +
ast_util::float_ty_to_str(t);
}
LIT_STR(s) { // FIXME: escape.
ret "\"" + interner::get::<str>(in, s) + "\"";
}
LIT_BOOL(b) { if b { ret "true"; } else { ret "false"; } }
/* Name components */
IDENT(s, _) {
ret interner::get::<str>(in, s);
}
IDX(i) { ret "_" + int::to_str(i, 10u); }
UNDERSCORE { ret "_"; }
BRACEQUOTE(_) { ret "<bracequote>"; }
EOF { ret "<eof>"; }
}
}
pure fn can_begin_expr(t: token) -> bool {
alt t {
LPAREN { true }
LBRACE { true }
LBRACKET { true }
IDENT(_, _) { true }
UNDERSCORE { true }
TILDE { true }
LIT_INT(_, _) { true }
LIT_UINT(_, _) { true }
LIT_FLOAT(_, _) { true }
LIT_STR(_) { true }
POUND { true }
AT { true }
NOT { true }
BINOP(MINUS) { true }
BINOP(STAR) { true }
BINOP(AND) { true }
MOD_SEP { true }
_ { false }
}
}
fn is_ident(t: token::token) -> bool {
alt t { token::IDENT(_, _) { ret true; } _ { } }
ret false;
}
fn is_plain_ident(t: token::token) -> bool {
ret alt t { token::IDENT(_, false) { true } _ { false } };
}
fn is_bar(t: token::token) -> bool {
alt t { token::BINOP(token::OR) | token::OROR { true } _ { false } }
}
fn is_bad_expr_word(t: token,
bad_expr_words: hashmap<str, ()>,
in: interner<str>) -> bool {
alt t {
token::IDENT(_, false) {
bad_expr_words.contains_key(to_str(in, t))
}
_ { false }
}
}
#[doc = "
All the valid words that have meaning in the Rust language. Some of these are
nonetheless valid as identifiers becasue they are unambiguous.
"]
fn keyword_table() -> hashmap<str, ()> {
let keywords = str_hash();
bad_expr_word_table().keys() {|word|
keywords.insert(word, ());
}
let other_keywords = [
"as",
"bind",
"else",
"false",
"implements",
"move",
"of",
"priv",
"self",
"send",
"static",
"to",
"true",
"use",
"with"
];
for other_keywords.each {|word|
keywords.insert(word, ());
}
ret keywords;
}
#[doc = "
These are the words that shouldn't be allowed as value identifiers,
because, if used at the start of a line, they will cause the line to be
interpreted as a specific kind of statement, which would be confusing.
"]
fn bad_expr_word_table() -> hashmap<str, ()> {
let words = str_hash();
let keys = ["alt", "assert", "be", "break", "check", "claim",
"class", "const", "cont", "copy", "crust", "do", "else",
"enum", "export", "fail", "fn", "for", "if", "iface",
"impl", "import", "let", "log", "loop", "mod",
"mut", "mutable", "native", "new", "pure", "resource",
"ret", "trait", "type", "unchecked", "unsafe", "while"];
for keys.each {|word|
words.insert(word, ());
}
words
}
// Local Variables:
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// End: