277 lines
5.9 KiB
Rust
Raw Normal View History

import util::interner;
import util::interner::interner;
import std::map::{hashmap, str_hash};
2010-08-18 11:35:12 -07:00
2011-05-09 14:17:28 -07:00
// Handle for a string held in the interner. `to_str` turns it back into text
// with `interner::get`.
type str_num = uint;
2012-01-19 14:24:03 -08:00
#[doc = "
Binary operator symbols. These are carried by the BINOP and BINOPEQ tokens;
the spelling noted beside each variant is the one binop_to_str produces.
"]
enum binop {
    PLUS,       // +
    MINUS,      // -
    STAR,       // *
    SLASH,      // /
    PERCENT,    // %
    CARET,      // ^
    AND,        // &
    OR,         // |
    LSL,        // <<
    LSR,        // >>
    ASR,        // >>>
}
2012-01-19 14:24:03 -08:00
enum token {
/* Expression-operator symbols. */
2012-01-19 17:56:05 -08:00
EQ,
LT,
LE,
EQEQ,
NE,
GE,
GT,
ANDAND,
OROR,
NOT,
TILDE,
BINOP(binop),
BINOPEQ(binop),
/* Structural symbols */
2012-01-19 17:56:05 -08:00
AT,
DOT,
ELLIPSIS,
COMMA,
SEMI,
COLON,
MOD_SEP,
RARROW,
LARROW,
DARROW,
LPAREN,
RPAREN,
LBRACKET,
RBRACKET,
LBRACE,
RBRACE,
POUND,
DOLLAR_LPAREN,
DOLLAR_NUM(uint),
/* Literals */
2012-01-19 17:56:05 -08:00
LIT_INT(i64, ast::int_ty),
LIT_UINT(u64, ast::uint_ty),
LIT_FLOAT(str_num, ast::float_ty),
LIT_STR(str_num),
/* Name components */
2012-01-19 17:56:05 -08:00
IDENT(str_num, bool),
UNDERSCORE,
EOF,
}
2010-08-18 11:35:12 -07:00
2011-09-02 15:34:58 -07:00
fn binop_to_str(o: binop) -> str {
2011-07-27 14:19:39 +02:00
alt o {
PLUS { ret "+"; }
MINUS { ret "-"; }
STAR { ret "*"; }
SLASH { ret "/"; }
PERCENT { ret "%"; }
CARET { ret "^"; }
AND { ret "&"; }
OR { ret "|"; }
LSL { ret "<<"; }
LSR { ret ">>"; }
ASR { ret ">>>"; }
2010-08-20 15:57:59 -07:00
}
}
fn to_str(in: interner<str>, t: token) -> str {
2011-07-27 14:19:39 +02:00
alt t {
EQ { ret "="; }
LT { ret "<"; }
LE { ret "<="; }
EQEQ { ret "=="; }
NE { ret "!="; }
GE { ret ">="; }
GT { ret ">"; }
NOT { ret "!"; }
TILDE { ret "~"; }
OROR { ret "||"; }
ANDAND { ret "&&"; }
2011-07-27 14:19:39 +02:00
BINOP(op) { ret binop_to_str(op); }
2011-09-02 15:34:58 -07:00
BINOPEQ(op) { ret binop_to_str(op) + "="; }
2011-07-27 14:19:39 +02:00
/* Structural symbols */
AT {
2011-09-02 15:34:58 -07:00
ret "@";
2011-07-27 14:19:39 +02:00
}
DOT { ret "."; }
ELLIPSIS { ret "..."; }
COMMA { ret ","; }
SEMI { ret ";"; }
COLON { ret ":"; }
MOD_SEP { ret "::"; }
RARROW { ret "->"; }
LARROW { ret "<-"; }
DARROW { ret "<->"; }
LPAREN { ret "("; }
RPAREN { ret ")"; }
LBRACKET { ret "["; }
RBRACKET { ret "]"; }
LBRACE { ret "{"; }
RBRACE { ret "}"; }
POUND { ret "#"; }
2011-09-02 15:34:58 -07:00
DOLLAR_LPAREN { ret "$("; }
DOLLAR_NUM(u) {
ret "$" + uint::to_str(u as uint, 10u);
}
2011-07-27 14:19:39 +02:00
/* Literals */
LIT_INT(c, ast::ty_char) {
// FIXME: escape.
let mut tmp = "'";
str::push_char(tmp, c as char);
str::push_char(tmp, '\'');
ret tmp;
}
LIT_INT(i, t) {
ret int::to_str(i as int, 10u) + ast_util::int_ty_to_str(t);
2011-07-27 14:19:39 +02:00
}
LIT_UINT(u, t) {
ret uint::to_str(u as uint, 10u) + ast_util::uint_ty_to_str(t);
2011-07-27 14:19:39 +02:00
}
LIT_FLOAT(s, t) {
ret interner::get::<str>(in, s) +
ast_util::float_ty_to_str(t);
2011-07-27 14:19:39 +02:00
}
LIT_STR(s) { // FIXME: escape.
ret "\"" + interner::get::<str>(in, s) + "\"";
2011-07-27 14:19:39 +02:00
}
2011-09-02 15:34:58 -07:00
2011-07-27 14:19:39 +02:00
/* Name components */
IDENT(s, _) {
ret interner::get::<str>(in, s);
2011-07-27 14:19:39 +02:00
}
UNDERSCORE { ret "_"; }
EOF { ret "<eof>"; }
}
}
#[doc = "
Returns true for the tokens this module accepts as the first token of an
expression, and false for every other token.
"]
pure fn can_begin_expr(t: token) -> bool {
    alt t {
      // Opening delimiters.
      LPAREN | LBRACE | LBRACKET { true }
      // Names.
      IDENT(_, _) | UNDERSCORE | MOD_SEP { true }
      // Literals.
      LIT_INT(_, _) | LIT_UINT(_, _) | LIT_FLOAT(_, _) | LIT_STR(_) { true }
      // Single-symbol tokens: ~ # @ !
      TILDE | POUND | AT | NOT { true }
      // Of the binary-operator tokens, only - * and & qualify.
      BINOP(MINUS) | BINOP(STAR) | BINOP(AND) { true }
      _ { false }
    }
}
2012-04-22 14:59:04 -07:00
fn is_lit(t: token::token) -> bool {
ret alt t {
token::LIT_INT(_, _) { true }
token::LIT_UINT(_, _) { true }
token::LIT_FLOAT(_, _) { true }
token::LIT_STR(_) { true }
_ { false }
}
}
#[doc = "Returns true if the token is an IDENT, whatever its flag."]
fn is_ident(t: token::token) -> bool {
    alt t {
      token::IDENT(_, _) { true }
      _ { false }
    }
}
fn is_plain_ident(t: token::token) -> bool {
ret alt t { token::IDENT(_, false) { true } _ { false } };
}
fn is_bar(t: token::token) -> bool {
alt t { token::BINOP(token::OR) | token::OROR { true } _ { false } }
}
fn is_bad_expr_word(t: token,
bad_expr_words: hashmap<str, ()>,
in: interner<str>) -> bool {
alt t {
token::IDENT(_, false) {
bad_expr_words.contains_key(to_str(in, t))
}
_ { false }
}
}
#[doc = "
All the valid words that have meaning in the Rust language. Some of these are
nonetheless valid as identifiers because they are unambiguous.
"]
fn keyword_table() -> hashmap<str, ()> {
    // The map is used as a set: only the keys carry information.
    let keywords = str_hash();

    // Start from every word in the bad-expression-word table.
    bad_expr_word_table().keys() {|word|
        keywords.insert(word, ());
    }

    // Then add the remaining keywords. "else" is also listed in
    // bad_expr_word_table, so it is inserted twice.
    let other_keywords = [
        "as",
        "bind",
        "else",
        "false",
        "implements",
        "move",
        "of",
        "priv",
        "self",
        "send",
        "static",
        "to",
        "true",
        "use",
        "with"
    ];
    for other_keywords.each {|word|
        keywords.insert(word, ());
    }
    keywords
}
#[doc = "
These are the words that shouldn't be allowed as value identifiers,
because, if used at the start of a line, they will cause the line to be
interpreted as a specific kind of statement, which would be confusing.
"]
fn bad_expr_word_table() -> hashmap<str, ()> {
let words = str_hash();
let keys = ["alt", "assert", "be", "break", "check", "claim",
"class", "const", "cont", "copy", "crust", "do", "else",
"enum", "export", "fail", "fn", "for", "if", "iface",
"impl", "import", "let", "log", "loop", "mod",
"mut", "native", "new", "pure", "resource",
"ret", "trait", "type", "unchecked", "unsafe", "while"];
for keys.each {|word|
words.insert(word, ());
}
words
}
2010-08-18 11:35:12 -07:00
// Local Variables:
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// End: