2011-07-05 11:48:19 +02:00
|
|
|
import util::interner;
|
2012-04-15 03:44:32 -07:00
|
|
|
import util::interner::interner;
|
2012-04-17 21:14:40 -07:00
|
|
|
import std::map::{hashmap, str_hash};
|
2010-08-18 11:35:12 -07:00
|
|
|
|
2011-05-09 14:17:28 -07:00
|
|
|
// Handle for interned text: `to_str` passes the `str_num` carried by
// LIT_FLOAT, LIT_STR and IDENT tokens to `interner::get` to recover the string.
type str_num = uint;
|
|
|
|
|
2012-01-19 14:24:03 -08:00
|
|
|
// Binary operators carried by the BINOP and BINOPEQ tokens.
// The spelling of each operator is the one produced by `binop_to_str`.
// Variant order is unchanged.
enum binop {
    PLUS, MINUS, STAR, SLASH, PERCENT,  // +  -  *  /  %
    CARET, AND, OR,                     // ^  &  |
    SHL, SHR,                           // << >>
}
|
|
|
|
|
2012-01-19 14:24:03 -08:00
|
|
|
enum token {
|
2010-09-09 15:59:29 -07:00
|
|
|
/* Expression-operator symbols. */
|
2012-01-19 17:56:05 -08:00
|
|
|
EQ,
|
|
|
|
LT,
|
|
|
|
LE,
|
|
|
|
EQEQ,
|
|
|
|
NE,
|
|
|
|
GE,
|
|
|
|
GT,
|
|
|
|
ANDAND,
|
|
|
|
OROR,
|
|
|
|
NOT,
|
|
|
|
TILDE,
|
|
|
|
BINOP(binop),
|
|
|
|
BINOPEQ(binop),
|
2010-09-09 15:59:29 -07:00
|
|
|
|
|
|
|
/* Structural symbols */
|
2012-01-19 17:56:05 -08:00
|
|
|
AT,
|
|
|
|
DOT,
|
|
|
|
ELLIPSIS,
|
|
|
|
COMMA,
|
|
|
|
SEMI,
|
|
|
|
COLON,
|
|
|
|
MOD_SEP,
|
|
|
|
RARROW,
|
|
|
|
LARROW,
|
|
|
|
DARROW,
|
2012-06-04 18:34:10 -07:00
|
|
|
FAT_ARROW,
|
2012-01-19 17:56:05 -08:00
|
|
|
LPAREN,
|
|
|
|
RPAREN,
|
|
|
|
LBRACKET,
|
|
|
|
RBRACKET,
|
|
|
|
LBRACE,
|
|
|
|
RBRACE,
|
|
|
|
POUND,
|
2012-04-22 16:58:04 -07:00
|
|
|
DOLLAR,
|
2012-01-25 16:38:09 -07:00
|
|
|
|
2010-09-09 15:59:29 -07:00
|
|
|
/* Literals */
|
2012-01-19 17:56:05 -08:00
|
|
|
LIT_INT(i64, ast::int_ty),
|
|
|
|
LIT_UINT(u64, ast::uint_ty),
|
2012-06-11 16:31:03 -07:00
|
|
|
LIT_INT_UNSUFFIXED(i64, ast::int_ty),
|
2012-01-19 17:56:05 -08:00
|
|
|
LIT_FLOAT(str_num, ast::float_ty),
|
|
|
|
LIT_STR(str_num),
|
2010-09-09 15:59:29 -07:00
|
|
|
|
|
|
|
/* Name components */
|
2012-01-19 17:56:05 -08:00
|
|
|
IDENT(str_num, bool),
|
|
|
|
UNDERSCORE,
|
|
|
|
EOF,
|
2010-09-09 15:59:29 -07:00
|
|
|
}
|
2010-08-18 11:35:12 -07:00
|
|
|
|
2011-09-02 15:34:58 -07:00
|
|
|
fn binop_to_str(o: binop) -> str {
|
2011-07-27 14:19:39 +02:00
|
|
|
alt o {
|
2012-06-11 16:49:35 -07:00
|
|
|
PLUS { "+" }
|
|
|
|
MINUS { "-" }
|
|
|
|
STAR { "*" }
|
|
|
|
SLASH { "/" }
|
|
|
|
PERCENT { "%" }
|
|
|
|
CARET { "^" }
|
|
|
|
AND { "&" }
|
|
|
|
OR { "|" }
|
|
|
|
SHL { "<<" }
|
|
|
|
SHR { ">>" }
|
2010-08-20 15:57:59 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-06-09 00:53:34 -07:00
|
|
|
fn to_str(in: interner<@str>, t: token) -> str {
|
2011-07-27 14:19:39 +02:00
|
|
|
alt t {
|
2012-06-11 16:49:35 -07:00
|
|
|
EQ { "=" }
|
|
|
|
LT { "<" }
|
|
|
|
LE { "<=" }
|
|
|
|
EQEQ { "==" }
|
|
|
|
NE { "!=" }
|
|
|
|
GE { ">=" }
|
|
|
|
GT { ">" }
|
|
|
|
NOT { "!" }
|
|
|
|
TILDE { "~" }
|
|
|
|
OROR { "||" }
|
|
|
|
ANDAND { "&&" }
|
|
|
|
BINOP(op) { binop_to_str(op) }
|
|
|
|
BINOPEQ(op) { binop_to_str(op) + "=" }
|
2011-09-02 15:34:58 -07:00
|
|
|
|
2011-07-27 14:19:39 +02:00
|
|
|
/* Structural symbols */
|
2012-06-11 16:49:35 -07:00
|
|
|
AT { "@" }
|
|
|
|
DOT { "." }
|
|
|
|
ELLIPSIS { "..." }
|
|
|
|
COMMA { "," }
|
|
|
|
SEMI { "" }
|
|
|
|
COLON { ":" }
|
|
|
|
MOD_SEP { "::" }
|
|
|
|
RARROW { "->" }
|
|
|
|
LARROW { "<-" }
|
|
|
|
DARROW { "<->" }
|
|
|
|
FAT_ARROW { "=>" }
|
|
|
|
LPAREN { "(" }
|
|
|
|
RPAREN { ")" }
|
|
|
|
LBRACKET { "[" }
|
|
|
|
RBRACKET { "]" }
|
|
|
|
LBRACE { "{" }
|
|
|
|
RBRACE { "}" }
|
|
|
|
POUND { "#" }
|
|
|
|
DOLLAR { "$" }
|
2012-01-25 16:38:09 -07:00
|
|
|
|
2011-07-27 14:19:39 +02:00
|
|
|
/* Literals */
|
2012-01-18 22:37:22 -08:00
|
|
|
LIT_INT(c, ast::ty_char) {
|
2012-06-11 16:49:35 -07:00
|
|
|
"'" + char::escape_default(c as char) + "'"
|
2011-12-07 21:06:12 +01:00
|
|
|
}
|
|
|
|
LIT_INT(i, t) {
|
2012-06-11 16:49:35 -07:00
|
|
|
int::to_str(i as int, 10u) + ast_util::int_ty_to_str(t)
|
2011-07-27 14:19:39 +02:00
|
|
|
}
|
2011-12-07 21:06:12 +01:00
|
|
|
LIT_UINT(u, t) {
|
2012-06-11 16:49:35 -07:00
|
|
|
uint::to_str(u as uint, 10u) + ast_util::uint_ty_to_str(t)
|
2011-07-27 14:19:39 +02:00
|
|
|
}
|
2012-06-11 16:31:03 -07:00
|
|
|
LIT_INT_UNSUFFIXED(i, t) {
|
2012-06-11 16:49:35 -07:00
|
|
|
int::to_str(i as int, 10u) + ast_util::int_ty_to_str(t)
|
2012-06-11 16:31:03 -07:00
|
|
|
}
|
2011-12-07 21:06:12 +01:00
|
|
|
LIT_FLOAT(s, t) {
|
2012-06-11 16:49:35 -07:00
|
|
|
*interner::get(in, s) +
|
|
|
|
ast_util::float_ty_to_str(t)
|
2011-07-27 14:19:39 +02:00
|
|
|
}
|
2012-05-31 15:31:13 -07:00
|
|
|
LIT_STR(s) {
|
2012-06-11 16:49:35 -07:00
|
|
|
"\""
|
2012-06-09 00:53:34 -07:00
|
|
|
+ str::escape_default(*interner::get(in, s))
|
2012-06-11 16:49:35 -07:00
|
|
|
+ "\""
|
2011-07-27 14:19:39 +02:00
|
|
|
}
|
|
|
|
/* Name components */
|
|
|
|
IDENT(s, _) {
|
2012-06-11 16:49:35 -07:00
|
|
|
*interner::get(in, s)
|
2011-07-27 14:19:39 +02:00
|
|
|
}
|
2012-06-11 16:49:35 -07:00
|
|
|
UNDERSCORE { "_" }
|
|
|
|
EOF { "<eof>" }
|
2010-08-20 11:41:34 -07:00
|
|
|
}
|
|
|
|
}
|
2011-07-03 11:48:14 -07:00
|
|
|
|
2011-08-24 13:41:50 -07:00
|
|
|
// True when `t` is one of the tokens listed below, i.e. a token that may
// start an expression; every other token yields false.
pure fn can_begin_expr(t: token) -> bool {
    alt t {
      // Opening delimiters.
      LPAREN | LBRACE | LBRACKET |
      // Names.
      IDENT(_, _) | UNDERSCORE |
      TILDE |
      // Literals.
      LIT_INT(_, _) | LIT_UINT(_, _) | LIT_INT_UNSUFFIXED(_, _) |
      LIT_FLOAT(_, _) | LIT_STR(_) |
      // Remaining sigils and the three accepted leading binary operators.
      POUND | AT | NOT |
      BINOP(MINUS) | BINOP(STAR) | BINOP(AND) |
      MOD_SEP { true }
      _ { false }
    }
}
|
2011-10-07 16:22:53 +02:00
|
|
|
|
2012-06-11 16:49:35 -07:00
|
|
|
// True when `t` is any of the literal tokens (integer, unsigned integer,
// unsuffixed integer, float or string).
fn is_lit(t: token) -> bool {
    alt t {
      LIT_INT(_, _) | LIT_UINT(_, _) | LIT_INT_UNSUFFIXED(_, _) |
      LIT_FLOAT(_, _) | LIT_STR(_) { true }
      _ { false }
    }
}
|
|
|
|
|
2012-06-11 16:49:35 -07:00
|
|
|
// True when `t` is an IDENT token, whatever its payload.
pure fn is_ident(t: token) -> bool {
    alt t {
      IDENT(_, _) { true }
      _ { false }
    }
}
|
|
|
|
|
2012-06-11 16:49:35 -07:00
|
|
|
// True when `t` is an IDENT token whose boolean flag is false.
pure fn is_plain_ident(t: token) -> bool {
    alt t {
      // Plain means the flag is unset, so the answer is its negation.
      IDENT(_, flag) { !flag }
      _ { false }
    }
}
|
|
|
|
|
2012-06-11 16:49:35 -07:00
|
|
|
// True when `t` is a single bar (BINOP(OR)) or a double bar (OROR).
pure fn is_bar(t: token) -> bool {
    alt t {
      BINOP(OR) { true }
      OROR { true }
      _ { false }
    }
}
|
|
|
|
|
2012-04-19 16:44:24 -07:00
|
|
|
#[doc = "
|
2012-04-24 22:33:49 -07:00
|
|
|
All the valid words that have meaning in the Rust language.
|
|
|
|
|
|
|
|
Rust keywords are either 'contextual' or 'restricted'. Contextual
|
|
|
|
keywords may be used as identifiers because their appearance in
|
|
|
|
the grammar is unambiguous. Restricted keywords may not appear
|
|
|
|
in positions that might otherwise contain _value identifiers_.
|
2012-04-19 16:44:24 -07:00
|
|
|
"]
|
|
|
|
fn keyword_table() -> hashmap<str, ()> {
|
|
|
|
let keywords = str_hash();
|
2012-04-24 22:33:49 -07:00
|
|
|
for contextual_keyword_table().each_key {|word|
|
|
|
|
keywords.insert(word, ());
|
|
|
|
}
|
|
|
|
for restricted_keyword_table().each_key {|word|
|
2012-04-19 16:44:24 -07:00
|
|
|
keywords.insert(word, ());
|
|
|
|
}
|
2012-06-11 16:49:35 -07:00
|
|
|
keywords
|
2012-04-24 22:33:49 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
#[doc = "Keywords that may be used as identifiers"]
|
|
|
|
fn contextual_keyword_table() -> hashmap<str, ()> {
|
|
|
|
let words = str_hash();
|
|
|
|
let keys = [
|
2012-04-19 16:44:24 -07:00
|
|
|
"as",
|
|
|
|
"bind",
|
|
|
|
"else",
|
|
|
|
"implements",
|
|
|
|
"move",
|
|
|
|
"of",
|
2012-05-08 16:06:24 +02:00
|
|
|
"priv", "pub",
|
2012-04-24 22:33:49 -07:00
|
|
|
"self", "send", "static",
|
2012-04-19 16:44:24 -07:00
|
|
|
"to",
|
|
|
|
"use",
|
|
|
|
"with"
|
|
|
|
];
|
2012-04-24 22:33:49 -07:00
|
|
|
for keys.each {|word|
|
|
|
|
words.insert(word, ());
|
2012-04-19 16:44:24 -07:00
|
|
|
}
|
2012-04-24 22:33:49 -07:00
|
|
|
words
|
2012-04-19 16:44:24 -07:00
|
|
|
}
|
|
|
|
|
2012-04-17 21:14:40 -07:00
|
|
|
#[doc = "
|
2012-04-24 22:33:49 -07:00
|
|
|
Keywords that may not appear in any position that might otherwise contain a
|
|
|
|
_value identifier_. Restricted keywords may still be used as other types of
|
|
|
|
identifiers.
|
|
|
|
|
|
|
|
Reasons:
|
|
|
|
|
|
|
|
* For some (most?), if used at the start of a line, they will cause the line
|
|
|
|
to be interpreted as a specific kind of statement, which would be confusing.
|
|
|
|
|
|
|
|
* `true` or `false` as identifiers would always be shadowed by
|
|
|
|
the boolean constants
|
2012-04-17 21:14:40 -07:00
|
|
|
"]
|
2012-04-24 22:33:49 -07:00
|
|
|
fn restricted_keyword_table() -> hashmap<str, ()> {
|
2012-04-17 21:14:40 -07:00
|
|
|
let words = str_hash();
|
2012-04-24 22:33:49 -07:00
|
|
|
let keys = [
|
|
|
|
"alt",
|
|
|
|
"assert",
|
|
|
|
"be", "break",
|
|
|
|
"check", "claim", "class", "const", "cont", "copy", "crust",
|
2012-05-22 11:19:03 -07:00
|
|
|
"drop",
|
2012-04-24 22:33:49 -07:00
|
|
|
"else", "enum", "export",
|
|
|
|
"fail", "false", "fn", "for",
|
|
|
|
"if", "iface", "impl", "import",
|
|
|
|
"let", "log", "loop",
|
|
|
|
"mod", "mut",
|
|
|
|
"native", "new",
|
|
|
|
"pure",
|
|
|
|
"resource", "ret",
|
|
|
|
"true", "trait", "type",
|
|
|
|
"unchecked", "unsafe",
|
|
|
|
"while"
|
|
|
|
];
|
2012-04-17 21:14:40 -07:00
|
|
|
for keys.each {|word|
|
|
|
|
words.insert(word, ());
|
|
|
|
}
|
|
|
|
words
|
|
|
|
}
|
|
|
|
|
2010-08-18 11:35:12 -07:00
|
|
|
// Local Variables:
|
|
|
|
// fill-column: 78;
|
|
|
|
// indent-tabs-mode: nil
|
|
|
|
// c-basic-offset: 4
|
|
|
|
// buffer-file-coding-system: utf-8-unix
|
|
|
|
// End:
|