rust/src/libsyntax/parse/token.rs

352 lines
8.0 KiB
Rust
Raw Normal View History

import util::interner;
import util::interner::interner;
import std::map::{hashmap, str_hash};
2012-05-21 10:45:56 -07:00
import std::serialization::{serializer,
deserializer,
serialize_uint,
deserialize_uint,
serialize_i64,
deserialize_i64,
serialize_u64,
deserialize_u64,
serialize_bool,
deserialize_bool};
2010-08-18 11:35:12 -07:00
2012-05-21 10:45:56 -07:00
// Key identifying an interned string. Tokens carry a `str_num` instead of
// the text itself; `to_str` resolves it with `in.get(..)` on an
// `interner<@~str>`.
#[auto_serialize]
2011-05-09 14:17:28 -07:00
type str_num = uint;
2012-05-21 10:45:56 -07:00
// Binary-operator symbols. A `binop` is carried by the `BINOP` token (the
// bare operator) and by the `BINOPEQ` token (the operator followed by `=`).
// The spelling noted beside each variant is the one `binop_to_str` returns.
#[auto_serialize]
2012-01-19 14:24:03 -08:00
enum binop {
2012-01-19 17:56:05 -08:00
PLUS, // +
MINUS, // -
STAR, // *
SLASH, // /
PERCENT, // %
CARET, // ^
AND, // &
OR, // |
SHL, // <<
SHR, // >>
}
2012-05-21 10:45:56 -07:00
// A single lexical token. The spelling noted beside each symbol variant is
// the text `to_str` produces for it.
#[auto_serialize]
2012-01-19 14:24:03 -08:00
enum token {
/* Expression-operator symbols. */
2012-01-19 17:56:05 -08:00
EQ, // =
LT, // <
LE, // <=
EQEQ, // ==
NE, // !=
GE, // >=
GT, // >
ANDAND, // &&
OROR, // ||
NOT, // !
TILDE, // ~
BINOP(binop), // a bare binary operator, e.g. `+`
BINOPEQ(binop), // a binary operator followed by `=`, e.g. `+=`
/* Structural symbols */
2012-01-19 17:56:05 -08:00
AT, // @
DOT, // .
ELLIPSIS, // ...
COMMA, // ,
SEMI, // ;
COLON, // :
MOD_SEP, // ::
RARROW, // ->
LARROW, // <-
DARROW, // <->
FAT_ARROW, // =>
2012-01-19 17:56:05 -08:00
LPAREN, // (
RPAREN, // )
LBRACKET, // [
RBRACKET, // ]
LBRACE, // {
RBRACE, // }
POUND, // #
DOLLAR, // $
/* Literals */
2012-01-19 17:56:05 -08:00
// Signed integer literal: value and integer type. `to_str` prints a
// LIT_INT whose type is `ast::ty_char` as a character literal.
LIT_INT(i64, ast::int_ty),
// Unsigned integer literal: value and integer type.
LIT_UINT(u64, ast::uint_ty),
// Integer literal with no type attached; `to_str` prints only the value.
LIT_INT_UNSUFFIXED(i64),
2012-01-19 17:56:05 -08:00
// Float literal: interned text of the literal and its float type.
LIT_FLOAT(str_num, ast::float_ty),
// String literal: interned contents (`to_str` escapes and quotes them).
LIT_STR(str_num),
/* Name components */
2012-01-19 17:56:05 -08:00
// Identifier: interned name plus a flag. `is_plain_ident` accepts only
// identifiers whose flag is `false`.
// NOTE(review): the meaning of a `true` flag is not visible in this file --
// confirm against the lexer/parser.
IDENT(str_num, bool),
UNDERSCORE, // _
/* For interpolation */
// An already-parsed fragment standing in for tokens (see `nonterminal`).
INTERPOLATED(nonterminal),
// Doc comment: interned text, returned unchanged by `to_str`.
DOC_COMMENT(str_num),
2012-01-19 17:56:05 -08:00
EOF, // printed as `<eof>`
}
2010-08-18 11:35:12 -07:00
#[auto_serialize]
/// For interpolation during macro expansion.
// Each variant wraps one kind of syntax fragment; a `nonterminal` is carried
// by the `INTERPOLATED` token. `to_str` does not print the fragment itself,
// only a description naming its kind ("item", "block", "statement", ...).
enum nonterminal {
nt_item(@ast::item),
nt_block(ast::blk),
nt_stmt(@ast::stmt),
nt_pat( @ast::pat),
nt_expr(@ast::expr),
nt_ty( @ast::ty),
// Same payload shape as the `IDENT` token: interned name plus a flag.
nt_ident(str_num, bool),
nt_path(@ast::path),
// Must be @-boxed to break a circularity.
// NOTE(review): presumably between `ast::token_tree` and `token` -- confirm.
nt_tt( @ast::token_tree),
nt_matchers(~[ast::matcher])
}
/// Give the source spelling of a binary-operator symbol (e.g. `SHL` is `<<`).
fn binop_to_str(o: binop) -> ~str {
    alt o {
      // Shifts
      SHL     { ~"<<" }
      SHR     { ~">>" }

      // Bitwise
      AND     { ~"&" }
      OR      { ~"|" }
      CARET   { ~"^" }

      // Arithmetic
      PLUS    { ~"+" }
      MINUS   { ~"-" }
      STAR    { ~"*" }
      SLASH   { ~"/" }
      PERCENT { ~"%" }
    }
}
/**
 * Produce the textual form of a token.
 *
 * Symbol tokens map to their fixed spelling. Tokens that carry a `str_num`
 * (float and string literals, identifiers, doc comments) are resolved through
 * the interner `in`. An interpolated nonterminal is not printed; the result
 * is a description such as "an interpolated expression".
 *
 * Integer literals are formatted at their full 64-bit width with
 * `i64::to_str` / `u64::to_str`. Casting the value to `int` / `uint` first
 * would truncate it on targets where those types are narrower than 64 bits.
 */
fn to_str(in: interner<@~str>, t: token) -> ~str {
    alt t {
      /* Expression-operator symbols. */
      EQ { ~"=" }
      LT { ~"<" }
      LE { ~"<=" }
      EQEQ { ~"==" }
      NE { ~"!=" }
      GE { ~">=" }
      GT { ~">" }
      NOT { ~"!" }
      TILDE { ~"~" }
      OROR { ~"||" }
      ANDAND { ~"&&" }
      BINOP(op) { binop_to_str(op) }
      BINOPEQ(op) { binop_to_str(op) + ~"=" }

      /* Structural symbols */
      AT { ~"@" }
      DOT { ~"." }
      ELLIPSIS { ~"..." }
      COMMA { ~"," }
      SEMI { ~";" }
      COLON { ~":" }
      MOD_SEP { ~"::" }
      RARROW { ~"->" }
      LARROW { ~"<-" }
      DARROW { ~"<->" }
      FAT_ARROW { ~"=>" }
      LPAREN { ~"(" }
      RPAREN { ~")" }
      LBRACKET { ~"[" }
      RBRACKET { ~"]" }
      LBRACE { ~"{" }
      RBRACE { ~"}" }
      POUND { ~"#" }
      DOLLAR { ~"$" }

      /* Literals */
      // A LIT_INT typed `ast::ty_char` is printed as a character literal.
      // This arm must stay ahead of the general LIT_INT arm below.
      LIT_INT(c, ast::ty_char) {
        ~"'" + char::escape_default(c as char) + ~"'"
      }
      // Format the i64 / u64 payload directly; no narrowing cast.
      LIT_INT(i, t) {
        i64::to_str(i, 10u) + ast_util::int_ty_to_str(t)
      }
      LIT_UINT(u, t) {
        u64::to_str(u, 10u) + ast_util::uint_ty_to_str(t)
      }
      LIT_INT_UNSUFFIXED(i) {
        i64::to_str(i, 10u)
      }
      LIT_FLOAT(s, t) {
        let mut body = *in.get(s);
        if body.ends_with(".") {
            body = body + "0";  // `10.f` is not a float literal
        }
        body + ast_util::float_ty_to_str(t)
      }
      LIT_STR(s) { ~"\"" + str::escape_default( *in.get(s)) + ~"\"" }

      /* Name components */
      IDENT(s, _) { *in.get(s) }
      UNDERSCORE { ~"_" }

      /* Other */
      DOC_COMMENT(s) { *in.get(s) }
      EOF { ~"<eof>" }
      // Describe the kind of fragment; its contents are not printed.
      INTERPOLATED(nt) {
        ~"an interpolated " +
            alt nt {
              nt_item(*) { ~"item" }
              nt_block(*) { ~"block" }
              nt_stmt(*) { ~"statement" }
              nt_pat(*) { ~"pattern" }
              nt_expr(*) { ~"expression" }
              nt_ty(*) { ~"type" }
              nt_ident(*) { ~"identifier" }
              nt_path(*) { ~"path" }
              nt_tt(*) { ~"tt" }
              nt_matchers(*) { ~"matcher sequence" }
            }
      }
    }
}
/**
 * Report whether a token is accepted as the first token of an expression.
 *
 * Every token not listed below yields `false`.
 */
pure fn can_begin_expr(t: token) -> bool {
    alt t {
      // Opening delimiters
      LPAREN | LBRACE | LBRACKET { true }

      // Names
      IDENT(_, _) | UNDERSCORE { true }

      // Literals
      LIT_INT(_, _)
      | LIT_UINT(_, _)
      | LIT_INT_UNSUFFIXED(_)
      | LIT_FLOAT(_, _)
      | LIT_STR(_) { true }

      // Single-token symbols
      TILDE | POUND | AT | NOT | MOD_SEP { true }
      BINOP(MINUS) | BINOP(STAR) | BINOP(AND) { true }

      // `|` and `||`: in lambda syntax
      BINOP(OR) | OROR { true }

      // Interpolated fragments
      INTERPOLATED(nt_expr(*))
      | INTERPOLATED(nt_ident(*))
      | INTERPOLATED(nt_block(*))
      | INTERPOLATED(nt_path(*)) { true }

      _ { false }
    }
}
/**
 * Return the delimiter that pairs with `t`: an opening parenthesis, brace
 * or bracket maps to its closing counterpart and vice versa.
 *
 * Fails if `t` is not one of those six delimiter tokens.
 */
fn flip_delimiter(&t: token::token) -> token::token {
    alt t {
      // ( <-> )
      token::LPAREN   { token::RPAREN }
      token::RPAREN   { token::LPAREN }

      // { <-> }
      token::LBRACE   { token::RBRACE }
      token::RBRACE   { token::LBRACE }

      // [ <-> ]
      token::LBRACKET { token::RBRACKET }
      token::RBRACKET { token::LBRACKET }

      _ { fail }
    }
}
fn is_lit(t: token) -> bool {
alt t {
LIT_INT(_, _) { true }
LIT_UINT(_, _) { true }
LIT_INT_UNSUFFIXED(_) { true }
LIT_FLOAT(_, _) { true }
LIT_STR(_) { true }
_ { false }
}
2012-04-22 14:59:04 -07:00
}
/// True if `t` is an `IDENT` token, whatever its name and flag.
pure fn is_ident(t: token) -> bool {
    alt t {
      IDENT(_, _) { true }
      _ { false }
    }
}
/// True if `t` is an `IDENT` token whose flag is `false`.
pure fn is_plain_ident(t: token) -> bool {
    alt t {
      IDENT(_, false) { true }
      _ { false }
    }
}
/// True if `t` is `|` (`BINOP(OR)`) or `||` (`OROR`).
pure fn is_bar(t: token) -> bool {
    alt t {
      BINOP(OR) | OROR { true }
      _ { false }
    }
}
/**
* All the valid words that have meaning in the Rust language.
*
* Rust keywords are either 'contextual' or 'restricted'. Contextual
* keywords may be used as identifiers because their appearance in
* the grammar is unambiguous. Restricted keywords may not appear
* in positions that might otherwise contain _value identifiers_.
*/
fn keyword_table() -> hashmap<~str, ()> {
let keywords = str_hash();
2012-06-30 16:19:07 -07:00
for contextual_keyword_table().each_key |word| {
keywords.insert(word, ());
}
2012-06-30 16:19:07 -07:00
for restricted_keyword_table().each_key |word| {
keywords.insert(word, ());
}
keywords
}
/// Keywords that may be used as identifiers
fn contextual_keyword_table() -> hashmap<~str, ()> {
let words = str_hash();
let keys = ~[
~"as",
~"else",
~"move",
~"of",
~"priv", ~"pub",
~"self", ~"send", ~"static",
~"to",
~"use",
~"with"
];
2012-06-30 16:19:07 -07:00
for keys.each |word| {
words.insert(word, ());
}
words
}
/**
 * The set of keywords that are barred from every position that could
 * otherwise hold a _value identifier_. They may still be used as other
 * kinds of identifier.
 *
 * Why they are restricted:
 *
 * * Some (most?) of them, when they start a line, make that line parse as
 *   a particular kind of statement, which would be confusing.
 *
 * * `true` and `false` used as identifiers would always be shadowed by the
 *   boolean constants.
 */
fn restricted_keyword_table() -> hashmap<~str, ()> {
    let table = str_hash();
    let names = ~[
        ~"alt", ~"again", ~"assert", ~"break", ~"check", ~"class",
        ~"const", ~"copy", ~"do", ~"drop", ~"else", ~"enum",
        ~"export", ~"extern", ~"fail", ~"false", ~"fn", ~"for",
        ~"if", ~"impl", ~"import", ~"let", ~"log", ~"loop",
        ~"match", ~"mod", ~"module", ~"move", ~"mut", ~"new",
        ~"owned", ~"pure", ~"ref", ~"return", ~"struct", ~"true",
        ~"trait", ~"type", ~"unchecked", ~"unsafe", ~"while"
    ];
    for names.each |kw| {
        table.insert(kw, ());
    }
    table
}
2010-08-18 11:35:12 -07:00
// Local Variables:
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// End: