rust/src/libsyntax/parse/token.rs

449 lines
11 KiB
Rust
Raw Normal View History

import util::interner;
import util::interner::interner;
import std::map::{hashmap, str_hash};
2012-05-21 12:45:56 -05:00
import std::serialization::{serializer,
deserializer,
serialize_uint,
deserialize_uint,
serialize_i64,
deserialize_i64,
serialize_u64,
deserialize_u64,
serialize_bool,
deserialize_bool};
2010-08-18 13:35:12 -05:00
2012-05-21 12:45:56 -05:00
#[auto_serialize]
2011-05-09 16:17:28 -05:00
/// Numeric handle for an interned string. Tokens such as `LIT_FLOAT`,
/// `LIT_STR`, `IDENT` and `DOC_COMMENT` carry one of these, and `to_str`
/// turns it back into text by calling `get` on the interner it is given.
type str_num = uint;
2012-05-21 12:45:56 -05:00
#[auto_serialize]
2012-01-19 16:24:03 -06:00
/// Binary operator symbols. A value of this type is the payload of the
/// `BINOP` token (the bare operator) and of the `BINOPEQ` token (which
/// `to_str` prints as the operator followed by `=`).
/// The trailing comments give the spelling returned by `binop_to_str`.
enum binop {
2012-01-19 19:56:05 -06:00
PLUS, // `+`
MINUS, // `-`
STAR, // `*`
SLASH, // `/`
PERCENT, // `%`
CARET, // `^`
AND, // `&`
OR, // `|`
SHL, // `<<`
SHR, // `>>`
}
2012-05-21 12:45:56 -05:00
#[auto_serialize]
2012-01-19 16:24:03 -06:00
/// A single lexical token. The trailing comments on the symbol variants give
/// the text that `to_str` produces for them.
enum token {
/* Expression-operator symbols. */
2012-01-19 19:56:05 -06:00
EQ, // `=`
LT, // `<`
LE, // `<=`
EQEQ, // `==`
NE, // `!=`
GE, // `>=`
GT, // `>`
ANDAND, // `&&`
OROR, // `||`
NOT, // `!`
TILDE, // `~`
BINOP(binop), // a bare binary operator, see `binop`
BINOPEQ(binop), // a binary operator immediately followed by `=`
/* Structural symbols */
2012-01-19 19:56:05 -06:00
AT, // `@`
DOT, // `.`
DOTDOT, // `..`
2012-01-19 19:56:05 -06:00
ELLIPSIS, // `...`
COMMA, // `,`
SEMI, // `;`
COLON, // `:`
MOD_SEP, // `::`
RARROW, // `->`
LARROW, // `<-`
DARROW, // `<->`
FAT_ARROW, // `=>`
2012-01-19 19:56:05 -06:00
LPAREN, // `(`
RPAREN, // `)`
LBRACKET, // `[`
RBRACKET, // `]`
LBRACE, // `{`
RBRACE, // `}`
POUND, // `#`
DOLLAR, // `$`
/* Literals */
2012-01-19 19:56:05 -06:00
// Integer value plus its type; `to_str` prints the `ast::ty_char` case as a
// quoted character and every other case as a decimal number plus suffix.
LIT_INT(i64, ast::int_ty),
// Unsigned integer value plus its type suffix.
LIT_UINT(u64, ast::uint_ty),
// Integer literal without a suffix; printed as a plain decimal number.
LIT_INT_UNSUFFIXED(i64),
2012-01-19 19:56:05 -06:00
// Interned text of the literal plus its float type suffix.
LIT_FLOAT(str_num, ast::float_ty),
// Interned contents of a string literal; `to_str` escapes and quotes them.
LIT_STR(str_num),
/* Name components */
2012-01-19 19:56:05 -06:00
// Interned identifier text plus a flag. `is_plain_ident` accepts only
// identifiers whose flag is `false`.
// NOTE(review): the meaning of the flag is not visible in this file --
// TODO confirm against the lexer/parser.
IDENT(str_num, bool),
UNDERSCORE, // `_`
/* For interpolation */
// An already-parsed fragment carried as a token, see `nonterminal`.
INTERPOLATED(nonterminal),
// Interned text of a doc comment; `to_str` returns it unchanged.
DOC_COMMENT(str_num),
2012-01-19 19:56:05 -06:00
EOF, // printed as `<eof>`
}
2010-08-18 13:35:12 -05:00
#[auto_serialize]
/// For interpolation during macro expansion.
///
/// Each variant wraps one kind of syntax fragment; it is carried inside a
/// token as `INTERPOLATED(..)`. The trailing comments give the name `to_str`
/// uses when describing the fragment ("an interpolated <name>").
enum nonterminal {
nt_item(@ast::item), // "item"
nt_block(ast::blk), // "block"
nt_stmt(@ast::stmt), // "statement"
nt_pat( @ast::pat), // "pattern"
nt_expr(@ast::expr), // "expression"
nt_ty( @ast::ty), // "type"
nt_ident(str_num, bool), // "identifier"; same payload shape as the `IDENT` token
nt_path(@ast::path), // "path"
nt_tt( @ast::token_tree), // "tt"; needs to be @-boxed to break a circularity
nt_matchers(~[ast::matcher]) // "matcher sequence"
}
/// Returns the source spelling of a binary operator, e.g. `SHL` gives `<<`.
fn binop_to_str(o: binop) -> ~str {
    let spelling = match o {
        // arithmetic
        PLUS => ~"+",
        MINUS => ~"-",
        STAR => ~"*",
        SLASH => ~"/",
        PERCENT => ~"%",
        // bitwise
        AND => ~"&",
        OR => ~"|",
        CARET => ~"^",
        // shifts
        SHL => ~"<<",
        SHR => ~">>"
    };
    spelling
}
/**
 * Renders a token as text.
 *
 * `in` is the interner used to resolve the `str_num` payloads of float and
 * string literals, identifiers and doc comments. Symbol tokens map to their
 * fixed spelling, literals are re-printed with their type suffix, and an
 * interpolated fragment is described as "an interpolated <kind>" rather
 * than printed.
 *
 * NOTE(review): integer literal values are cast from `i64`/`u64` to
 * `int`/`uint` before printing; if those are narrower than 64 bits on the
 * host this presumably truncates -- TODO confirm.
 */
fn to_str(in: interner<@~str>, t: token) -> ~str {
    match t {
        /* Assignment, comparison and logical operators */
        EQ => ~"=",
        EQEQ => ~"==",
        NE => ~"!=",
        LT => ~"<",
        LE => ~"<=",
        GT => ~">",
        GE => ~">=",
        ANDAND => ~"&&",
        OROR => ~"||",
        NOT => ~"!",
        TILDE => ~"~",
        BINOP(b) => binop_to_str(b),
        BINOPEQ(b) => binop_to_str(b) + ~"=",

        /* Delimiters */
        LPAREN => ~"(",
        RPAREN => ~")",
        LBRACKET => ~"[",
        RBRACKET => ~"]",
        LBRACE => ~"{",
        RBRACE => ~"}",

        /* Remaining structural symbols */
        AT => ~"@",
        DOT => ~".",
        DOTDOT => ~"..",
        ELLIPSIS => ~"...",
        COMMA => ~",",
        SEMI => ~";",
        COLON => ~":",
        MOD_SEP => ~"::",
        RARROW => ~"->",
        LARROW => ~"<-",
        DARROW => ~"<->",
        FAT_ARROW => ~"=>",
        POUND => ~"#",
        DOLLAR => ~"$",

        /* Literals */
        // The character case must stay ahead of the general LIT_INT arm,
        // which would otherwise match it first.
        LIT_INT(ch, ast::ty_char) =>
            ~"'" + char::escape_default(ch as char) + ~"'",
        LIT_INT(val, suffix_ty) =>
            int::to_str(val as int, 10u) + ast_util::int_ty_to_str(suffix_ty),
        LIT_UINT(val, suffix_ty) =>
            uint::to_str(val as uint, 10u)
                + ast_util::uint_ty_to_str(suffix_ty),
        LIT_INT_UNSUFFIXED(val) => int::to_str(val as int, 10u),
        LIT_FLOAT(id, suffix_ty) => {
            let text = *in.get(id);
            // `10.f` is not a float literal, so pad a trailing dot with a zero
            let digits = if text.ends_with(~".") {
                text + ~"0"
            } else {
                text
            };
            digits + ast_util::float_ty_to_str(suffix_ty)
        }
        LIT_STR(id) => ~"\"" + str::escape_default(*in.get(id)) + ~"\"",

        /* Name components */
        IDENT(id, _) => *in.get(id),
        UNDERSCORE => ~"_",

        /* Other */
        DOC_COMMENT(id) => *in.get(id),
        EOF => ~"<eof>",
        INTERPOLATED(nt) => {
            let what = match nt {
                nt_item(*) => ~"item",
                nt_block(*) => ~"block",
                nt_stmt(*) => ~"statement",
                nt_pat(*) => ~"pattern",
                nt_expr(*) => ~"expression",
                nt_ty(*) => ~"type",
                nt_ident(*) => ~"identifier",
                nt_path(*) => ~"path",
                nt_tt(*) => ~"tt",
                nt_matchers(*) => ~"matcher sequence"
            };
            ~"an interpolated " + what
        }
    }
}
/// Returns true if `t` is one of the tokens accepted as the first token of
/// an expression; every token not listed here yields false.
pure fn can_begin_expr(t: token) -> bool {
    match t {
        // opening delimiters
        LPAREN | LBRACE | LBRACKET => true,

        // names and paths
        IDENT(*) | UNDERSCORE | MOD_SEP => true,

        // literals
        LIT_INT(*) | LIT_UINT(*) | LIT_INT_UNSUFFIXED(*)
        | LIT_FLOAT(*) | LIT_STR(*) => true,

        // other leading symbols
        TILDE | POUND | AT | NOT => true,
        BINOP(MINUS) | BINOP(STAR) | BINOP(AND) => true,

        // `|` and `||` open an expression in lambda syntax
        BINOP(OR) | OROR => true,

        // interpolated fragments that can stand where an expression starts
        INTERPOLATED(nt_expr(*)) => true,
        INTERPOLATED(nt_ident(*)) => true,
        INTERPOLATED(nt_block(*)) => true,
        INTERPOLATED(nt_path(*)) => true,

        _ => false
    }
}
/// what's the opposite delimiter?
fn flip_delimiter(&t: token::token) -> token::token {
2012-08-06 14:34:08 -05:00
match t {
2012-08-03 21:59:04 -05:00
token::LPAREN => token::RPAREN,
token::LBRACE => token::RBRACE,
token::LBRACKET => token::RBRACKET,
token::RPAREN => token::LPAREN,
token::RBRACE => token::LBRACE,
token::RBRACKET => token::LBRACKET,
_ => fail
}
}
fn is_lit(t: token) -> bool {
2012-08-06 14:34:08 -05:00
match t {
2012-08-03 21:59:04 -05:00
LIT_INT(_, _) => true,
LIT_UINT(_, _) => true,
LIT_INT_UNSUFFIXED(_) => true,
LIT_FLOAT(_, _) => true,
LIT_STR(_) => true,
_ => false
}
2012-04-22 16:59:04 -05:00
}
/// Returns true if `t` is an `IDENT` token, whatever its payload.
pure fn is_ident(t: token) -> bool {
    match t {
        IDENT(*) => true,
        _ => false
    }
}
/// Returns true if `t` is an `IDENT` token or an interpolated path.
pure fn is_ident_or_path(t: token) -> bool {
    match t {
        IDENT(*) => true,
        INTERPOLATED(nt_path(*)) => true,
        _ => false
    }
}
/// Returns true if `t` is an `IDENT` token whose boolean flag is `false`.
pure fn is_plain_ident(t: token) -> bool {
    match t {
        IDENT(_, flag) => !flag,
        _ => false
    }
}
/// Returns true if `t` is `|` (`BINOP(OR)`) or `||` (`OROR`).
pure fn is_bar(t: token) -> bool {
    match t {
        BINOP(OR) => true,
        OROR => true,
        _ => false
    }
}
2012-08-02 16:33:26 -05:00
/// Fixed identifier numbers for strings that `mk_ident_interner` pre-loads
/// into the interner. Each constant is the position of its string in that
/// function's `init_vec`, so the two lists must be kept in step. The
/// trailing comments give the interned string for each constant.
mod special_idents {
2012-07-18 18:18:02 -05:00
import ast::ident;
const underscore : ident = 0u; // '_'
const anon : ident = 1u; // 'anon'
const dtor : ident = 2u; // 'drop', but that's reserved
const invalid : ident = 3u; // ''
const unary : ident = 4u; // 'unary'
const not_fn : ident = 5u; // '!'
const idx_fn : ident = 6u; // '[]'
const unary_minus_fn : ident = 7u; // 'unary-'
const clownshoes_extensions : ident = 8u; // '__extensions__'
const self_ : ident = 9u; // 'self'
/* for matcher NTs */
const item : ident = 10u; // 'item'
const block : ident = 11u; // 'block'
const stmt : ident = 12u; // 'stmt'
const pat : ident = 13u; // 'pat'
const expr : ident = 14u; // 'expr'
const ty : ident = 15u; // 'ty'
const ident : ident = 16u; // 'ident'
const path : ident = 17u; // 'path'
const tt : ident = 18u; // 'tt'
const matchers : ident = 19u; // 'matchers'
const str : ident = 20u; // 'str', for the type
/* outside of libsyntax */
const ty_visitor : ident = 21u; // 'ty_visitor'
const arg : ident = 22u; // 'arg'
const descrim : ident = 23u; // 'descrim'
const clownshoe_abi : ident = 24u; // '__rust_abi'
const clownshoe_stack_shim : ident = 25u; // '__rust_stack_shim'
const tydesc : ident = 26u; // 'tydesc'
const literally_dtor : ident = 27u; // 'dtor'
const main : ident = 28u; // 'main'
const opaque : ident = 29u; // '<opaque>'
const blk : ident = 30u; // 'blk'
const static : ident = 31u; // 'static'
const intrinsic : ident = 32u; // 'intrinsic'
}
2012-07-18 18:18:02 -05:00
/// The interner type used for identifiers: it stores `@~str` values. This is
/// the type returned by `mk_ident_interner` and `mk_fake_ident_interner`.
type ident_interner = util::interner::interner<@~str>;
/** Key for thread-local data for sneaking interner information to the
 * serializer/deserializer. It sounds like a hack because it is one.
 * Bonus ultra-hack: functions as keys don't work across crates,
 * so we have to use a unique number. See taskgroup_key! in task.rs
 * for another case of this.
 *
 * The macro takes no arguments and expands to a transmute of the pair
 * `(-3 as uint, 0u)` into the function-pointer type used as the key.
 * `mk_ident_interner` passes the result to `task::local_data_get` and
 * `task::local_data_set`. */
macro_rules! interner_key (
() => (unsafe::transmute::<(uint, uint), &fn(+@@token::ident_interner)>(
(-3 as uint, 0u)))
)
2012-07-18 18:18:02 -05:00
2012-08-02 16:33:26 -05:00
fn mk_ident_interner() -> ident_interner {
/* the indices here must correspond to the numbers in special_idents */
2012-07-18 18:18:02 -05:00
let init_vec = ~[@~"_", @~"anon", @~"drop", @~"", @~"unary", @~"!",
@~"[]", @~"unary-", @~"__extensions__", @~"self",
@~"item", @~"block", @~"stmt", @~"pat", @~"expr",
@~"ty", @~"ident", @~"path", @~"tt", @~"matchers",
@~"str", @~"ty_visitor", @~"arg", @~"descrim",
@~"__rust_abi", @~"__rust_stack_shim", @~"tydesc",
@~"dtor", @~"main", @~"<opaque>", @~"blk", @~"static",
@~"intrinsic"];
2012-07-18 18:18:02 -05:00
let rv = interner::mk_prefill::<@~str>(|x| str::hash(*x),
|x,y| str::eq(*x, *y), init_vec);
/* having multiple interners will just confuse the serializer */
unsafe{ assert task::local_data_get(interner_key!()) == none };
unsafe{ task::local_data_set(interner_key!(), @rv) };
2012-08-02 16:33:26 -05:00
rv
}
2012-07-18 18:18:02 -05:00
/**
 * Builds an empty identifier interner for callers that don't care about
 * its contents. Unlike `mk_ident_interner` it does not touch task-local
 * data, so it does not interact with serialization.
 */
fn mk_fake_ident_interner() -> ident_interner {
    interner::mk::<@~str>(
        |s| str::hash(*s),
        |a, b| str::eq(*a, *b))
}
/**
* All the valid words that have meaning in the Rust language.
*
* Rust keywords are either 'contextual' or 'restricted'. Contextual
* keywords may be used as identifiers because their appearance in
* the grammar is unambiguous. Restricted keywords may not appear
* in positions that might otherwise contain _value identifiers_.
*/
fn keyword_table() -> hashmap<~str, ()> {
let keywords = str_hash();
2012-06-30 18:19:07 -05:00
for contextual_keyword_table().each_key |word| {
keywords.insert(word, ());
}
2012-06-30 18:19:07 -05:00
for restricted_keyword_table().each_key |word| {
keywords.insert(word, ());
}
keywords
}
/// Keywords that may be used as identifiers
fn contextual_keyword_table() -> hashmap<~str, ()> {
let words = str_hash();
let keys = ~[
~"as",
~"else",
~"move",
~"of",
~"priv", ~"pub",
~"self", ~"send", ~"static",
~"to",
~"use",
~"with"
];
2012-06-30 18:19:07 -05:00
for keys.each |word| {
words.insert(word, ());
}
words
}
/**
 * The set of restricted keywords: words that may not appear in any
 * position that might otherwise contain a _value identifier_. They may
 * still be used as other kinds of identifiers.
 *
 * Why they are restricted:
 *
 * * For some (most?), starting a line with one would make the line read
 *   as a specific kind of statement, which would be confusing.
 *
 * * `true` or `false` as identifiers would always be shadowed by
 *   the boolean constants
 */
fn restricted_keyword_table() -> hashmap<~str, ()> {
    let table = str_hash();
    let names = ~[
        ~"again", ~"assert", ~"break", ~"check",
        ~"const", ~"copy", ~"do", ~"drop",
        ~"else", ~"enum", ~"export", ~"extern",
        ~"fail", ~"false", ~"fn", ~"for",
        ~"if", ~"impl", ~"import", ~"let",
        ~"log", ~"loop", ~"match", ~"mod",
        ~"module", ~"move", ~"mut", ~"new",
        ~"owned", ~"pure", ~"ref", ~"return",
        ~"struct", ~"true", ~"trait", ~"type",
        ~"unchecked", ~"unsafe", ~"while"
    ];
    for names.each |name| {
        table.insert(name, ());
    }
    table
}
2010-08-18 13:35:12 -05:00
// Local Variables:
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// End: