rust/src/libsyntax/parse/token.rs

import util::interner;
import util::interner::interner;
import std::map::{hashmap, str_hash};
import std::serialization::{serializer,
                            deserializer,
                            serialize_uint,
                            deserialize_uint,
                            serialize_i64,
                            deserialize_i64,
                            serialize_u64,
                            deserialize_u64,
                            serialize_bool,
                            deserialize_bool};

// Handle for an interned string. `to_str` in this file turns a `str_num`
// back into text by passing it to `interner::get`.
#[auto_serialize]
type str_num = uint;

// Binary operator symbols. They appear as the payload of `BINOP` (the bare
// operator) and `BINOPEQ` (the operator followed by `=`) in `token`.
// `binop_to_str` gives the spelling of each variant, repeated below.
#[auto_serialize]
enum binop {
    PLUS,       // +
    MINUS,      // -
    STAR,       // *
    SLASH,      // /
    PERCENT,    // %
    CARET,      // ^
    AND,        // &
    OR,         // |
    SHL,        // <<
    SHR,        // >>
}

// A lexical token. `to_str` in this file gives the textual spelling of every
// variant; payloads of type `str_num` are interner handles that `to_str`
// resolves with `interner::get`.
#[auto_serialize]
enum token {
    /* Expression-operator symbols. */
    EQ,
    LT,
    LE,
    EQEQ,
    NE,
    GE,
    GT,
    ANDAND,
    OROR,
    NOT,
    TILDE,
    // A bare binary operator, e.g. `+`.
    BINOP(binop),
    // A binary operator immediately followed by `=`, e.g. `+=`.
    BINOPEQ(binop),

    /* Structural symbols */
    AT,
    DOT,
    ELLIPSIS,
    COMMA,
    SEMI,
    COLON,
    MOD_SEP,
    RARROW,
    LARROW,
    DARROW,
    FAT_ARROW,
    LPAREN,
    RPAREN,
    LBRACKET,
    RBRACKET,
    LBRACE,
    RBRACE,
    POUND,
    DOLLAR,

    /* Literals */
    // Integer literal plus its integer type. `to_str` prints the
    // `ast::ty_char` case as a quoted character rather than a number.
    LIT_INT(i64, ast::int_ty),
    // Unsigned integer literal plus its unsigned type.
    LIT_UINT(u64, ast::uint_ty),
    // Integer literal that `to_str` prints without any type suffix.
    LIT_INT_UNSUFFIXED(i64),
    // Float literal: interned text plus its float type.
    LIT_FLOAT(str_num, ast::float_ty),
    // String literal: interned contents (`to_str` adds quotes and escapes).
    LIT_STR(str_num),

    /* Name components */
    // Identifier: interned name plus a flag. `is_plain_ident` accepts only
    // identifiers whose flag is `false`.
    // NOTE(review): the meaning of the flag is not visible in this file.
    IDENT(str_num, bool),
    UNDERSCORE,

    //ACTUALLY(whole_nonterminal),

    // Doc comment: interned text, printed verbatim by `to_str`.
    DOC_COMMENT(str_num),
    EOF,
}

#[auto_serialize]
/// For interpolation during macro expansion.
// Each variant wraps one kind of already-built AST node (or, for `w_ident`,
// an identifier).
// NOTE(review): nothing else in this file refers to `whole_nt`; the
// commented-out `ACTUALLY(whole_nonterminal)` variant of `token` looks
// related -- confirm against the macro code.
enum whole_nt {
    w_item(@ast::item),
    w_block(ast::blk),
    w_stmt(@ast::stmt),
    w_pat( @ast::pat),
    w_expr(@ast::expr),
    w_ty(  @ast::ty),
    w_ident(ast::ident),
    w_path(@ast::path),
}

fn binop_to_str(o: binop) -> str {
    alt o {
      PLUS { "+" }
      MINUS { "-" }
      STAR { "*" }
      SLASH { "/" }
      PERCENT { "%" }
      CARET { "^" }
      AND { "&" }
      OR { "|" }
      SHL { "<<" }
      SHR { ">>" }
    }
}

/**
 * Renders a token as text.
 *
 * Punctuation and operator tokens map to their fixed spelling. Tokens that
 * carry a `str_num` (float literals, string literals, identifiers and doc
 * comments) are resolved through the interner `in`. String literals are
 * escaped and wrapped in double quotes; numeric literals get the suffix of
 * their type appended.
 *
 * # Arguments
 *
 * * in - the interner the `str_num` payloads of `t` refer to
 * * t - the token to render
 */
fn to_str(in: interner<@str>, t: token) -> str {
    alt t {
      EQ { "=" }
      LT { "<" }
      LE { "<=" }
      EQEQ { "==" }
      NE { "!=" }
      GE { ">=" }
      GT { ">" }
      NOT { "!" }
      TILDE { "~" }
      OROR { "||" }
      ANDAND { "&&" }
      BINOP(op) { binop_to_str(op) }
      BINOPEQ(op) { binop_to_str(op) + "=" }

      /* Structural symbols */
      AT { "@" }
      DOT { "." }
      ELLIPSIS { "..." }
      COMMA { "," }
      // A semicolon prints as itself, like every other punctuation token
      // (this arm used to yield the empty string).
      SEMI { ";" }
      COLON { ":" }
      MOD_SEP { "::" }
      RARROW { "->" }
      LARROW { "<-" }
      DARROW { "<->" }
      FAT_ARROW { "=>" }
      LPAREN { "(" }
      RPAREN { ")" }
      LBRACKET { "[" }
      RBRACKET { "]" }
      LBRACE { "{" }
      RBRACE { "}" }
      POUND { "#" }
      DOLLAR { "$" }

      /* Literals */
      // Character literals are carried as LIT_INT with type ty_char; this
      // arm must stay ahead of the general LIT_INT arm below.
      LIT_INT(c, ast::ty_char) {
        "'" + char::escape_default(c as char) + "'"
      }
      // NOTE(review): the i64/u64 payloads are cast to int/uint before
      // printing -- confirm that cannot lose bits on the supported targets.
      LIT_INT(i, t) {
        int::to_str(i as int, 10u) + ast_util::int_ty_to_str(t)
      }
      LIT_UINT(u, t) {
        uint::to_str(u as uint, 10u) + ast_util::uint_ty_to_str(t)
      }
      LIT_INT_UNSUFFIXED(i) {
        int::to_str(i as int, 10u)
      }
      LIT_FLOAT(s, t) {
        *interner::get(in, s) +
            ast_util::float_ty_to_str(t)
      }
      LIT_STR(s) {
        "\""
            + str::escape_default(*interner::get(in, s))
            + "\""
      }

      /* Name components */
      IDENT(s, _) {
        *interner::get(in, s)
      }
      UNDERSCORE { "_" }

      /* Other */
      DOC_COMMENT(s) { *interner::get(in, s) }
      EOF { "<eof>" }
    }
}

/// Tells whether `t` is one of the tokens accepted as the first token of an
/// expression.
pure fn can_begin_expr(t: token) -> bool {
    alt t {
      // Opening delimiters.
      LPAREN | LBRACE | LBRACKET { true }

      // Names.
      IDENT(_, _) | UNDERSCORE { true }

      // Literals.
      LIT_INT(_, _) | LIT_UINT(_, _) | LIT_INT_UNSUFFIXED(_) |
      LIT_FLOAT(_, _) | LIT_STR(_) { true }

      // Leading sigils and prefix operators.
      TILDE | POUND | AT | NOT { true }
      BINOP(MINUS) | BINOP(STAR) | BINOP(AND) { true }

      // `|` and `||`: in lambda syntax.
      BINOP(OR) | OROR { true }

      // `::`
      MOD_SEP { true }

      _ { false }
    }
}

/// True when `t` is any of the literal tokens (integer, unsigned integer,
/// unsuffixed integer, float or string).
fn is_lit(t: token) -> bool {
    alt t {
      LIT_INT(_, _) | LIT_UINT(_, _) | LIT_INT_UNSUFFIXED(_) |
      LIT_FLOAT(_, _) | LIT_STR(_) {
        true
      }
      _ { false }
    }
}

/// True exactly when `t` is an `IDENT` token, whatever its flag.
pure fn is_ident(t: token) -> bool {
    alt t {
      IDENT(_, _) { true }
      _ { false }
    }
}

/// True exactly when `t` is an `IDENT` token whose flag is `false`.
pure fn is_plain_ident(t: token) -> bool {
    alt t {
      IDENT(_, flag) { !flag }
      _ { false }
    }
}

/// True when `t` is `|` (`BINOP(OR)`) or `||` (`OROR`).
pure fn is_bar(t: token) -> bool {
    alt t {
      BINOP(OR) { true }
      OROR { true }
      _ { false }
    }
}

/**
 * All the valid words that have meaning in the Rust language.
 *
 * Rust keywords are either 'contextual' or 'restricted'. Contextual
 * keywords may be used as identifiers because their appearance in
 * the grammar is unambiguous. Restricted keywords may not appear
 * in positions that might otherwise contain _value identifiers_.
 */
fn keyword_table() -> hashmap<str, ()> {
    let keywords = str_hash();
    for contextual_keyword_table().each_key |word| {
        keywords.insert(word, ());
    }
    for restricted_keyword_table().each_key |word| {
        keywords.insert(word, ());
    }
    keywords
}

/// Keywords that may be used as identifiers.
///
/// The result is used as a set: each keyword is a key mapped to `()`.
// NOTE(review): "else" is listed both here and in restricted_keyword_table
// -- confirm which table it is meant to belong to.
fn contextual_keyword_table() -> hashmap<str, ()> {
    let table = str_hash();
    let contextual = ~[
        "as",
        "else",
        "move",
        "of",
        "priv",
        "pub",
        "self",
        "send",
        "static",
        "to",
        "use",
        "with",
        /* temp */
        "sep",
        "many",
        "at_least_one",
        "parse"
    ];
    for contextual.each |kw| {
        table.insert(kw, ());
    }
    ret table;
}

/**
 * Keywords that are forbidden wherever a _value identifier_ could otherwise
 * appear. They remain usable as other kinds of identifiers.
 *
 * Why they are restricted:
 *
 * * Some (most?) of them, when they start a line, make that line parse as
 *   a particular kind of statement, which would be confusing.
 *
 * * `true` and `false` used as identifiers would always be shadowed by the
 *   boolean constants.
 *
 * The result is used as a set: each keyword is a key mapped to `()`.
 */
fn restricted_keyword_table() -> hashmap<str, ()> {
    let table = str_hash();
    let restricted = ~[
        "alt", "assert",
        "break",
        "check", "claim", "class", "const", "cont", "copy",
        "do", "drop",
        "else", "enum", "export", "extern",
        "fail", "false", "fn", "for",
        "if", "iface", "impl", "import",
        "let", "log", "loop",
        "mod", "mut",
        "new",
        "pure",
        "ret",
        "true", "trait", "type",
        "unchecked", "unsafe",
        "while"
    ];
    for restricted.each |kw| {
        table.insert(kw, ());
    }
    ret table;
}

// Local Variables:
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// End: