rust/src/grammar/RustLexer.g4
2015-10-27 23:06:37 +02:00

198 lines
4.0 KiB
ANTLR

lexer grammar RustLexer;
@lexer::members {
public boolean is_at(int pos) {
return _input.index() == pos;
}
}
tokens {
EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUS,
MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP,
BINOPEQ, LARROW, AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON,
MOD_SEP, RARROW, FAT_ARROW, LPAREN, RPAREN, LBRACKET, RBRACKET,
LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR, LIT_BYTE,
LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BYTE_STR,
LIT_BYTE_STR_RAW, QUESTION, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
COMMENT, SHEBANG, UTF8_BOM
}
import xidstart , xidcontinue;
/* Expression-operator symbols */
EQ : '=' ;
LT : '<' ;
LE : '<=' ;
EQEQ : '==' ;
NE : '!=' ;
GE : '>=' ;
GT : '>' ;
ANDAND : '&&' ;
OROR : '||' ;
NOT : '!' ;
TILDE : '~' ;
PLUS : '+' ;
MINUS : '-' ;
STAR : '*' ;
SLASH : '/' ;
PERCENT : '%' ;
CARET : '^' ;
AND : '&' ;
OR : '|' ;
SHL : '<<' ;
SHR : '>>' ;
LARROW : '<-' ;
BINOP
: PLUS
| SLASH
| MINUS
| STAR
| PERCENT
| CARET
| AND
| OR
| SHL
| SHR
| LARROW
;
BINOPEQ : BINOP EQ ;
/* "Structural symbols" */
AT : '@' ;
DOT : '.' ;
DOTDOT : '..' ;
DOTDOTDOT : '...' ;
COMMA : ',' ;
SEMI : ';' ;
COLON : ':' ;
MOD_SEP : '::' ;
RARROW : '->' ;
FAT_ARROW : '=>' ;
LPAREN : '(' ;
RPAREN : ')' ;
LBRACKET : '[' ;
RBRACKET : ']' ;
LBRACE : '{' ;
RBRACE : '}' ;
POUND : '#';
DOLLAR : '$' ;
UNDERSCORE : '_' ;
// Literals
fragment HEXIT
: [0-9a-fA-F]
;
fragment CHAR_ESCAPE
: [nrt\\'"0]
| [xX] HEXIT HEXIT
| 'u' HEXIT HEXIT HEXIT HEXIT
| 'U' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT
| 'u{' HEXIT '}'
| 'u{' HEXIT HEXIT '}'
| 'u{' HEXIT HEXIT HEXIT '}'
| 'u{' HEXIT HEXIT HEXIT HEXIT '}'
| 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT '}'
| 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT '}'
;
fragment SUFFIX
: IDENT
;
fragment INTEGER_SUFFIX
: { _input.LA(1) != 'e' && _input.LA(1) != 'E' }? SUFFIX
;
LIT_CHAR
: '\'' ( '\\' CHAR_ESCAPE
| ~[\\'\n\t\r]
| '\ud800' .. '\udbff' '\udc00' .. '\udfff'
)
'\'' SUFFIX?
;
LIT_BYTE
: 'b\'' ( '\\' ( [xX] HEXIT HEXIT
| [nrt\\'"0] )
| ~[\\'\n\t\r] '\udc00'..'\udfff'?
)
'\'' SUFFIX?
;
LIT_INTEGER
: [0-9][0-9_]* INTEGER_SUFFIX?
| '0b' [01_]+ INTEGER_SUFFIX?
| '0o' [0-7_]+ INTEGER_SUFFIX?
| '0x' [0-9a-fA-F_]+ INTEGER_SUFFIX?
;
LIT_FLOAT
: [0-9][0-9_]* ('.' {
/* dot followed by another dot is a range, not a float */
_input.LA(1) != '.' &&
/* dot followed by an identifier is an integer with a function call, not a float */
_input.LA(1) != '_' &&
!(_input.LA(1) >= 'a' && _input.LA(1) <= 'z') &&
!(_input.LA(1) >= 'A' && _input.LA(1) <= 'Z')
}? | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?)
;
LIT_STR
: '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"' SUFFIX?
;
LIT_BYTE_STR : 'b' LIT_STR ;
LIT_BYTE_STR_RAW : 'b' LIT_STR_RAW ;
/* this is a bit messy */
fragment LIT_STR_RAW_INNER
: '"' .*? '"'
| LIT_STR_RAW_INNER2
;
fragment LIT_STR_RAW_INNER2
: POUND LIT_STR_RAW_INNER POUND
;
LIT_STR_RAW
: 'r' LIT_STR_RAW_INNER SUFFIX?
;
QUESTION : '?';
IDENT : XID_Start XID_Continue* ;
fragment QUESTION_IDENTIFIER : QUESTION? IDENT;
LIFETIME : '\'' IDENT ;
WHITESPACE : [ \r\n\t]+ ;
UNDOC_COMMENT : '////' ~[\n]* -> type(COMMENT) ;
YESDOC_COMMENT : '///' ~[\r\n]* -> type(DOC_COMMENT) ;
OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ;
LINE_COMMENT : '//' ( ~[/\n] ~[\n]* )? -> type(COMMENT) ;
DOC_BLOCK_COMMENT
: ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT)
;
BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ;
/* these appear at the beginning of a file */
SHEBANG : '#!' { is_at(2) && _input.LA(1) != '[' }? ~[\r\n]* -> type(SHEBANG) ;
UTF8_BOM : '\ufeff' { is_at(1) }? -> skip ;