// Listing metadata: ANTLR grammar, 198 lines, 4.0 KiB.
lexer grammar RustLexer;
|
|
|
|
@lexer::members {
    /**
     * Reports whether the input stream's current index equals pos.
     * Semantic predicates in this grammar call it to tie a token to a
     * fixed offset in the input.
     */
    public boolean is_at(int pos) {
        final int cursor = _input.index();
        return cursor == pos;
    }
}
|
|
|
|
|
|
// Token types declared up front. The declaration order is kept unchanged.
tokens {
    // expression operators
    EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE,
    PLUS, MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR,
    BINOP, BINOPEQ, LARROW,
    // structural symbols
    AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON, MOD_SEP,
    RARROW, FAT_ARROW, LPAREN, RPAREN, LBRACKET, RBRACKET,
    LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE,
    // literals
    LIT_CHAR, LIT_BYTE, LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW,
    LIT_BYTE_STR, LIT_BYTE_STR_RAW,
    // identifiers, trivia and start-of-file markers
    QUESTION, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT, COMMENT,
    SHEBANG, UTF8_BOM
}
|
|
|
|
import xidstart , xidcontinue;
|
|
|
|
|
|
/* Expression-operator symbols */
|
|
|
|
// Assignment and comparison
EQ      : '='  ;
LT      : '<'  ;
LE      : '<=' ;
EQEQ    : '==' ;
NE      : '!=' ;
GE      : '>=' ;
GT      : '>'  ;

// Logical and unary
ANDAND  : '&&' ;
OROR    : '||' ;
NOT     : '!'  ;
TILDE   : '~'  ;

// Arithmetic
PLUS    : '+'  ;
MINUS   : '-'  ;
STAR    : '*'  ;
SLASH   : '/'  ;
PERCENT : '%'  ;

// Bitwise and shift
CARET   : '^'  ;
AND     : '&'  ;
OR      : '|'  ;
SHL     : '<<' ;
SHR     : '>>' ;

// Left arrow
LARROW  : '<-' ;
|
|
|
|
// The set of operator tokens that BINOPEQ accepts in front of '='.
BINOP
    : PLUS | SLASH | MINUS | STAR | PERCENT
    | CARET | AND | OR | SHL | SHR | LARROW
    ;
|
|
|
|
// Compound assignment: one BINOP operator immediately followed by '='.
BINOPEQ
    : BINOP EQ
    ;
|
|
|
|
/* "Structural symbols" */
|
|
|
|
// Punctuation
AT         : '@'   ;
DOT        : '.'   ;
DOTDOT     : '..'  ;
DOTDOTDOT  : '...' ;
COMMA      : ','   ;
SEMI       : ';'   ;
COLON      : ':'   ;
MOD_SEP    : '::'  ;
RARROW     : '->'  ;
FAT_ARROW  : '=>'  ;

// Delimiters
LPAREN     : '('   ;
RPAREN     : ')'   ;
LBRACKET   : '['   ;
RBRACKET   : ']'   ;
LBRACE     : '{'   ;
RBRACE     : '}'   ;

// Miscellaneous single-character symbols
POUND      : '#'   ;
DOLLAR     : '$'   ;
UNDERSCORE : '_'   ;
|
|
|
|
// Literals
|
|
|
|
// A single hexadecimal digit, either letter case.
fragment HEXIT : [0-9] | [a-f] | [A-F] ;
|
|
|
|
// The part of an escape sequence that follows the backslash.
fragment CHAR_ESCAPE
    : [nrt\\'"0]                                            // single-character escapes
    | [xX] HEXIT HEXIT                                      // two-digit hex escape
    | 'u' HEXIT HEXIT HEXIT HEXIT                           // four-digit form
    | 'U' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT   // eight-digit form
    | 'u{' HEXIT HEXIT? HEXIT? HEXIT? HEXIT? HEXIT? '}'     // braced form, 1 to 6 digits
    ;
|
|
|
|
// A literal suffix has the same shape as an identifier.
fragment SUFFIX : IDENT ;
|
|
|
|
// A suffix on an integer literal. The predicate rejects a suffix whose first
// character is e or E (presumably so exponent forms are left to LIT_FLOAT).
fragment INTEGER_SUFFIX
    : { !(_input.LA(1) == 'e' || _input.LA(1) == 'E') }? SUFFIX
    ;
|
|
|
|
// Character literal: exactly one item between single quotes, optionally
// followed by a suffix.
LIT_CHAR
    : '\''
      ( '\\' CHAR_ESCAPE                          // backslash escape
      | ~[\\'\n\t\r]                              // one ordinary character
      | '\ud800'..'\udbff' '\udc00'..'\udfff'     // high + low surrogate pair
      )
      '\''
      SUFFIX?
    ;
|
|
|
|
// Byte literal: b'...' holding one escape or one character, optionally
// followed by a suffix.
LIT_BYTE
    : 'b\''
      ( '\\' ( [nrt\\'"0]
             | [xX] HEXIT HEXIT
             )
      | ~[\\'\n\t\r] '\udc00'..'\udfff'?      // character, optional trailing low surrogate
      )
      '\''
      SUFFIX?
    ;
|
|
|
|
// Integer literal in decimal, binary, octal or hexadecimal notation.
// Underscores are allowed among the digits; a suffix is optional.
LIT_INTEGER
    : [0-9] [0-9_]*       INTEGER_SUFFIX?     // decimal
    | '0b' [01_]+         INTEGER_SUFFIX?     // binary
    | '0o' [0-7_]+        INTEGER_SUFFIX?     // octal
    | '0x' (HEXIT | '_')+ INTEGER_SUFFIX?     // hexadecimal
    ;
|
|
|
|
// Floating-point literal. After the integer digits comes either a bare
// trailing dot (guarded by the predicate) or any combination of fraction,
// exponent and suffix.
LIT_FLOAT
    : [0-9] [0-9_]*
      ( '.'
        {
            /* a second dot means this is a range, not a float */
            _input.LA(1) != '.' &&
            /* an identifier after the dot means a call on an integer, not a float */
            _input.LA(1) != '_' &&
            (_input.LA(1) < 'a' || _input.LA(1) > 'z') &&
            (_input.LA(1) < 'A' || _input.LA(1) > 'Z')
        }?
      | ('.' [0-9] [0-9_]*)? ([eE] [-+]? [0-9] [0-9_]*)? SUFFIX?
      )
    ;
|
|
|
|
// String literal: double-quoted, matched non-greedily up to the closing
// quote, optionally followed by a suffix.
LIT_STR
    : '"'
      ( '\\\n'                // backslash followed by LF
      | '\\\r\n'              // backslash followed by CRLF
      | '\\' CHAR_ESCAPE      // ordinary escape sequence
      | .                     // any other character
      )*?
      '"'
      SUFFIX?
    ;
|
|
|
|
// Byte-string literals: the plain and raw string forms with a 'b' prefix.
LIT_BYTE_STR
    : 'b' LIT_STR
    ;

LIT_BYTE_STR_RAW
    : 'b' LIT_STR_RAW
    ;
|
|
|
|
/* this is a bit messy */
|
|
|
|
// Body of a raw string: either the quoted text itself, or a further level
// wrapped in one more pair of '#' characters. The two fragments are mutually
// recursive, so the '#' counts on both sides always match.
fragment LIT_STR_RAW_INNER
    : LIT_STR_RAW_INNER2
    | '"' .*? '"'
    ;

// One '#' on each side of a nested raw-string body.
fragment LIT_STR_RAW_INNER2
    : POUND LIT_STR_RAW_INNER POUND
    ;
|
|
|
|
// Raw string literal: 'r', then the hash-delimited body, then an optional suffix.
LIT_STR_RAW : 'r' LIT_STR_RAW_INNER SUFFIX? ;
|
|
|
|
|
|
QUESTION
    : '?'
    ;

// Identifier: one XID_Start character followed by any number of
// XID_Continue characters (both come from the imported grammars).
IDENT
    : XID_Start XID_Continue*
    ;

// Identifier with an optional leading '?'.
fragment QUESTION_IDENTIFIER
    : QUESTION? IDENT
    ;

// Lifetime: a single quote immediately followed by an identifier.
LIFETIME
    : '\'' IDENT
    ;

// One or more spaces, tabs, carriage returns or line feeds.
WHITESPACE
    : [ \t\r\n]+
    ;
|
|
|
|
// Line comments. ANTLR picks the longest match and, on a tie, the rule that
// is listed first, so the order of these four rules matters.

// Four or more slashes: an ordinary comment, not a doc comment.
// The body stops only at '\n', so on CRLF input the '\r' is part of the token.
UNDOC_COMMENT     : '////' ~[\n]*   -> type(COMMENT) ;

// Exactly three slashes: doc comment. The body stops before '\r' or '\n'.
YESDOC_COMMENT    : '///' ~[\r\n]*  -> type(DOC_COMMENT) ;

// '//!' doc comment. The body stops before '\r' or '\n'.
OUTER_DOC_COMMENT : '//!' ~[\r\n]*  -> type(DOC_COMMENT) ;

// Any other '//' comment. The first body character must not be '/' (handled
// by the rules above) or '!'. Without the '!' exclusion this rule also matched
// "//!..." and, because its body runs through '\r', produced a match one
// character longer than OUTER_DOC_COMMENT on CRLF input, so such doc comments
// were typed COMMENT instead of DOC_COMMENT. Input starting with "//!" is
// always matched by OUTER_DOC_COMMENT, so nothing else changes.
LINE_COMMENT      : '//' ( ~[/!\n] ~[\n]* )? -> type(COMMENT) ;
|
|
|
|
// Block doc comment: opens with '/*!' or with '/**' followed by a character
// that is neither '*' nor '/'.
//
// Excluding '/' keeps the empty comment "/**/" from being read as a doc
// comment opener. Previously "/**/" followed by any later "*/" was lexed as
// one long DOC_COMMENT that beat BLOCK_COMMENT on length.
//
// Nested comments are matched through BLOCK_COMMENT, which covers every
// "/* ... */" form including doc-style ones. Previously only doc-style
// comments nested, so a plain "/* ... */" inside a doc comment ended it
// early. Lexer commands of a referenced rule are ignored, so the token is
// still typed DOC_COMMENT. The nested reference must stay ahead of '.' so
// that it takes priority inside the non-greedy loop.
DOC_BLOCK_COMMENT
    : ( '/**' ~[*/] | '/*!' ) ( BLOCK_COMMENT | . )*? '*/' -> type(DOC_COMMENT)
    ;
|
|
|
|
// Ordinary block comment. It may contain nested block comments; the
// recursive reference is listed before '.' inside the non-greedy loop.
BLOCK_COMMENT
    : '/*' ( BLOCK_COMMENT | . )*? '*/' -> type(COMMENT)
    ;
|
|
|
|
/* these appear at the beginning of a file */
|
|
|
|
// '#!' line. The predicate runs after '#!' has been consumed: the stream index
// must then be 2 (so '#!' were the first two characters of the input) and the
// next character must not be '['. The rest of the line belongs to the token.
SHEBANG
    : '#!' { is_at(2) && _input.LA(1) != '[' }? ~[\r\n]* -> type(SHEBANG)
    ;

// U+FEFF is discarded, but only when the index after it is 1, that is, when it
// is the very first character of the input.
UTF8_BOM
    : '\ufeff' { is_at(1) }? -> skip
    ;
|