2014-07-14 03:52:18 -05:00
|
|
|
#![feature(globs, phase, macro_rules)]
|
|
|
|
|
|
|
|
extern crate syntax;
|
|
|
|
extern crate rustc;
|
|
|
|
|
|
|
|
#[phase(link)]
|
|
|
|
extern crate regex;
|
|
|
|
|
|
|
|
#[phase(link, plugin)]
|
|
|
|
extern crate log;
|
|
|
|
|
|
|
|
#[phase(plugin)] extern crate regex_macros;
|
|
|
|
|
|
|
|
use std::collections::HashMap;
|
|
|
|
use std::io::File;
|
|
|
|
|
|
|
|
use syntax::parse;
|
|
|
|
use syntax::parse::lexer;
|
|
|
|
use rustc::driver::{session, config};
|
|
|
|
|
|
|
|
use syntax::ast;
|
|
|
|
use syntax::ast::Name;
|
|
|
|
use syntax::parse::token::*;
|
|
|
|
use syntax::parse::lexer::TokenAndSpan;
|
|
|
|
|
|
|
|
fn parse_token_list(file: &str) -> HashMap<String, Token> {
|
|
|
|
fn id() -> Token {
|
|
|
|
IDENT(ast::Ident { name: Name(0), ctxt: 0, }, false)
|
|
|
|
}
|
|
|
|
|
|
|
|
let mut res = HashMap::new();
|
|
|
|
|
|
|
|
res.insert("-1".to_string(), EOF);
|
|
|
|
|
|
|
|
for line in file.split('\n') {
|
|
|
|
let eq = match line.trim().rfind('=') {
|
|
|
|
Some(val) => val,
|
|
|
|
None => continue
|
|
|
|
};
|
|
|
|
|
|
|
|
let val = line.slice_to(eq);
|
|
|
|
let num = line.slice_from(eq + 1);
|
|
|
|
|
|
|
|
let tok = match val {
|
|
|
|
"SHR" => BINOP(SHR),
|
|
|
|
"DOLLAR" => DOLLAR,
|
|
|
|
"LT" => LT,
|
|
|
|
"STAR" => BINOP(STAR),
|
|
|
|
"FLOAT_SUFFIX" => id(),
|
|
|
|
"INT_SUFFIX" => id(),
|
|
|
|
"SHL" => BINOP(SHL),
|
|
|
|
"LBRACE" => LBRACE,
|
|
|
|
"RARROW" => RARROW,
|
|
|
|
"LIT_STR" => LIT_STR(Name(0)),
|
|
|
|
"DOTDOT" => DOTDOT,
|
|
|
|
"MOD_SEP" => MOD_SEP,
|
|
|
|
"DOTDOTDOT" => DOTDOTDOT,
|
|
|
|
"NOT" => NOT,
|
|
|
|
"AND" => BINOP(AND),
|
|
|
|
"LPAREN" => LPAREN,
|
|
|
|
"ANDAND" => ANDAND,
|
|
|
|
"AT" => AT,
|
|
|
|
"LBRACKET" => LBRACKET,
|
|
|
|
"LIT_STR_RAW" => LIT_STR_RAW(Name(0), 0),
|
|
|
|
"RPAREN" => RPAREN,
|
|
|
|
"SLASH" => BINOP(SLASH),
|
|
|
|
"COMMA" => COMMA,
|
|
|
|
"LIFETIME" => LIFETIME(ast::Ident { name: Name(0), ctxt: 0 }),
|
|
|
|
"CARET" => BINOP(CARET),
|
|
|
|
"TILDE" => TILDE,
|
|
|
|
"IDENT" => id(),
|
|
|
|
"PLUS" => BINOP(PLUS),
|
|
|
|
"LIT_CHAR" => LIT_CHAR(Name(0)),
|
2014-07-14 22:45:39 -05:00
|
|
|
"LIT_BYTE" => LIT_BYTE(Name(0)),
|
2014-07-14 03:52:18 -05:00
|
|
|
"EQ" => EQ,
|
|
|
|
"RBRACKET" => RBRACKET,
|
|
|
|
"COMMENT" => COMMENT,
|
|
|
|
"DOC_COMMENT" => DOC_COMMENT(Name(0)),
|
|
|
|
"DOT" => DOT,
|
|
|
|
"EQEQ" => EQEQ,
|
|
|
|
"NE" => NE,
|
|
|
|
"GE" => GE,
|
|
|
|
"PERCENT" => BINOP(PERCENT),
|
|
|
|
"RBRACE" => RBRACE,
|
|
|
|
"BINOP" => BINOP(PLUS),
|
|
|
|
"POUND" => POUND,
|
|
|
|
"OROR" => OROR,
|
|
|
|
"LIT_INTEGER" => LIT_INTEGER(Name(0)),
|
|
|
|
"BINOPEQ" => BINOPEQ(PLUS),
|
|
|
|
"LIT_FLOAT" => LIT_FLOAT(Name(0)),
|
|
|
|
"WHITESPACE" => WS,
|
|
|
|
"UNDERSCORE" => UNDERSCORE,
|
|
|
|
"MINUS" => BINOP(MINUS),
|
|
|
|
"SEMI" => SEMI,
|
|
|
|
"COLON" => COLON,
|
|
|
|
"FAT_ARROW" => FAT_ARROW,
|
|
|
|
"OR" => BINOP(OR),
|
|
|
|
"GT" => GT,
|
|
|
|
"LE" => LE,
|
|
|
|
"LIT_BINARY" => LIT_BINARY(Name(0)),
|
|
|
|
"LIT_BINARY_RAW" => LIT_BINARY_RAW(Name(0), 0),
|
|
|
|
_ => continue
|
|
|
|
};
|
|
|
|
|
|
|
|
res.insert(num.to_string(), tok);
|
|
|
|
}
|
|
|
|
|
|
|
|
debug!("Token map: {}", res);
|
|
|
|
res
|
|
|
|
}
|
|
|
|
|
2014-07-14 19:27:28 -05:00
|
|
|
fn str_to_binop(s: &str) -> BinOp {
|
2014-07-14 03:52:18 -05:00
|
|
|
match s {
|
|
|
|
"+" => PLUS,
|
2014-07-14 19:27:28 -05:00
|
|
|
"/" => SLASH,
|
2014-07-14 03:52:18 -05:00
|
|
|
"-" => MINUS,
|
|
|
|
"*" => STAR,
|
|
|
|
"%" => PERCENT,
|
|
|
|
"^" => CARET,
|
|
|
|
"&" => AND,
|
|
|
|
"|" => OR,
|
|
|
|
"<<" => SHL,
|
|
|
|
">>" => SHR,
|
2014-07-14 19:27:28 -05:00
|
|
|
_ => fail!("Bad binop str `{}`", s)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-07-14 22:45:39 -05:00
|
|
|
/// Assuming a string/binary literal, strip out the leading/trailing
|
2014-07-14 19:27:28 -05:00
|
|
|
/// hashes and surrounding quotes/raw/binary prefix.
|
|
|
|
fn fix(mut lit: &str) -> ast::Name {
|
|
|
|
if lit.char_at(0) == 'r' {
|
|
|
|
if lit.char_at(1) == 'b' {
|
|
|
|
lit = lit.slice_from(2)
|
|
|
|
} else {
|
|
|
|
lit = lit.slice_from(1);
|
|
|
|
}
|
|
|
|
} else if lit.char_at(0) == 'b' {
|
|
|
|
lit = lit.slice_from(1);
|
2014-07-14 03:52:18 -05:00
|
|
|
}
|
2014-07-14 19:27:28 -05:00
|
|
|
|
|
|
|
let leading_hashes = count(lit);
|
|
|
|
|
|
|
|
// +1/-1 to adjust for single quotes
|
|
|
|
parse::token::intern(lit.slice(leading_hashes + 1, lit.len() - leading_hashes - 1))
|
|
|
|
}
|
|
|
|
|
2014-07-14 22:45:39 -05:00
|
|
|
/// Assuming a char/byte literal, strip the 'b' prefix and the single quotes.
|
|
|
|
fn fixchar(mut lit: &str) -> ast::Name {
|
|
|
|
if lit.char_at(0) == 'b' {
|
|
|
|
lit = lit.slice_from(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
parse::token::intern(lit.slice(1, lit.len() - 1))
|
|
|
|
}
|
|
|
|
|
2014-07-14 19:27:28 -05:00
|
|
|
fn count(lit: &str) -> uint {
|
|
|
|
lit.chars().take_while(|c| *c == '#').count()
|
2014-07-14 03:52:18 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
fn parse_antlr_token(s: &str, tokens: &HashMap<String, Token>) -> TokenAndSpan {
|
2014-07-14 19:27:28 -05:00
|
|
|
let re = regex!(r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]");
|
2014-07-14 03:52:18 -05:00
|
|
|
|
|
|
|
let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice());
|
|
|
|
let start = m.name("start");
|
|
|
|
let end = m.name("end");
|
|
|
|
let toknum = m.name("toknum");
|
|
|
|
let content = m.name("content");
|
|
|
|
|
|
|
|
let proto_tok = tokens.find_equiv(&toknum).expect(format!("didn't find token {} in the map", toknum).as_slice());
|
2014-07-14 19:27:28 -05:00
|
|
|
|
|
|
|
let nm = parse::token::intern(content);
|
|
|
|
|
|
|
|
debug!("What we got: content (`{}`), proto: {}", content, proto_tok);
|
|
|
|
|
2014-07-14 03:52:18 -05:00
|
|
|
let real_tok = match *proto_tok {
|
2014-07-14 19:27:28 -05:00
|
|
|
BINOP(..) => BINOP(str_to_binop(content)),
|
|
|
|
BINOPEQ(..) => BINOPEQ(str_to_binop(content.slice_to(content.len() - 1))),
|
|
|
|
LIT_STR(..) => LIT_STR(fix(content)),
|
|
|
|
LIT_STR_RAW(..) => LIT_STR_RAW(fix(content), count(content)),
|
2014-07-14 22:45:39 -05:00
|
|
|
LIT_CHAR(..) => LIT_CHAR(fixchar(content)),
|
|
|
|
LIT_BYTE(..) => LIT_BYTE(fixchar(content)),
|
2014-07-14 19:27:28 -05:00
|
|
|
DOC_COMMENT(..) => DOC_COMMENT(nm),
|
|
|
|
LIT_INTEGER(..) => LIT_INTEGER(nm),
|
|
|
|
LIT_FLOAT(..) => LIT_FLOAT(nm),
|
|
|
|
LIT_BINARY(..) => LIT_BINARY(nm),
|
|
|
|
LIT_BINARY_RAW(..) => LIT_BINARY_RAW(fix(content), count(content)),
|
|
|
|
IDENT(..) => IDENT(ast::Ident { name: nm, ctxt: 0 }, true),
|
|
|
|
LIFETIME(..) => LIFETIME(ast::Ident { name: nm, ctxt: 0 }),
|
2014-07-14 03:52:18 -05:00
|
|
|
ref t => t.clone()
|
|
|
|
};
|
|
|
|
|
|
|
|
let offset = if real_tok == EOF {
|
|
|
|
1
|
|
|
|
} else {
|
|
|
|
0
|
|
|
|
};
|
|
|
|
|
|
|
|
let sp = syntax::codemap::Span {
|
|
|
|
lo: syntax::codemap::BytePos(from_str::<u32>(start).unwrap() - offset),
|
|
|
|
hi: syntax::codemap::BytePos(from_str::<u32>(end).unwrap() + 1),
|
|
|
|
expn_info: None
|
|
|
|
};
|
|
|
|
|
|
|
|
TokenAndSpan {
|
|
|
|
tok: real_tok,
|
|
|
|
sp: sp
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-07-14 19:27:28 -05:00
|
|
|
fn tok_cmp(a: &Token, b: &Token) -> bool {
|
|
|
|
match a {
|
|
|
|
&IDENT(id, _) => match b {
|
|
|
|
&IDENT(id2, _) => id == id2,
|
|
|
|
_ => false
|
|
|
|
},
|
|
|
|
_ => a == b
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-07-14 03:52:18 -05:00
|
|
|
fn main() {
|
|
|
|
fn next(r: &mut lexer::StringReader) -> TokenAndSpan {
|
|
|
|
use syntax::parse::lexer::Reader;
|
|
|
|
r.next_token()
|
|
|
|
}
|
|
|
|
|
|
|
|
let token_map = parse_token_list(File::open(&Path::new("RustLexer.tokens")).unwrap().read_to_string().unwrap().as_slice());
|
|
|
|
let mut stdin = std::io::stdin();
|
|
|
|
let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(), &token_map));
|
|
|
|
|
|
|
|
let code = File::open(&Path::new(std::os::args().get(1).as_slice())).unwrap().read_to_string().unwrap();
|
|
|
|
let options = config::basic_options();
|
2014-07-14 19:27:28 -05:00
|
|
|
let session = session::build_session(options, None,
|
|
|
|
syntax::diagnostics::registry::Registry::new([]));
|
2014-07-14 03:52:18 -05:00
|
|
|
let filemap = parse::string_to_filemap(&session.parse_sess,
|
|
|
|
code,
|
|
|
|
String::from_str("<n/a>"));
|
|
|
|
let mut lexer = lexer::StringReader::new(session.diagnostic(), filemap);
|
|
|
|
|
|
|
|
for antlr_tok in antlr_tokens {
|
|
|
|
let rustc_tok = next(&mut lexer);
|
|
|
|
if rustc_tok.tok == EOF && antlr_tok.tok == EOF {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
assert!(rustc_tok.sp == antlr_tok.sp, "{} and {} have different spans", rustc_tok, antlr_tok);
|
|
|
|
|
|
|
|
macro_rules! matches (
|
|
|
|
( $($x:pat),+ ) => (
|
|
|
|
match rustc_tok.tok {
|
|
|
|
$($x => match antlr_tok.tok {
|
2014-07-14 19:27:28 -05:00
|
|
|
$x => {
|
|
|
|
if !tok_cmp(&rustc_tok.tok, &antlr_tok.tok) {
|
|
|
|
// FIXME #15677: needs more robust escaping in
|
|
|
|
// antlr
|
|
|
|
warn!("Different names for {} and {}", rustc_tok, antlr_tok);
|
|
|
|
}
|
|
|
|
}
|
2014-07-14 03:52:18 -05:00
|
|
|
_ => fail!("{} is not {}", antlr_tok, rustc_tok)
|
|
|
|
},)*
|
2014-07-14 19:27:28 -05:00
|
|
|
ref c => assert!(c == &antlr_tok.tok, "{} is not {}", rustc_tok, antlr_tok)
|
2014-07-14 03:52:18 -05:00
|
|
|
}
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
matches!(LIT_BYTE(..),
|
|
|
|
LIT_CHAR(..),
|
|
|
|
LIT_INTEGER(..),
|
|
|
|
LIT_FLOAT(..),
|
|
|
|
LIT_STR(..),
|
|
|
|
LIT_STR_RAW(..),
|
|
|
|
LIT_BINARY(..),
|
|
|
|
LIT_BINARY_RAW(..),
|
|
|
|
IDENT(..),
|
|
|
|
LIFETIME(..),
|
|
|
|
INTERPOLATED(..),
|
|
|
|
DOC_COMMENT(..),
|
|
|
|
SHEBANG(..)
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|