rust/src/grammar/verify.rs

// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

#![feature(globs, phase, macro_rules)]

extern crate syntax;
extern crate rustc;

#[phase(link)]
extern crate regex;

#[phase(link, plugin)]
extern crate log;

#[phase(plugin)] extern crate regex_macros;

use std::collections::HashMap;
use std::io::File;

use syntax::parse;
use syntax::parse::lexer;
use rustc::session::{mod, config};

use syntax::ast;
use syntax::ast::Name;
use syntax::parse::token;
use syntax::parse::lexer::TokenAndSpan;

fn parse_token_list(file: &str) -> HashMap<String, token::Token> {
    fn id() -> token::Token {
        token::Ident(ast::Ident { name: Name(0), ctxt: 0, }, token::Plain)
    }

    let mut res = HashMap::new();

    res.insert("-1".to_string(), token::Eof);

    for line in file.split('\n') {
        let eq = match line.trim().rfind('=') {
            Some(val) => val,
            None => continue
        };

        let val = line.slice_to(eq);
        let num = line.slice_from(eq + 1);

        let tok = match val {
            "SHR"               => token::BinOp(token::Shr),
            "DOLLAR"            => token::Dollar,
            "LT"                => token::Lt,
            "STAR"              => token::BinOp(token::Star),
            "FLOAT_SUFFIX"      => id(),
            "INT_SUFFIX"        => id(),
            "SHL"               => token::BinOp(token::Shl),
            "LBRACE"            => token::OpenDelim(token::Brace),
            "RARROW"            => token::RArrow,
            "LIT_STR"           => token::Literal(token::Str_(Name(0))),
            "DOTDOT"            => token::DotDot,
            "MOD_SEP"           => token::ModSep,
            "DOTDOTDOT"         => token::DotDotDot,
            "NOT"               => token::Not,
            "AND"               => token::BinOp(token::And),
            "LPAREN"            => token::OpenDelim(token::Paren),
            "ANDAND"            => token::AndAnd,
            "AT"                => token::At,
            "LBRACKET"          => token::OpenDelim(token::Bracket),
            "LIT_STR_RAW"       => token::Literal(token::StrRaw(Name(0), 0)),
            "RPAREN"            => token::CloseDelim(token::Paren),
            "SLASH"             => token::BinOp(token::Slash),
            "COMMA"             => token::Comma,
            "LIFETIME"          => token::Lifetime(ast::Ident { name: Name(0), ctxt: 0 }),
            "CARET"             => token::BinOp(token::Caret),
            "TILDE"             => token::Tilde,
            "IDENT"             => id(),
            "PLUS"              => token::BinOp(token::Plus),
            "LIT_CHAR"          => token::Literal(token::Char(Name(0))),
            "LIT_BYTE"          => token::Literal(token::Byte(Name(0))),
            "EQ"                => token::Eq,
            "RBRACKET"          => token::CloseDelim(token::Bracket),
            "COMMENT"           => token::Comment,
            "DOC_COMMENT"       => token::DocComment(Name(0)),
            "DOT"               => token::Dot,
            "EQEQ"              => token::EqEq,
            "NE"                => token::Ne,
            "GE"                => token::Ge,
            "PERCENT"           => token::BinOp(token::Percent),
            "RBRACE"            => token::CloseDelim(token::Brace),
            "BINOP"             => token::BinOp(token::Plus),
            "POUND"             => token::Pound,
            "OROR"              => token::OrOr,
            "LIT_INTEGER"       => token::Literal(token::Integer(Name(0))),
            "BINOPEQ"           => token::BinOpEq(token::Plus),
            "LIT_FLOAT"         => token::Literal(token::Float(Name(0))),
            "WHITESPACE"        => token::Whitespace,
            "UNDERSCORE"        => token::Underscore,
            "MINUS"             => token::BinOp(token::Minus),
            "SEMI"              => token::Semi,
            "COLON"             => token::Colon,
            "FAT_ARROW"         => token::FatArrow,
            "OR"                => token::BinOp(token::Or),
            "GT"                => token::Gt,
            "LE"                => token::Le,
            "LIT_BINARY"        => token::Literal(token::Binary(Name(0))),
            "LIT_BINARY_RAW"    => token::Literal(token::BinaryRaw(Name(0), 0)),
            _                   => continue,
        };

        res.insert(num.to_string(), tok);
    }

    debug!("Token map: {}", res);
    res
}

/// Translates the source spelling of a binary operator into the matching
/// `token::BinOpToken` variant.
///
/// Panics with a "Bad binop str" message when `s` is not one of the ten
/// spellings listed in the match.
fn str_to_binop(s: &str) -> token::BinOpToken {
    match s {
        // arithmetic
        "+"     => token::Plus,
        "-"     => token::Minus,
        "*"     => token::Star,
        "/"     => token::Slash,
        "%"     => token::Percent,
        // bitwise
        "&"     => token::And,
        "|"     => token::Or,
        "^"     => token::Caret,
        // shifts
        "<<"    => token::Shl,
        ">>"    => token::Shr,
        unknown => panic!("Bad binop str `{}`", unknown),
    }
}

/// Assuming a string/binary literal, strip out the leading/trailing
/// hashes and surrounding quotes/raw/binary prefix, and intern what is left.
///
/// Accepted prefixes are none (`"…"`), `r`, `b`, `br` (how Rust spells a raw
/// byte string) and `rb`. The number of hashes removed from the end equals
/// the number found at the start.
///
/// Panics if `lit` is empty or too short to hold the two quote characters.
fn fix(mut lit: &str) -> ast::Name {
    if lit.char_at(0) == 'r' {
        if lit.char_at(1) == 'b' {
            lit = lit.slice_from(2)
        } else {
            lit = lit.slice_from(1);
        }
    } else if lit.char_at(0) == 'b' {
        lit = lit.slice_from(1);
        // Raw byte strings are written `br…`: drop the `r` too, otherwise
        // the hash count below is 0 and the hashes and quote leak into the
        // interned name. A plain `b"…"` continues with `"`, never `r`.
        if lit.starts_with("r") {
            lit = lit.slice_from(1);
        }
    }

    let leading_hashes = count(lit);

    // +1/-1 to skip the opening and closing double quotes
    parse::token::intern(lit.slice(leading_hashes + 1, lit.len() - leading_hashes - 1))
}

/// Interns the payload of a char/byte literal: an optional leading `b` is
/// dropped, then the first and last remaining characters (the single quotes)
/// are cut off.
fn fixchar(lit: &str) -> ast::Name {
    let quoted = if lit.char_at(0) == 'b' {
        lit.slice_from(1)
    } else {
        lit
    };

    // Start after the opening quote and stop before the closing one.
    let last = quoted.len() - 1;
    parse::token::intern(quoted.slice(1, last))
}

/// Returns how many `#` characters `lit` starts with (0 when it starts with
/// anything else or is empty).
fn count(lit: &str) -> uint {
    let mut hashes = 0;
    for ch in lit.chars() {
        if ch != '#' {
            break;
        }
        hashes += 1;
    }
    hashes
}

/// Parses one line of token output of the form
/// `[@seq,start:end='content',<toknum>,line:col]` into a `TokenAndSpan`.
///
/// `toknum` is looked up in `tokens` to get a prototype token; tokens that
/// carry a payload (operators, literals, identifiers, lifetimes, doc
/// comments) are then rebuilt from `content`, everything else is cloned from
/// the prototype.
///
/// The span runs from `start` to `end + 1`; for `Eof` the low end is moved
/// back by one.
///
/// Panics if the line does not match the pattern, if `toknum` is not in
/// `tokens`, or if `start`/`end` do not parse as `u32`.
fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>) -> TokenAndSpan {
    // Drops the leading run of `b`/`r` characters (the `r`, `br` or `rb`
    // prefix of a raw literal) so that `count` starts at the first `#`.
    // Both characters are ASCII, so the char count is also a byte offset.
    fn strip_raw_prefix(lit: &str) -> &str {
        let prefix_len = lit.chars().take_while(|c| *c == 'b' || *c == 'r').count();
        lit.slice_from(prefix_len)
    }

    let re = regex!(
      r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]"
    );

    // `match` + `panic!` instead of `expect(format!(..))`: the message is
    // only built when the failure actually happens.
    let m = match re.captures(s) {
        Some(caps) => caps,
        None => panic!("The regex didn't match {}", s),
    };
    let start = m.name("start");
    let end = m.name("end");
    let toknum = m.name("toknum");
    let content = m.name("content");

    let proto_tok = match tokens.get(toknum) {
        Some(tok) => tok,
        None => panic!("didn't find token {} in the map", toknum),
    };

    let nm = parse::token::intern(content);

    debug!("What we got: content (`{}`), proto: {}", content, proto_tok);

    let real_tok = match *proto_tok {
        token::BinOp(..)           => token::BinOp(str_to_binop(content)),
        // `content` is the operator followed by `=`; drop the `=`.
        token::BinOpEq(..)         => token::BinOpEq(str_to_binop(content.slice_to(
                                                                    content.len() - 1))),
        token::Literal(token::Str_(..))      => token::Literal(token::Str_(fix(content))),
        // The hash count must be taken after the raw prefix; counting on
        // `content` itself always yields 0 because it starts with `r`/`b`.
        token::Literal(token::StrRaw(..))    => token::Literal(token::StrRaw(fix(content),
                                                    count(strip_raw_prefix(content)))),
        token::Literal(token::Char(..))      => token::Literal(token::Char(fixchar(content))),
        token::Literal(token::Byte(..))      => token::Literal(token::Byte(fixchar(content))),
        token::DocComment(..)      => token::DocComment(nm),
        token::Literal(token::Integer(..))   => token::Literal(token::Integer(nm)),
        token::Literal(token::Float(..))     => token::Literal(token::Float(nm)),
        // Strip the `b` and the quotes like the other string variants do.
        token::Literal(token::Binary(..))    => token::Literal(token::Binary(fix(content))),
        token::Literal(token::BinaryRaw(..)) => token::Literal(token::BinaryRaw(fix(content),
                                                    count(strip_raw_prefix(content)))),
        token::Ident(..)           => token::Ident(ast::Ident { name: nm, ctxt: 0 },
                                                   token::ModName),
        token::Lifetime(..)        => token::Lifetime(ast::Ident { name: nm, ctxt: 0 }),
        ref t => t.clone()
    };

    let offset = if real_tok == token::Eof {
        1
    } else {
        0
    };

    let sp = syntax::codemap::Span {
        lo: syntax::codemap::BytePos(from_str::<u32>(start).unwrap() - offset),
        hi: syntax::codemap::BytePos(from_str::<u32>(end).unwrap() + 1),
        expn_id: syntax::codemap::NO_EXPANSION
    };

    TokenAndSpan {
        tok: real_tok,
        sp: sp
    }
}

/// Compares two tokens.
///
/// Two `Ident` tokens are equal when their identifiers are equal; the second
/// field of `Ident` is ignored. An `Ident` never equals a non-`Ident`. Any
/// other pair is compared with `==`.
fn tok_cmp(a: &token::Token, b: &token::Token) -> bool {
    match (a, b) {
        (&token::Ident(lhs, _), &token::Ident(rhs, _)) => lhs == rhs,
        (&token::Ident(..), _) => false,
        _ => a == b
    }
}

/// Entry point: checks a token dump read from stdin against rustc's own
/// lexer, token by token.
///
/// Command-line arguments: `args[1]` is the path of the source file to lex,
/// `args[2]` is the path of the token list handed to `parse_token_list`.
/// Each stdin line is parsed with `parse_antlr_token`.
///
/// Panics on a missing argument, an unreadable file, a span mismatch or a
/// token-kind mismatch. A payload mismatch between tokens of the same kind
/// is only logged with `warn!`.
fn main() {
    // Small wrapper that brings the `Reader` trait into scope so that
    // `next_token` can be called on the `StringReader`.
    fn next(r: &mut lexer::StringReader) -> TokenAndSpan {
        use syntax::parse::lexer::Reader;
        r.next_token()
    }

    let args = std::os::args();

    // The open result is not unwrapped here; a failure surfaces at the
    // `unwrap` on `read_to_string` on the next line.
    let mut token_file = File::open(&Path::new(args[2].as_slice()));
    let token_map = parse_token_list(token_file.read_to_string().unwrap().as_slice());

    // Lazy iterator: each stdin line is parsed when the loop below asks for
    // the next token.
    let mut stdin = std::io::stdin();
    let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(),
                                                                   &token_map));

    // Set up a rustc session and a lexer over the contents of the source file.
    let code = File::open(&Path::new(args[1].as_slice())).unwrap().read_to_string().unwrap();
    let options = config::basic_options();
    let session = session::build_session(options, None,
                                         syntax::diagnostics::registry::Registry::new(&[]));
    let filemap = parse::string_to_filemap(&session.parse_sess,
                                           code,
                                           String::from_str("<n/a>"));
    let mut lexer = lexer::StringReader::new(session.diagnostic(), filemap);

    // NOTE(review): the loop is driven by the stdin tokens only; once stdin
    // is exhausted, any tokens the rustc lexer still has are not examined.
    for antlr_tok in antlr_tokens {
        let rustc_tok = next(&mut lexer);
        // When both sides report end of input, skip the span and kind checks.
        if rustc_tok.tok == token::Eof && antlr_tok.tok == token::Eof {
            continue
        }

        assert!(rustc_tok.sp == antlr_tok.sp, "{} and {} have different spans", rustc_tok,
                antlr_tok);

        // For every listed pattern: if the rustc token matches it, the other
        // token must match the same pattern (otherwise panic); a differing
        // payload is only reported with `warn!`. A rustc token that matches
        // none of the patterns must be equal to the other token.
        // The macro refers to the loop's `rustc_tok` and `antlr_tok` directly,
        // which is why it is defined inside the loop body.
        macro_rules! matches (
            ( $($x:pat),+ ) => (
                match rustc_tok.tok {
                    $($x => match antlr_tok.tok {
                        $x => {
                            if !tok_cmp(&rustc_tok.tok, &antlr_tok.tok) {
                                // FIXME #15677: needs more robust escaping in
                                // antlr
                                warn!("Different names for {} and {}", rustc_tok, antlr_tok);
                            }
                        }
                        _ => panic!("{} is not {}", antlr_tok, rustc_tok)
                    },)*
                    ref c => assert!(c == &antlr_tok.tok, "{} is not {}", rustc_tok, antlr_tok)
                }
            )
        )

        // Token kinds that carry a payload and therefore get the lenient
        // "same kind, warn on different payload" treatment.
        matches!(
            token::Literal(token::Byte(..)),
            token::Literal(token::Char(..)),
            token::Literal(token::Integer(..)),
            token::Literal(token::Float(..)),
            token::Literal(token::Str_(..)),
            token::Literal(token::StrRaw(..)),
            token::Literal(token::Binary(..)),
            token::Literal(token::BinaryRaw(..)),
            token::Ident(..),
            token::Lifetime(..),
            token::Interpolated(..),
            token::DocComment(..),
            token::Shebang(..)
        );
    }
}
Shuffle around check-lexer conditions 2014-07-21 15:04:35 -05:00			`// Copyright 2014 The Rust Project Developers. See the COPYRIGHT`
			`// file at the top-level directory of this distribution and at`
			`// http://rust-lang.org/COPYRIGHT.`
			`//`
			`// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or`
			`// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license`
			`// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your`
			`// option. This file may not be copied, modified, or distributed`
			`// except according to those terms.`

Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`#![feature(globs, phase, macro_rules)]`

			`extern crate syntax;`
			`extern crate rustc;`

			`#[phase(link)]`
			`extern crate regex;`

			`#[phase(link, plugin)]`
			`extern crate log;`

			`#[phase(plugin)] extern crate regex_macros;`

			`use std::collections::HashMap;`
			`use std::io::File;`

			`use syntax::parse;`
			`use syntax::parse::lexer;`
Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`use rustc::session::{mod, config};`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00
			`use syntax::ast;`
			`use syntax::ast::Name;`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`use syntax::parse::token;`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`use syntax::parse::lexer::TokenAndSpan;`

Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`fn parse_token_list(file: &str) -> HashMap<String, token::Token> {`
			`fn id() -> token::Token {`
Use an enum rather than a bool in token::Ident 2014-10-27 10:01:44 -05:00			`token::Ident(ast::Ident { name: Name(0), ctxt: 0, }, token::Plain)`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`}`

			`let mut res = HashMap::new();`

Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`res.insert("-1".to_string(), token::Eof);`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00
			`for line in file.split('\n') {`
			`let eq = match line.trim().rfind('=') {`
			`Some(val) => val,`
			`None => continue`
			`};`

			`let val = line.slice_to(eq);`
			`let num = line.slice_from(eq + 1);`

			`let tok = match val {`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`"SHR" => token::BinOp(token::Shr),`
			`"DOLLAR" => token::Dollar,`
			`"LT" => token::Lt,`
			`"STAR" => token::BinOp(token::Star),`
			`"FLOAT_SUFFIX" => id(),`
			`"INT_SUFFIX" => id(),`
			`"SHL" => token::BinOp(token::Shl),`
Use common variants for open and close delimiters This common representation for delimeters should make pattern matching easier. Having a separate `token::DelimToken` enum also allows us to enforce the invariant that the opening and closing delimiters must be the same in `ast::TtDelimited`, removing the need to ensure matched delimiters when working with token trees. 2014-10-29 05:37:54 -05:00			`"LBRACE" => token::OpenDelim(token::Brace),`
Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`"RARROW" => token::RArrow,`
Switch to an independent enum for `Lit*` subtokens. 2014-11-18 17:17:40 -06:00			`"LIT_STR" => token::Literal(token::Str_(Name(0))),`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`"DOTDOT" => token::DotDot,`
			`"MOD_SEP" => token::ModSep,`
			`"DOTDOTDOT" => token::DotDotDot,`
			`"NOT" => token::Not,`
			`"AND" => token::BinOp(token::And),`
Use common variants for open and close delimiters This common representation for delimeters should make pattern matching easier. Having a separate `token::DelimToken` enum also allows us to enforce the invariant that the opening and closing delimiters must be the same in `ast::TtDelimited`, removing the need to ensure matched delimiters when working with token trees. 2014-10-29 05:37:54 -05:00			`"LPAREN" => token::OpenDelim(token::Paren),`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`"ANDAND" => token::AndAnd,`
			`"AT" => token::At,`
Use common variants for open and close delimiters This common representation for delimeters should make pattern matching easier. Having a separate `token::DelimToken` enum also allows us to enforce the invariant that the opening and closing delimiters must be the same in `ast::TtDelimited`, removing the need to ensure matched delimiters when working with token trees. 2014-10-29 05:37:54 -05:00			`"LBRACKET" => token::OpenDelim(token::Bracket),`
Switch to an independent enum for `Lit*` subtokens. 2014-11-18 17:17:40 -06:00			`"LIT_STR_RAW" => token::Literal(token::StrRaw(Name(0), 0)),`
Use common variants for open and close delimiters This common representation for delimeters should make pattern matching easier. Having a separate `token::DelimToken` enum also allows us to enforce the invariant that the opening and closing delimiters must be the same in `ast::TtDelimited`, removing the need to ensure matched delimiters when working with token trees. 2014-10-29 05:37:54 -05:00			`"RPAREN" => token::CloseDelim(token::Paren),`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`"SLASH" => token::BinOp(token::Slash),`
			`"COMMA" => token::Comma,`
			`"LIFETIME" => token::Lifetime(ast::Ident { name: Name(0), ctxt: 0 }),`
			`"CARET" => token::BinOp(token::Caret),`
			`"TILDE" => token::Tilde,`
Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`"IDENT" => id(),`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`"PLUS" => token::BinOp(token::Plus),`
Switch to an independent enum for `Lit*` subtokens. 2014-11-18 17:17:40 -06:00			`"LIT_CHAR" => token::Literal(token::Char(Name(0))),`
			`"LIT_BYTE" => token::Literal(token::Byte(Name(0))),`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`"EQ" => token::Eq,`
Use common variants for open and close delimiters This common representation for delimeters should make pattern matching easier. Having a separate `token::DelimToken` enum also allows us to enforce the invariant that the opening and closing delimiters must be the same in `ast::TtDelimited`, removing the need to ensure matched delimiters when working with token trees. 2014-10-29 05:37:54 -05:00			`"RBRACKET" => token::CloseDelim(token::Bracket),`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`"COMMENT" => token::Comment,`
			`"DOC_COMMENT" => token::DocComment(Name(0)),`
			`"DOT" => token::Dot,`
			`"EQEQ" => token::EqEq,`
			`"NE" => token::Ne,`
			`"GE" => token::Ge,`
			`"PERCENT" => token::BinOp(token::Percent),`
Use common variants for open and close delimiters This common representation for delimeters should make pattern matching easier. Having a separate `token::DelimToken` enum also allows us to enforce the invariant that the opening and closing delimiters must be the same in `ast::TtDelimited`, removing the need to ensure matched delimiters when working with token trees. 2014-10-29 05:37:54 -05:00			`"RBRACE" => token::CloseDelim(token::Brace),`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`"BINOP" => token::BinOp(token::Plus),`
			`"POUND" => token::Pound,`
			`"OROR" => token::OrOr,`
Switch to an independent enum for `Lit*` subtokens. 2014-11-18 17:17:40 -06:00			`"LIT_INTEGER" => token::Literal(token::Integer(Name(0))),`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`"BINOPEQ" => token::BinOpEq(token::Plus),`
Switch to an independent enum for `Lit*` subtokens. 2014-11-18 17:17:40 -06:00			`"LIT_FLOAT" => token::Literal(token::Float(Name(0))),`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`"WHITESPACE" => token::Whitespace,`
			`"UNDERSCORE" => token::Underscore,`
			`"MINUS" => token::BinOp(token::Minus),`
			`"SEMI" => token::Semi,`
			`"COLON" => token::Colon,`
			`"FAT_ARROW" => token::FatArrow,`
			`"OR" => token::BinOp(token::Or),`
			`"GT" => token::Gt,`
			`"LE" => token::Le,`
Switch to an independent enum for `Lit*` subtokens. 2014-11-18 17:17:40 -06:00			`"LIT_BINARY" => token::Literal(token::Binary(Name(0))),`
			`"LIT_BINARY_RAW" => token::Literal(token::BinaryRaw(Name(0), 0)),`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`_ => continue,`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`};`

			`res.insert(num.to_string(), tok);`
			`}`

			`debug!("Token map: {}", res);`
			`res`
			`}`

Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`fn str_to_binop(s: &str) -> token::BinOpToken {`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`match s {`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`"+" => token::Plus,`
			`"/" => token::Slash,`
			`"-" => token::Minus,`
			`"*" => token::Star,`
			`"%" => token::Percent,`
			`"^" => token::Caret,`
			`"&" => token::And,`
			`"\|" => token::Or,`
			`"<<" => token::Shl,`
			`">>" => token::Shr,`
Rename fail! to panic! https://github.com/rust-lang/rfcs/pull/221 The current terminology of "task failure" often causes problems when writing or speaking about code. You often want to talk about the possibility of an operation that returns a Result "failing", but cannot because of the ambiguity with task failure. Instead, you have to speak of "the failing case" or "when the operation does not succeed" or other circumlocutions. Likewise, we use a "Failure" header in rustdoc to describe when operations may fail the task, but it would often be helpful to separate out a section describing the "Err-producing" case. We have been steadily moving away from task failure and toward Result as an error-handling mechanism, so we should optimize our terminology accordingly: Result-producing functions should be easy to describe. To update your code, rename any call to `fail!` to `panic!` instead. Assuming you have not created your own macro named `panic!`, this will work on UNIX based systems: grep -lZR 'fail!' . \| xargs -0 -l sed -i -e 's/fail!/panic!/g' You can of course also do this by hand. [breaking-change] 2014-10-09 14:17:22 -05:00			_ => panic!("Bad binop str `{}`", s),
Refine the tooling, handle comments 2014-07-14 19:27:28 -05:00			`}`
			`}`

Byte/raw binary literal fixes 2014-07-14 22:45:39 -05:00			`/// Assuming a string/binary literal, strip out the leading/trailing`
Refine the tooling, handle comments 2014-07-14 19:27:28 -05:00			`/// hashes and surrounding quotes/raw/binary prefix.`
			`fn fix(mut lit: &str) -> ast::Name {`
			`if lit.char_at(0) == 'r' {`
			`if lit.char_at(1) == 'b' {`
			`lit = lit.slice_from(2)`
			`} else {`
			`lit = lit.slice_from(1);`
			`}`
			`} else if lit.char_at(0) == 'b' {`
			`lit = lit.slice_from(1);`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`}`
Refine the tooling, handle comments 2014-07-14 19:27:28 -05:00
			`let leading_hashes = count(lit);`

			`// +1/-1 to adjust for single quotes`
			`parse::token::intern(lit.slice(leading_hashes + 1, lit.len() - leading_hashes - 1))`
			`}`

Byte/raw binary literal fixes 2014-07-14 22:45:39 -05:00			`/// Assuming a char/byte literal, strip the 'b' prefix and the single quotes.`
			`fn fixchar(mut lit: &str) -> ast::Name {`
			`if lit.char_at(0) == 'b' {`
			`lit = lit.slice_from(1);`
			`}`

			`parse::token::intern(lit.slice(1, lit.len() - 1))`
			`}`

Refine the tooling, handle comments 2014-07-14 19:27:28 -05:00			`fn count(lit: &str) -> uint {`
			`lit.chars().take_while(\|c\| *c == '#').count()`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`}`

Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>) -> TokenAndSpan {`
Shuffle around check-lexer conditions 2014-07-21 15:04:35 -05:00			`let re = regex!(`
			`r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]"`
			`);`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00
			`let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice());`
			`let start = m.name("start");`
			`let end = m.name("end");`
			`let toknum = m.name("toknum");`
			`let content = m.name("content");`

Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`let proto_tok = tokens.get(toknum).expect(format!("didn't find token {} in the map",`
Shuffle around check-lexer conditions 2014-07-21 15:04:35 -05:00			`toknum).as_slice());`
Refine the tooling, handle comments 2014-07-14 19:27:28 -05:00
			`let nm = parse::token::intern(content);`

			debug!("What we got: content (`{}`), proto: {}", content, proto_tok);

Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`let real_tok = match *proto_tok {`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`token::BinOp(..) => token::BinOp(str_to_binop(content)),`
			`token::BinOpEq(..) => token::BinOpEq(str_to_binop(content.slice_to(`
			`content.len() - 1))),`
Switch to an independent enum for `Lit*` subtokens. 2014-11-18 17:17:40 -06:00			`token::Literal(token::Str_(..)) => token::Literal(token::Str_(fix(content))),`
			`token::Literal(token::StrRaw(..)) => token::Literal(token::StrRaw(fix(content),`
			`count(content))),`
			`token::Literal(token::Char(..)) => token::Literal(token::Char(fixchar(content))),`
			`token::Literal(token::Byte(..)) => token::Literal(token::Byte(fixchar(content))),`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`token::DocComment(..) => token::DocComment(nm),`
Switch to an independent enum for `Lit*` subtokens. 2014-11-18 17:17:40 -06:00			`token::Literal(token::Integer(..)) => token::Literal(token::Integer(nm)),`
			`token::Literal(token::Float(..)) => token::Literal(token::Float(nm)),`
			`token::Literal(token::Binary(..)) => token::Literal(token::Binary(nm)),`
			`token::Literal(token::BinaryRaw(..)) => token::Literal(token::BinaryRaw(fix(content),`
			`count(content))),`
Use an enum rather than a bool in token::Ident 2014-10-27 10:01:44 -05:00			`token::Ident(..) => token::Ident(ast::Ident { name: nm, ctxt: 0 },`
			`token::ModName),`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`token::Lifetime(..) => token::Lifetime(ast::Ident { name: nm, ctxt: 0 }),`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`ref t => t.clone()`
			`};`

Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`let offset = if real_tok == token::Eof`
			`{`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`1`
			`} else {`
			`0`
			`};`

			`let sp = syntax::codemap::Span {`
			`lo: syntax::codemap::BytePos(from_str::<u32>(start).unwrap() - offset),`
			`hi: syntax::codemap::BytePos(from_str::<u32>(end).unwrap() + 1),`
Fix fallout in tests from removing the use of Gc in ExpnInfo. 2014-09-18 06:36:01 -05:00			`expn_id: syntax::codemap::NO_EXPANSION`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`};`

			`TokenAndSpan {`
			`tok: real_tok,`
			`sp: sp`
			`}`
			`}`

Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`fn tok_cmp(a: &token::Token, b: &token::Token) -> bool {`
Refine the tooling, handle comments 2014-07-14 19:27:28 -05:00			`match a {`
Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`&token::Ident(id, _) => match b {`
			`&token::Ident(id2, _) => id == id2,`
Refine the tooling, handle comments 2014-07-14 19:27:28 -05:00			`_ => false`
			`},`
			`_ => a == b`
			`}`
			`}`

Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`fn main() {`
			`fn next(r: &mut lexer::StringReader) -> TokenAndSpan {`
			`use syntax::parse::lexer::Reader;`
			`r.next_token()`
			`}`

lexer tests: makefile/configure 2014-07-15 02:18:17 -05:00			`let args = std::os::args();`

Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`let mut token_file = File::open(&Path::new(args[2].as_slice()));`
Break apart long lines in verify.rs 2014-07-21 14:59:25 -05:00			`let token_map = parse_token_list(token_file.read_to_string().unwrap().as_slice());`

Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`let mut stdin = std::io::stdin();`
Shuffle around check-lexer conditions 2014-07-21 15:04:35 -05:00			`let mut antlr_tokens = stdin.lines().map(\|l\| parse_antlr_token(l.unwrap().as_slice().trim(),`
			`&token_map));`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00
Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`let code = File::open(&Path::new(args[1].as_slice())).unwrap().read_to_string().unwrap();`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`let options = config::basic_options();`
Refine the tooling, handle comments 2014-07-14 19:27:28 -05:00			`let session = session::build_session(options, None,`
Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`syntax::diagnostics::registry::Registry::new(&[]));`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`let filemap = parse::string_to_filemap(&session.parse_sess,`
			`code,`
			`String::from_str("<n/a>"));`
			`let mut lexer = lexer::StringReader::new(session.diagnostic(), filemap);`

			`for antlr_tok in antlr_tokens {`
			`let rustc_tok = next(&mut lexer);`
Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`if rustc_tok.tok == token::Eof && antlr_tok.tok == token::Eof {`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`continue`
			`}`

Shuffle around check-lexer conditions 2014-07-21 15:04:35 -05:00			`assert!(rustc_tok.sp == antlr_tok.sp, "{} and {} have different spans", rustc_tok,`
			`antlr_tok);`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00
			`macro_rules! matches (`
			`( $($x:pat),+ ) => (`
			`match rustc_tok.tok {`
			`$($x => match antlr_tok.tok {`
Refine the tooling, handle comments 2014-07-14 19:27:28 -05:00			`$x => {`
			`if !tok_cmp(&rustc_tok.tok, &antlr_tok.tok) {`
			`// FIXME #15677: needs more robust escaping in`
			`// antlr`
			`warn!("Different names for {} and {}", rustc_tok, antlr_tok);`
			`}`
			`}`
Rename fail! to panic! https://github.com/rust-lang/rfcs/pull/221 The current terminology of "task failure" often causes problems when writing or speaking about code. You often want to talk about the possibility of an operation that returns a Result "failing", but cannot because of the ambiguity with task failure. Instead, you have to speak of "the failing case" or "when the operation does not succeed" or other circumlocutions. Likewise, we use a "Failure" header in rustdoc to describe when operations may fail the task, but it would often be helpful to separate out a section describing the "Err-producing" case. We have been steadily moving away from task failure and toward Result as an error-handling mechanism, so we should optimize our terminology accordingly: Result-producing functions should be easy to describe. To update your code, rename any call to `fail!` to `panic!` instead. Assuming you have not created your own macro named `panic!`, this will work on UNIX based systems: grep -lZR 'fail!' . \| xargs -0 -l sed -i -e 's/fail!/panic!/g' You can of course also do this by hand. [breaking-change] 2014-10-09 14:17:22 -05:00			`_ => panic!("{} is not {}", antlr_tok, rustc_tok)`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`},)*`
Refine the tooling, handle comments 2014-07-14 19:27:28 -05:00			`ref c => assert!(c == &antlr_tok.tok, "{} is not {}", rustc_tok, antlr_tok)`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`}`
			`)`
			`)`

Use PascalCase for token variants 2014-10-27 03:22:52 -05:00			`matches!(`
Switch to an independent enum for `Lit*` subtokens. 2014-11-18 17:17:40 -06:00			`token::Literal(token::Byte(..)),`
			`token::Literal(token::Char(..)),`
			`token::Literal(token::Integer(..)),`
			`token::Literal(token::Float(..)),`
			`token::Literal(token::Str_(..)),`
			`token::Literal(token::StrRaw(..)),`
			`token::Literal(token::Binary(..)),`
			`token::Literal(token::BinaryRaw(..)),`
Update src/grammar for language changes. 2014-11-18 19:52:44 -06:00			`token::Ident(..),`
			`token::Lifetime(..),`
			`token::Interpolated(..),`
			`token::DocComment(..),`
			`token::Shebang(..)`
Lexer; subtly wrong; no makefile 2014-07-14 03:52:18 -05:00			`);`
			`}`
			`}`