Move lexer unit tests to rustc_lexer

StringReader is an internal abstraction which at the moment changes a
lot, so these unit tests cause quite a bit of friction.

Moving them to rustc_lexer and switching to a more integrated,
snapshot-based testing style (sketched below) should make them much
less annoying, hopefully without decreasing their usefulness much.
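
For context, the "integrated" style here means snapshot tests over a
plain-text token dump. A minimal sketch of the idiom, assuming the
published expect-test crate; the test itself is illustrative and not
part of this commit:

use expect_test::expect;

#[test]
fn integrated_style_example() {
    // Render the value under test as plain text...
    let actual = format!("{:?}", vec![1, 2, 3]);
    // ...and compare it against an inline snapshot. On mismatch,
    // rerunning with UPDATE_EXPECT=1 rewrites the snapshot in place.
    expect![["[1, 2, 3]"]].assert_eq(&actual);
}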

Note that the doublecolon tests are removed (it's unclear what they
were testing).

\r\n tests are removed as well, as we normalize line endings even
before lexing.
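
A hedged sketch of that normalization, which rustc performs when a
source file is loaded, before the lexer ever sees the text (the
function below is illustrative, not the compiler's exact code):

fn normalize_newlines(src: &mut String) {
    // Collapse every "\r\n" into "\n" so downstream stages only ever
    // see "\n" line endings; a lone '\r' is left alone and diagnosed
    // later.
    if src.contains("\r\n") {
        *src = src.replace("\r\n", "\n");
    }
}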
Aleksey Kladov 2020-08-29 12:10:16 +02:00
parent 85fbf49ce0
commit ccffea5b6b
4 changed files with 154 additions and 278 deletions

Cargo.lock

@@ -342,7 +342,7 @@ dependencies = [
name = "cargo-miri"
version = "0.1.0"
dependencies = [
- "cargo_metadata 0.11.1",
+ "cargo_metadata 0.9.1",
 "directories",
 "rustc-workspace-hack",
 "rustc_version",
@@ -391,6 +391,18 @@ dependencies = [
 "serde_json",
]

+[[package]]
+name = "cargo_metadata"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46e3374c604fb39d1a2f35ed5e4a4e30e60d01fab49446e08f1b3e9a90aef202"
+dependencies = [
+ "semver 0.9.0",
+ "serde",
+ "serde_derive",
+ "serde_json",
+]
+
[[package]]
name = "cargo_metadata"
version = "0.11.1"
@@ -1979,6 +1991,7 @@ dependencies = [
name = "miri"
version = "0.1.0"
dependencies = [
+ "byteorder",
 "colored",
 "compiletest_rs",
 "env_logger 0.7.1",


@@ -39,11 +39,6 @@
mod parse {
    #[cfg(test)]
    mod tests;
-    #[cfg(test)]
-    mod lexer {
-        #[cfg(test)]
-        mod tests;
-    }
}
#[cfg(test)]
mod tokenstream {


@@ -1,252 +0,0 @@
use rustc_ast::ast::AttrStyle;
use rustc_ast::token::{self, CommentKind, Token, TokenKind};
use rustc_data_structures::sync::Lrc;
use rustc_errors::{emitter::EmitterWriter, Handler};
use rustc_parse::lexer::StringReader;
use rustc_session::parse::ParseSess;
use rustc_span::source_map::{FilePathMapping, SourceMap};
use rustc_span::symbol::Symbol;
use rustc_span::with_default_session_globals;
use rustc_span::{BytePos, Span};

use std::io;
use std::path::PathBuf;

fn mk_sess(sm: Lrc<SourceMap>) -> ParseSess {
    let emitter = EmitterWriter::new(
        Box::new(io::sink()),
        Some(sm.clone()),
        false,
        false,
        false,
        None,
        false,
    );
    ParseSess::with_span_handler(Handler::with_emitter(true, None, Box::new(emitter)), sm)
}

// Creates a string reader for the given string.
fn setup<'a>(sm: &SourceMap, sess: &'a ParseSess, teststr: String) -> StringReader<'a> {
    let sf = sm.new_source_file(PathBuf::from(teststr.clone()).into(), teststr);
    StringReader::new(sess, sf, None)
}

#[test]
fn t1() {
    with_default_session_globals(|| {
        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let sh = mk_sess(sm.clone());
        let mut string_reader = setup(
            &sm,
            &sh,
            "/* my source file */ fn main() { println!(\"zebra\"); }\n".to_string(),
        );
        assert_eq!(string_reader.next_token(), token::Comment);
        assert_eq!(string_reader.next_token(), token::Whitespace);
        let tok1 = string_reader.next_token();
        let tok2 = Token::new(mk_ident("fn"), Span::with_root_ctxt(BytePos(21), BytePos(23)));
        assert_eq!(tok1.kind, tok2.kind);
        assert_eq!(tok1.span, tok2.span);
        assert_eq!(string_reader.next_token(), token::Whitespace);
        // Read another token.
        let tok3 = string_reader.next_token();
        assert_eq!(string_reader.pos(), BytePos(28));
        let tok4 = Token::new(mk_ident("main"), Span::with_root_ctxt(BytePos(24), BytePos(28)));
        assert_eq!(tok3.kind, tok4.kind);
        assert_eq!(tok3.span, tok4.span);
        assert_eq!(string_reader.next_token(), token::OpenDelim(token::Paren));
        assert_eq!(string_reader.pos(), BytePos(29))
    })
}

// Checks that the given reader produces the desired stream
// of tokens (stop checking after exhausting `expected`).
fn check_tokenization(mut string_reader: StringReader<'_>, expected: Vec<TokenKind>) {
    for expected_tok in &expected {
        assert_eq!(&string_reader.next_token(), expected_tok);
    }
}

// Makes the identifier by looking up the string in the interner.
fn mk_ident(id: &str) -> TokenKind {
    token::Ident(Symbol::intern(id), false)
}

fn mk_lit(kind: token::LitKind, symbol: &str, suffix: Option<&str>) -> TokenKind {
    TokenKind::lit(kind, Symbol::intern(symbol), suffix.map(Symbol::intern))
}

#[test]
fn doublecolon_parsing() {
    with_default_session_globals(|| {
        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let sh = mk_sess(sm.clone());
        check_tokenization(
            setup(&sm, &sh, "a b".to_string()),
            vec![mk_ident("a"), token::Whitespace, mk_ident("b")],
        );
    })
}

#[test]
fn doublecolon_parsing_2() {
    with_default_session_globals(|| {
        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let sh = mk_sess(sm.clone());
        check_tokenization(
            setup(&sm, &sh, "a::b".to_string()),
            vec![mk_ident("a"), token::Colon, token::Colon, mk_ident("b")],
        );
    })
}

#[test]
fn doublecolon_parsing_3() {
    with_default_session_globals(|| {
        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let sh = mk_sess(sm.clone());
        check_tokenization(
            setup(&sm, &sh, "a ::b".to_string()),
            vec![mk_ident("a"), token::Whitespace, token::Colon, token::Colon, mk_ident("b")],
        );
    })
}

#[test]
fn doublecolon_parsing_4() {
    with_default_session_globals(|| {
        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let sh = mk_sess(sm.clone());
        check_tokenization(
            setup(&sm, &sh, "a:: b".to_string()),
            vec![mk_ident("a"), token::Colon, token::Colon, token::Whitespace, mk_ident("b")],
        );
    })
}

#[test]
fn character_a() {
    with_default_session_globals(|| {
        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let sh = mk_sess(sm.clone());
        assert_eq!(setup(&sm, &sh, "'a'".to_string()).next_token(), mk_lit(token::Char, "a", None),);
    })
}

#[test]
fn character_space() {
    with_default_session_globals(|| {
        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let sh = mk_sess(sm.clone());
        assert_eq!(setup(&sm, &sh, "' '".to_string()).next_token(), mk_lit(token::Char, " ", None),);
    })
}

#[test]
fn character_escaped() {
    with_default_session_globals(|| {
        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let sh = mk_sess(sm.clone());
        assert_eq!(
            setup(&sm, &sh, "'\\n'".to_string()).next_token(),
            mk_lit(token::Char, "\\n", None),
        );
    })
}

#[test]
fn lifetime_name() {
    with_default_session_globals(|| {
        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let sh = mk_sess(sm.clone());
        assert_eq!(
            setup(&sm, &sh, "'abc".to_string()).next_token(),
            token::Lifetime(Symbol::intern("'abc")),
        );
    })
}

#[test]
fn raw_string() {
    with_default_session_globals(|| {
        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let sh = mk_sess(sm.clone());
        assert_eq!(
            setup(&sm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token(),
            mk_lit(token::StrRaw(3), "\"#a\\b\x00c\"", None),
        );
    })
}

#[test]
fn literal_suffixes() {
    with_default_session_globals(|| {
        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let sh = mk_sess(sm.clone());
        macro_rules! test {
            ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
                assert_eq!(
                    setup(&sm, &sh, format!("{}suffix", $input)).next_token(),
                    mk_lit(token::$tok_type, $tok_contents, Some("suffix")),
                );
                // with a whitespace separator
                assert_eq!(
                    setup(&sm, &sh, format!("{} suffix", $input)).next_token(),
                    mk_lit(token::$tok_type, $tok_contents, None),
                );
            }};
        }

        test!("'a'", Char, "a");
        test!("b'a'", Byte, "a");
        test!("\"a\"", Str, "a");
        test!("b\"a\"", ByteStr, "a");
        test!("1234", Integer, "1234");
        test!("0b101", Integer, "0b101");
        test!("0xABC", Integer, "0xABC");
        test!("1.0", Float, "1.0");
        test!("1.0e10", Float, "1.0e10");

        assert_eq!(
            setup(&sm, &sh, "2us".to_string()).next_token(),
            mk_lit(token::Integer, "2", Some("us")),
        );
        assert_eq!(
            setup(&sm, &sh, "r###\"raw\"###suffix".to_string()).next_token(),
            mk_lit(token::StrRaw(3), "raw", Some("suffix")),
        );
        assert_eq!(
            setup(&sm, &sh, "br###\"raw\"###suffix".to_string()).next_token(),
            mk_lit(token::ByteStrRaw(3), "raw", Some("suffix")),
        );
    })
}

#[test]
fn nested_block_comments() {
    with_default_session_globals(|| {
        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let sh = mk_sess(sm.clone());
        let mut lexer = setup(&sm, &sh, "/* /* */ */'a'".to_string());
        assert_eq!(lexer.next_token(), token::Comment);
        assert_eq!(lexer.next_token(), mk_lit(token::Char, "a", None));
    })
}

#[test]
fn crlf_comments() {
    with_default_session_globals(|| {
        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let sh = mk_sess(sm.clone());
        let mut lexer = setup(&sm, &sh, "// test\r\n/// test\r\n".to_string());
        let comment = lexer.next_token();
        assert_eq!(comment.kind, token::Comment);
        assert_eq!((comment.span.lo(), comment.span.hi()), (BytePos(0), BytePos(7)));
        assert_eq!(lexer.next_token(), token::Whitespace);
        assert_eq!(
            lexer.next_token(),
            token::DocComment(CommentKind::Line, AttrStyle::Outer, Symbol::intern(" test"))
        );
    })
}


@@ -128,6 +128,34 @@ fn check_lexing(src: &str, expect: Expect) {
    expect.assert_eq(&actual)
}

#[test]
fn smoke_test() {
    check_lexing(
        "/* my source file */ fn main() { println!(\"zebra\"); }\n",
        expect![[r#"
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 20 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Ident, len: 2 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Ident, len: 4 }
            Token { kind: OpenParen, len: 1 }
            Token { kind: CloseParen, len: 1 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: OpenBrace, len: 1 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Ident, len: 7 }
            Token { kind: Bang, len: 1 }
            Token { kind: OpenParen, len: 1 }
            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 7 }, len: 7 }
            Token { kind: CloseParen, len: 1 }
            Token { kind: Semi, len: 1 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: CloseBrace, len: 1 }
            Token { kind: Whitespace, len: 1 }
        "#]],
    )
}

#[test]
fn comment_flavors() {
    check_lexing(
@@ -143,25 +171,117 @@ fn comment_flavors() {
/*! inner doc block */
",
        expect![[r#"
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: None }, len: 7 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: None }, len: 17 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: Some(Outer) }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: Some(Inner) }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: Some(Outer), terminated: true }, len: 22 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: Some(Inner), terminated: true }, len: 22 }
            Token { kind: Whitespace, len: 1 }
        "#]],
    )
}

#[test]
fn nested_block_comments() {
    check_lexing(
        "/* /* */ */'a'",
        expect![[r#"
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
        "#]],
    )
}

#[test]
fn characters() {
    check_lexing(
        "'a' ' ' '\\n'",
        expect![[r#"
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 4 }, len: 4 }
        "#]],
    );
}

#[test]
fn lifetime() {
    check_lexing(
        "'abc",
        expect![[r#"
            Token { kind: Lifetime { starts_with_number: false }, len: 4 }
        "#]],
    );
}

#[test]
fn raw_string() {
    check_lexing(
        "r###\"\"#a\\b\x00c\"\"###",
        expect![[r#"
            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 17 }, len: 17 }
        "#]],
    )
}

#[test]
fn literal_suffixes() {
    check_lexing(
        r####"
'a'
b'a'
"a"
b"a"
1234
0b101
0xABC
1.0
1.0e10
2us
r###"raw"###suffix
br###"raw"###suffix
"####,
        expect![[r#"
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Byte { terminated: true }, suffix_start: 4 }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: ByteStr { terminated: true }, suffix_start: 4 }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 4 }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Binary, empty_int: false }, suffix_start: 5 }, len: 5 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Hexadecimal, empty_int: false }, suffix_start: 5 }, len: 5 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Float { base: Decimal, empty_exponent: false }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Float { base: Decimal, empty_exponent: false }, suffix_start: 6 }, len: 6 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 12 }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: RawByteStr { n_hashes: 3, err: None }, suffix_start: 13 }, len: 19 }
            Token { kind: Whitespace, len: 1 }
        "#]],
    )
}
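
Only the tail of the check_lexing helper is visible in the hunk above.
Given the calls in these tests and the expect-test API, it is likely
shaped as follows (a sketch, not verbatim from this commit):

use expect_test::Expect;
use rustc_lexer::tokenize;

fn check_lexing(src: &str, expect: Expect) {
    // Debug-print each token on its own line...
    let actual: String = tokenize(src).map(|token| format!("{:?}\n", token)).collect();
    // ...and compare the whole dump against the inline snapshot.
    expect.assert_eq(&actual)
}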