ra_syntax: added tests for tokenization errors
This commit is contained in:
parent
9367b9a292
commit
a3e5663ae0
@ -1,19 +1,28 @@
|
||||
use std::{
|
||||
fmt::Write,
|
||||
path::{Component, PathBuf},
|
||||
path::{Component, Path, PathBuf},
|
||||
};
|
||||
|
||||
use test_utils::{collect_tests, dir_tests, project_dir, read_text};
|
||||
|
||||
use crate::{fuzz, SourceFile};
|
||||
use crate::{fuzz, tokenize, Location, SourceFile, SyntaxError, TextRange, Token};
|
||||
|
||||
#[test]
|
||||
fn lexer_tests() {
|
||||
dir_tests(&test_data_dir(), &["lexer"], |text, _| {
|
||||
// FIXME: add tests for errors (their format is up to discussion)
|
||||
let (tokens, _errors) = crate::tokenize(text);
|
||||
dump_tokens(&tokens, text)
|
||||
})
|
||||
// FIXME:
|
||||
// * Add tests for unicode escapes in byte-character and [raw]-byte-string literals
|
||||
// * Add tests for unescape errors
|
||||
|
||||
dir_tests(&test_data_dir(), &["lexer/ok"], |text, path| {
|
||||
let (tokens, errors) = tokenize(text);
|
||||
assert_errors_are_absent(&errors, path);
|
||||
dump_tokens_and_errors(&tokens, &errors, text)
|
||||
});
|
||||
dir_tests(&test_data_dir(), &["lexer/err"], |text, path| {
|
||||
let (tokens, errors) = tokenize(text);
|
||||
assert_errors_are_present(&errors, path);
|
||||
dump_tokens_and_errors(&tokens, &errors, text)
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -33,18 +42,13 @@ fn parser_tests() {
|
||||
dir_tests(&test_data_dir(), &["parser/inline/ok", "parser/ok"], |text, path| {
|
||||
let parse = SourceFile::parse(text);
|
||||
let errors = parse.errors();
|
||||
assert_eq!(
|
||||
errors,
|
||||
&[] as &[crate::SyntaxError],
|
||||
"There should be no errors in the file {:?}",
|
||||
path.display(),
|
||||
);
|
||||
assert_errors_are_absent(&errors, path);
|
||||
parse.debug_dump()
|
||||
});
|
||||
dir_tests(&test_data_dir(), &["parser/err", "parser/inline/err"], |text, path| {
|
||||
let parse = SourceFile::parse(text);
|
||||
let errors = parse.errors();
|
||||
assert!(!errors.is_empty(), "There should be errors in the file {:?}", path.display());
|
||||
assert_errors_are_present(&errors, path);
|
||||
parse.debug_dump()
|
||||
});
|
||||
}
|
||||
@ -76,7 +80,7 @@ fn self_hosting_parsing() {
|
||||
.into_iter()
|
||||
.filter_entry(|entry| {
|
||||
!entry.path().components().any(|component| {
|
||||
// Get all files which are not in the crates/ra_syntax/tests/data folder
|
||||
// Get all files which are not in the crates/ra_syntax/test_data folder
|
||||
component == Component::Normal(OsStr::new("test_data"))
|
||||
})
|
||||
})
|
||||
@ -102,15 +106,47 @@ fn test_data_dir() -> PathBuf {
|
||||
project_dir().join("crates/ra_syntax/test_data")
|
||||
}
|
||||
|
||||
fn dump_tokens(tokens: &[crate::Token], text: &str) -> String {
|
||||
fn assert_errors_are_present(errors: &[SyntaxError], path: &Path) {
|
||||
assert!(!errors.is_empty(), "There should be errors in the file {:?}", path.display());
|
||||
}
|
||||
fn assert_errors_are_absent(errors: &[SyntaxError], path: &Path) {
|
||||
assert_eq!(
|
||||
errors,
|
||||
&[] as &[SyntaxError],
|
||||
"There should be no errors in the file {:?}",
|
||||
path.display(),
|
||||
);
|
||||
}
|
||||
|
||||
fn dump_tokens_and_errors(tokens: &[Token], errors: &[SyntaxError], text: &str) -> String {
|
||||
let mut acc = String::new();
|
||||
let mut offset = 0;
|
||||
for token in tokens {
|
||||
let len: u32 = token.len.into();
|
||||
let len = len as usize;
|
||||
let token_text = &text[offset..offset + len];
|
||||
offset += len;
|
||||
write!(acc, "{:?} {} {:?}\n", token.kind, token.len, token_text).unwrap()
|
||||
let token_len = token.len.to_usize();
|
||||
let token_text = &text[offset..offset + token_len];
|
||||
offset += token_len;
|
||||
writeln!(acc, "{:?} {} {:?}", token.kind, token_len, token_text).unwrap();
|
||||
}
|
||||
for err in errors {
|
||||
let err_range = location_to_range(err.location());
|
||||
writeln!(
|
||||
acc,
|
||||
"> error{:?} token({:?}) msg({})",
|
||||
err.location(),
|
||||
&text[err_range],
|
||||
err.kind()
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
return acc;
|
||||
|
||||
// FIXME: copy-pasted this from `ra_ide/src/diagnostics.rs`
|
||||
// `Location` will be refactored soon in new PR, see todos here:
|
||||
// https://github.com/rust-analyzer/rust-analyzer/issues/223
|
||||
fn location_to_range(location: Location) -> TextRange {
|
||||
match location {
|
||||
Location::Offset(offset) => TextRange::offset_len(offset, 1.into()),
|
||||
Location::Range(range) => range,
|
||||
}
|
||||
}
|
||||
acc
|
||||
}
|
||||
|
@ -94,6 +94,12 @@ impl From<rustc_lexer::unescape::EscapeError> for SyntaxErrorKind {
|
||||
}
|
||||
|
||||
pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> {
|
||||
// FIXME:
|
||||
// * Add validation of character literal containing only a single char
|
||||
// * Add validation of `crate` keyword not appearing in the middle of the symbol path
|
||||
// * Add validation of doc comments are being attached to nodes
|
||||
// * Remove validation of unterminated literals (it is already implemented in `tokenize()`)
|
||||
|
||||
let mut errors = Vec::new();
|
||||
for node in root.descendants() {
|
||||
match_ast! {
|
||||
|
@ -1,3 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
// hello
|
||||
//! World
|
@ -1,6 +0,0 @@
|
||||
SHEBANG 19 "#!/usr/bin/env bash"
|
||||
WHITESPACE 1 "\n"
|
||||
COMMENT 8 "// hello"
|
||||
WHITESPACE 1 "\n"
|
||||
COMMENT 9 "//! World"
|
||||
WHITESPACE 1 "\n"
|
@ -1 +0,0 @@
|
||||
'1
|
@ -1 +0,0 @@
|
||||
LIFETIME 2 "\'1"
|
@ -1 +0,0 @@
|
||||
"hello
|
@ -1 +0,0 @@
|
||||
STRING 7 "\"hello\n"
|
@ -0,0 +1 @@
|
||||
'
|
@ -0,0 +1,2 @@
|
||||
CHAR 1 "\'"
|
||||
> error[0; 1) token("\'") msg(Missing trailing `'` symbol to terminate the character literal)
|
@ -0,0 +1 @@
|
||||
'🦀
|
@ -0,0 +1,2 @@
|
||||
CHAR 5 "\'🦀"
|
||||
> error[0; 5) token("\'🦀") msg(Missing trailing `'` symbol to terminate the character literal)
|
@ -0,0 +1 @@
|
||||
'\x7f
|
@ -0,0 +1,2 @@
|
||||
CHAR 5 "\'\\x7f"
|
||||
> error[0; 5) token("\'\\x7f") msg(Missing trailing `'` symbol to terminate the character literal)
|
@ -0,0 +1 @@
|
||||
'\u{20AA}
|
@ -0,0 +1,2 @@
|
||||
CHAR 9 "\'\\u{20AA}"
|
||||
> error[0; 9) token("\'\\u{20AA}") msg(Missing trailing `'` symbol to terminate the character literal)
|
@ -0,0 +1 @@
|
||||
'
|
@ -0,0 +1,2 @@
|
||||
CHAR 2 "\' "
|
||||
> error[0; 2) token("\' ") msg(Missing trailing `'` symbol to terminate the character literal)
|
@ -0,0 +1 @@
|
||||
'\
|
@ -0,0 +1,2 @@
|
||||
CHAR 2 "\'\\"
|
||||
> error[0; 2) token("\'\\") msg(Missing trailing `'` symbol to terminate the character literal)
|
@ -0,0 +1 @@
|
||||
'\n
|
@ -0,0 +1,2 @@
|
||||
CHAR 3 "\'\\n"
|
||||
> error[0; 3) token("\'\\n") msg(Missing trailing `'` symbol to terminate the character literal)
|
@ -0,0 +1 @@
|
||||
'\'
|
@ -0,0 +1,2 @@
|
||||
CHAR 3 "\'\\\'"
|
||||
> error[0; 3) token("\'\\\'") msg(Missing trailing `'` symbol to terminate the character literal)
|
@ -0,0 +1 @@
|
||||
b'
|
@ -0,0 +1,2 @@
|
||||
BYTE 2 "b\'"
|
||||
> error[0; 2) token("b\'") msg(Missing trailing `'` symbol to terminate the byte literal)
|
@ -0,0 +1 @@
|
||||
b'🦀
|
@ -0,0 +1,2 @@
|
||||
BYTE 6 "b\'🦀"
|
||||
> error[0; 6) token("b\'🦀") msg(Missing trailing `'` symbol to terminate the byte literal)
|
@ -0,0 +1 @@
|
||||
b'\x7f
|
@ -0,0 +1,2 @@
|
||||
BYTE 6 "b\'\\x7f"
|
||||
> error[0; 6) token("b\'\\x7f") msg(Missing trailing `'` symbol to terminate the byte literal)
|
@ -0,0 +1 @@
|
||||
b'\u{20AA}
|
@ -0,0 +1,2 @@
|
||||
BYTE 10 "b\'\\u{20AA}"
|
||||
> error[0; 10) token("b\'\\u{20AA}") msg(Missing trailing `'` symbol to terminate the byte literal)
|
@ -0,0 +1 @@
|
||||
b'
|
@ -0,0 +1,2 @@
|
||||
BYTE 3 "b\' "
|
||||
> error[0; 3) token("b\' ") msg(Missing trailing `'` symbol to terminate the byte literal)
|
@ -0,0 +1 @@
|
||||
b'\
|
@ -0,0 +1,2 @@
|
||||
BYTE 3 "b\'\\"
|
||||
> error[0; 3) token("b\'\\") msg(Missing trailing `'` symbol to terminate the byte literal)
|
@ -0,0 +1 @@
|
||||
b'\n
|
@ -0,0 +1,2 @@
|
||||
BYTE 4 "b\'\\n"
|
||||
> error[0; 4) token("b\'\\n") msg(Missing trailing `'` symbol to terminate the byte literal)
|
@ -0,0 +1 @@
|
||||
b'\'
|
@ -0,0 +1,2 @@
|
||||
BYTE 4 "b\'\\\'"
|
||||
> error[0; 4) token("b\'\\\'") msg(Missing trailing `'` symbol to terminate the byte literal)
|
@ -0,0 +1 @@
|
||||
"
|
@ -0,0 +1,2 @@
|
||||
STRING 1 "\""
|
||||
> error[0; 1) token("\"") msg(Missing trailing `"` symbol to terminate the string literal)
|
@ -0,0 +1 @@
|
||||
"🦀
|
@ -0,0 +1,2 @@
|
||||
STRING 5 "\"🦀"
|
||||
> error[0; 5) token("\"🦀") msg(Missing trailing `"` symbol to terminate the string literal)
|
@ -0,0 +1 @@
|
||||
"\x7f
|
@ -0,0 +1,2 @@
|
||||
STRING 5 "\"\\x7f"
|
||||
> error[0; 5) token("\"\\x7f") msg(Missing trailing `"` symbol to terminate the string literal)
|
@ -0,0 +1 @@
|
||||
"\u{20AA}
|
@ -0,0 +1,2 @@
|
||||
STRING 9 "\"\\u{20AA}"
|
||||
> error[0; 9) token("\"\\u{20AA}") msg(Missing trailing `"` symbol to terminate the string literal)
|
@ -0,0 +1 @@
|
||||
"
|
@ -0,0 +1,2 @@
|
||||
STRING 2 "\" "
|
||||
> error[0; 2) token("\" ") msg(Missing trailing `"` symbol to terminate the string literal)
|
@ -0,0 +1 @@
|
||||
"\
|
@ -0,0 +1,2 @@
|
||||
STRING 2 "\"\\"
|
||||
> error[0; 2) token("\"\\") msg(Missing trailing `"` symbol to terminate the string literal)
|
@ -0,0 +1 @@
|
||||
"\n
|
@ -0,0 +1,2 @@
|
||||
STRING 3 "\"\\n"
|
||||
> error[0; 3) token("\"\\n") msg(Missing trailing `"` symbol to terminate the string literal)
|
@ -0,0 +1 @@
|
||||
"\"
|
@ -0,0 +1,2 @@
|
||||
STRING 3 "\"\\\""
|
||||
> error[0; 3) token("\"\\\"") msg(Missing trailing `"` symbol to terminate the string literal)
|
@ -0,0 +1 @@
|
||||
b"
|
@ -0,0 +1,2 @@
|
||||
BYTE_STRING 2 "b\""
|
||||
> error[0; 2) token("b\"") msg(Missing trailing `"` symbol to terminate the byte string literal)
|
@ -0,0 +1 @@
|
||||
b"🦀
|
@ -0,0 +1,2 @@
|
||||
BYTE_STRING 6 "b\"🦀"
|
||||
> error[0; 6) token("b\"🦀") msg(Missing trailing `"` symbol to terminate the byte string literal)
|
@ -0,0 +1 @@
|
||||
b"\x7f
|
@ -0,0 +1,2 @@
|
||||
BYTE_STRING 6 "b\"\\x7f"
|
||||
> error[0; 6) token("b\"\\x7f") msg(Missing trailing `"` symbol to terminate the byte string literal)
|
@ -0,0 +1 @@
|
||||
b"\u{20AA}
|
@ -0,0 +1,2 @@
|
||||
BYTE_STRING 10 "b\"\\u{20AA}"
|
||||
> error[0; 10) token("b\"\\u{20AA}") msg(Missing trailing `"` symbol to terminate the byte string literal)
|
@ -0,0 +1 @@
|
||||
b"
|
@ -0,0 +1,2 @@
|
||||
BYTE_STRING 3 "b\" "
|
||||
> error[0; 3) token("b\" ") msg(Missing trailing `"` symbol to terminate the byte string literal)
|
@ -0,0 +1 @@
|
||||
b"\
|
@ -0,0 +1,2 @@
|
||||
BYTE_STRING 3 "b\"\\"
|
||||
> error[0; 3) token("b\"\\") msg(Missing trailing `"` symbol to terminate the byte string literal)
|
@ -0,0 +1 @@
|
||||
b"\n
|
@ -0,0 +1,2 @@
|
||||
BYTE_STRING 4 "b\"\\n"
|
||||
> error[0; 4) token("b\"\\n") msg(Missing trailing `"` symbol to terminate the byte string literal)
|
@ -0,0 +1 @@
|
||||
b"\"
|
@ -0,0 +1,2 @@
|
||||
BYTE_STRING 4 "b\"\\\""
|
||||
> error[0; 4) token("b\"\\\"") msg(Missing trailing `"` symbol to terminate the byte string literal)
|
@ -0,0 +1 @@
|
||||
r##"
|
@ -0,0 +1,2 @@
|
||||
RAW_STRING 4 "r##\""
|
||||
> error[0; 4) token("r##\"") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal)
|
@ -0,0 +1 @@
|
||||
r##"🦀
|
@ -0,0 +1,2 @@
|
||||
RAW_STRING 8 "r##\"🦀"
|
||||
> error[0; 8) token("r##\"🦀") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal)
|
@ -0,0 +1 @@
|
||||
r##"\x7f
|
@ -0,0 +1,2 @@
|
||||
RAW_STRING 8 "r##\"\\x7f"
|
||||
> error[0; 8) token("r##\"\\x7f") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal)
|
@ -0,0 +1 @@
|
||||
r##"\u{20AA}
|
@ -0,0 +1,2 @@
|
||||
RAW_STRING 12 "r##\"\\u{20AA}"
|
||||
> error[0; 12) token("r##\"\\u{20AA}") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal)
|
@ -0,0 +1 @@
|
||||
r##"
|
@ -0,0 +1,2 @@
|
||||
RAW_STRING 5 "r##\" "
|
||||
> error[0; 5) token("r##\" ") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal)
|
@ -0,0 +1 @@
|
||||
r##"\
|
@ -0,0 +1,2 @@
|
||||
RAW_STRING 5 "r##\"\\"
|
||||
> error[0; 5) token("r##\"\\") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal)
|
@ -0,0 +1 @@
|
||||
r##"\n
|
@ -0,0 +1,2 @@
|
||||
RAW_STRING 6 "r##\"\\n"
|
||||
> error[0; 6) token("r##\"\\n") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal)
|
@ -0,0 +1 @@
|
||||
br##"
|
@ -0,0 +1,2 @@
|
||||
RAW_BYTE_STRING 5 "br##\""
|
||||
> error[0; 5) token("br##\"") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal)
|
@ -0,0 +1 @@
|
||||
br##"🦀
|
@ -0,0 +1,2 @@
|
||||
RAW_BYTE_STRING 9 "br##\"🦀"
|
||||
> error[0; 9) token("br##\"🦀") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal)
|
@ -0,0 +1 @@
|
||||
br##"\x7f
|
@ -0,0 +1,2 @@
|
||||
RAW_BYTE_STRING 9 "br##\"\\x7f"
|
||||
> error[0; 9) token("br##\"\\x7f") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal)
|
@ -0,0 +1 @@
|
||||
br##"\u{20AA}
|
@ -0,0 +1,2 @@
|
||||
RAW_BYTE_STRING 13 "br##\"\\u{20AA}"
|
||||
> error[0; 13) token("br##\"\\u{20AA}") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal)
|
@ -0,0 +1 @@
|
||||
br##"
|
@ -0,0 +1,2 @@
|
||||
RAW_BYTE_STRING 6 "br##\" "
|
||||
> error[0; 6) token("br##\" ") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal)
|
@ -0,0 +1 @@
|
||||
br##"\
|
@ -0,0 +1,2 @@
|
||||
RAW_BYTE_STRING 6 "br##\"\\"
|
||||
> error[0; 6) token("br##\"\\") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal)
|
@ -0,0 +1 @@
|
||||
br##"\n
|
@ -0,0 +1,2 @@
|
||||
RAW_BYTE_STRING 7 "br##\"\\n"
|
||||
> error[0; 7) token("br##\"\\n") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal)
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user