ra_syntax: added tests for tokenization errors

This commit is contained in:
Veetaha 2020-02-01 22:25:01 +02:00
parent 9367b9a292
commit a3e5663ae0
150 changed files with 427 additions and 49 deletions

View File

@@ -1,19 +1,28 @@
use std::{
fmt::Write,
path::{Component, PathBuf},
path::{Component, Path, PathBuf},
};
use test_utils::{collect_tests, dir_tests, project_dir, read_text};
use crate::{fuzz, SourceFile};
use crate::{fuzz, tokenize, Location, SourceFile, SyntaxError, TextRange, Token};
#[test]
fn lexer_tests() {
dir_tests(&test_data_dir(), &["lexer"], |text, _| {
// FIXME: add tests for errors (their format is up to discussion)
let (tokens, _errors) = crate::tokenize(text);
dump_tokens(&tokens, text)
})
// FIXME:
// * Add tests for unicode escapes in byte-character and [raw]-byte-string literals
// * Add tests for unescape errors
dir_tests(&test_data_dir(), &["lexer/ok"], |text, path| {
let (tokens, errors) = tokenize(text);
assert_errors_are_absent(&errors, path);
dump_tokens_and_errors(&tokens, &errors, text)
});
dir_tests(&test_data_dir(), &["lexer/err"], |text, path| {
let (tokens, errors) = tokenize(text);
assert_errors_are_present(&errors, path);
dump_tokens_and_errors(&tokens, &errors, text)
});
}
#[test]
@@ -33,18 +42,13 @@ fn parser_tests() {
dir_tests(&test_data_dir(), &["parser/inline/ok", "parser/ok"], |text, path| {
let parse = SourceFile::parse(text);
let errors = parse.errors();
assert_eq!(
errors,
&[] as &[crate::SyntaxError],
"There should be no errors in the file {:?}",
path.display(),
);
assert_errors_are_absent(&errors, path);
parse.debug_dump()
});
dir_tests(&test_data_dir(), &["parser/err", "parser/inline/err"], |text, path| {
let parse = SourceFile::parse(text);
let errors = parse.errors();
assert!(!errors.is_empty(), "There should be errors in the file {:?}", path.display());
assert_errors_are_present(&errors, path);
parse.debug_dump()
});
}
@@ -76,7 +80,7 @@ fn self_hosting_parsing() {
.into_iter()
.filter_entry(|entry| {
!entry.path().components().any(|component| {
// Get all files which are not in the crates/ra_syntax/tests/data folder
// Get all files which are not in the crates/ra_syntax/test_data folder
component == Component::Normal(OsStr::new("test_data"))
})
})
@@ -102,15 +106,47 @@ fn test_data_dir() -> PathBuf {
project_dir().join("crates/ra_syntax/test_data")
}
fn dump_tokens(tokens: &[crate::Token], text: &str) -> String {
fn assert_errors_are_present(errors: &[SyntaxError], path: &Path) {
assert!(!errors.is_empty(), "There should be errors in the file {:?}", path.display());
}
fn assert_errors_are_absent(errors: &[SyntaxError], path: &Path) {
assert_eq!(
errors,
&[] as &[SyntaxError],
"There should be no errors in the file {:?}",
path.display(),
);
}
fn dump_tokens_and_errors(tokens: &[Token], errors: &[SyntaxError], text: &str) -> String {
let mut acc = String::new();
let mut offset = 0;
for token in tokens {
let len: u32 = token.len.into();
let len = len as usize;
let token_text = &text[offset..offset + len];
offset += len;
write!(acc, "{:?} {} {:?}\n", token.kind, token.len, token_text).unwrap()
let token_len = token.len.to_usize();
let token_text = &text[offset..offset + token_len];
offset += token_len;
writeln!(acc, "{:?} {} {:?}", token.kind, token_len, token_text).unwrap();
}
for err in errors {
let err_range = location_to_range(err.location());
writeln!(
acc,
"> error{:?} token({:?}) msg({})",
err.location(),
&text[err_range],
err.kind()
)
.unwrap();
}
return acc;
// FIXME: copy-pasted this from `ra_ide/src/diagnostics.rs`
// `Location` will be refactored soon in new PR, see todos here:
// https://github.com/rust-analyzer/rust-analyzer/issues/223
fn location_to_range(location: Location) -> TextRange {
match location {
Location::Offset(offset) => TextRange::offset_len(offset, 1.into()),
Location::Range(range) => range,
}
}
acc
}

View File

@@ -94,6 +94,12 @@ impl From<rustc_lexer::unescape::EscapeError> for SyntaxErrorKind {
}
pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> {
// FIXME:
// * Add validation of character literal containing only a single char
// * Add validation of `crate` keyword not appearing in the middle of the symbol path
// * Add validation of doc comments are being attached to nodes
// * Remove validation of unterminated literals (it is already implemented in `tokenize()`)
let mut errors = Vec::new();
for node in root.descendants() {
match_ast! {

View File

@@ -1,3 +0,0 @@
#!/usr/bin/env bash
// hello
//! World

View File

@@ -1,6 +0,0 @@
SHEBANG 19 "#!/usr/bin/env bash"
WHITESPACE 1 "\n"
COMMENT 8 "// hello"
WHITESPACE 1 "\n"
COMMENT 9 "//! World"
WHITESPACE 1 "\n"

View File

@@ -1 +0,0 @@
LIFETIME 2 "\'1"

View File

@@ -1 +0,0 @@
STRING 7 "\"hello\n"

View File

@ -0,0 +1,2 @@
CHAR 1 "\'"
> error[0; 1) token("\'") msg(Missing trailing `'` symbol to terminate the character literal)

View File

@ -0,0 +1,2 @@
CHAR 5 "\'🦀"
> error[0; 5) token("\'🦀") msg(Missing trailing `'` symbol to terminate the character literal)

View File

@ -0,0 +1,2 @@
CHAR 5 "\'\\x7f"
> error[0; 5) token("\'\\x7f") msg(Missing trailing `'` symbol to terminate the character literal)

View File

@ -0,0 +1,2 @@
CHAR 9 "\'\\u{20AA}"
> error[0; 9) token("\'\\u{20AA}") msg(Missing trailing `'` symbol to terminate the character literal)

View File

@ -0,0 +1,2 @@
CHAR 2 "\' "
> error[0; 2) token("\' ") msg(Missing trailing `'` symbol to terminate the character literal)

View File

@ -0,0 +1,2 @@
CHAR 2 "\'\\"
> error[0; 2) token("\'\\") msg(Missing trailing `'` symbol to terminate the character literal)

View File

@ -0,0 +1,2 @@
CHAR 3 "\'\\n"
> error[0; 3) token("\'\\n") msg(Missing trailing `'` symbol to terminate the character literal)

View File

@ -0,0 +1,2 @@
CHAR 3 "\'\\\'"
> error[0; 3) token("\'\\\'") msg(Missing trailing `'` symbol to terminate the character literal)

View File

@ -0,0 +1,2 @@
BYTE 2 "b\'"
> error[0; 2) token("b\'") msg(Missing trailing `'` symbol to terminate the byte literal)

View File

@ -0,0 +1 @@
b'🦀

View File

@ -0,0 +1,2 @@
BYTE 6 "b\'🦀"
> error[0; 6) token("b\'🦀") msg(Missing trailing `'` symbol to terminate the byte literal)

View File

@ -0,0 +1,2 @@
BYTE 6 "b\'\\x7f"
> error[0; 6) token("b\'\\x7f") msg(Missing trailing `'` symbol to terminate the byte literal)

View File

@ -0,0 +1,2 @@
BYTE 10 "b\'\\u{20AA}"
> error[0; 10) token("b\'\\u{20AA}") msg(Missing trailing `'` symbol to terminate the byte literal)

View File

@ -0,0 +1,2 @@
BYTE 3 "b\' "
> error[0; 3) token("b\' ") msg(Missing trailing `'` symbol to terminate the byte literal)

View File

@ -0,0 +1,2 @@
BYTE 3 "b\'\\"
> error[0; 3) token("b\'\\") msg(Missing trailing `'` symbol to terminate the byte literal)

View File

@ -0,0 +1,2 @@
BYTE 4 "b\'\\n"
> error[0; 4) token("b\'\\n") msg(Missing trailing `'` symbol to terminate the byte literal)

View File

@ -0,0 +1,2 @@
BYTE 4 "b\'\\\'"
> error[0; 4) token("b\'\\\'") msg(Missing trailing `'` symbol to terminate the byte literal)

View File

@ -0,0 +1,2 @@
STRING 1 "\""
> error[0; 1) token("\"") msg(Missing trailing `"` symbol to terminate the string literal)

View File

@ -0,0 +1,2 @@
STRING 5 "\"🦀"
> error[0; 5) token("\"🦀") msg(Missing trailing `"` symbol to terminate the string literal)

View File

@ -0,0 +1,2 @@
STRING 5 "\"\\x7f"
> error[0; 5) token("\"\\x7f") msg(Missing trailing `"` symbol to terminate the string literal)

View File

@ -0,0 +1,2 @@
STRING 9 "\"\\u{20AA}"
> error[0; 9) token("\"\\u{20AA}") msg(Missing trailing `"` symbol to terminate the string literal)

View File

@ -0,0 +1,2 @@
STRING 2 "\" "
> error[0; 2) token("\" ") msg(Missing trailing `"` symbol to terminate the string literal)

View File

@ -0,0 +1,2 @@
STRING 2 "\"\\"
> error[0; 2) token("\"\\") msg(Missing trailing `"` symbol to terminate the string literal)

View File

@ -0,0 +1,2 @@
STRING 3 "\"\\n"
> error[0; 3) token("\"\\n") msg(Missing trailing `"` symbol to terminate the string literal)

View File

@ -0,0 +1,2 @@
STRING 3 "\"\\\""
> error[0; 3) token("\"\\\"") msg(Missing trailing `"` symbol to terminate the string literal)

View File

@ -0,0 +1,2 @@
BYTE_STRING 2 "b\""
> error[0; 2) token("b\"") msg(Missing trailing `"` symbol to terminate the byte string literal)

View File

@ -0,0 +1,2 @@
BYTE_STRING 6 "b\"🦀"
> error[0; 6) token("b\"🦀") msg(Missing trailing `"` symbol to terminate the byte string literal)

View File

@ -0,0 +1,2 @@
BYTE_STRING 6 "b\"\\x7f"
> error[0; 6) token("b\"\\x7f") msg(Missing trailing `"` symbol to terminate the byte string literal)

View File

@ -0,0 +1,2 @@
BYTE_STRING 10 "b\"\\u{20AA}"
> error[0; 10) token("b\"\\u{20AA}") msg(Missing trailing `"` symbol to terminate the byte string literal)

View File

@ -0,0 +1,2 @@
BYTE_STRING 3 "b\" "
> error[0; 3) token("b\" ") msg(Missing trailing `"` symbol to terminate the byte string literal)

View File

@ -0,0 +1,2 @@
BYTE_STRING 3 "b\"\\"
> error[0; 3) token("b\"\\") msg(Missing trailing `"` symbol to terminate the byte string literal)

View File

@ -0,0 +1,2 @@
BYTE_STRING 4 "b\"\\n"
> error[0; 4) token("b\"\\n") msg(Missing trailing `"` symbol to terminate the byte string literal)

View File

@ -0,0 +1,2 @@
BYTE_STRING 4 "b\"\\\""
> error[0; 4) token("b\"\\\"") msg(Missing trailing `"` symbol to terminate the byte string literal)

View File

@ -0,0 +1,2 @@
RAW_STRING 4 "r##\""
> error[0; 4) token("r##\"") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal)

View File

@ -0,0 +1,2 @@
RAW_STRING 8 "r##\"🦀"
> error[0; 8) token("r##\"🦀") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal)

View File

@ -0,0 +1,2 @@
RAW_STRING 8 "r##\"\\x7f"
> error[0; 8) token("r##\"\\x7f") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal)

View File

@ -0,0 +1,2 @@
RAW_STRING 12 "r##\"\\u{20AA}"
> error[0; 12) token("r##\"\\u{20AA}") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal)

View File

@ -0,0 +1,2 @@
RAW_STRING 5 "r##\" "
> error[0; 5) token("r##\" ") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal)

View File

@ -0,0 +1,2 @@
RAW_STRING 5 "r##\"\\"
> error[0; 5) token("r##\"\\") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal)

View File

@ -0,0 +1,2 @@
RAW_STRING 6 "r##\"\\n"
> error[0; 6) token("r##\"\\n") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal)

View File

@ -0,0 +1,2 @@
RAW_BYTE_STRING 5 "br##\""
> error[0; 5) token("br##\"") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal)

View File

@ -0,0 +1,2 @@
RAW_BYTE_STRING 9 "br##\"🦀"
> error[0; 9) token("br##\"🦀") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal)

View File

@ -0,0 +1,2 @@
RAW_BYTE_STRING 9 "br##\"\\x7f"
> error[0; 9) token("br##\"\\x7f") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal)

View File

@ -0,0 +1,2 @@
RAW_BYTE_STRING 13 "br##\"\\u{20AA}"
> error[0; 13) token("br##\"\\u{20AA}") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal)

View File

@ -0,0 +1,2 @@
RAW_BYTE_STRING 6 "br##\" "
> error[0; 6) token("br##\" ") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal)

View File

@ -0,0 +1,2 @@
RAW_BYTE_STRING 6 "br##\"\\"
> error[0; 6) token("br##\"\\") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal)

View File

@ -0,0 +1,2 @@
RAW_BYTE_STRING 7 "br##\"\\n"
> error[0; 7) token("br##\"\\n") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal)

Some files were not shown because too many files have changed in this diff Show More