2019-09-30 11:58:53 +03:00
|
|
|
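//! Validation passes over a syntax tree that collect `SyntaxError`s for
//! constructs such as malformed literal escapes, misplaced `crate` keywords,
//! and unnecessary visibility qualifiers.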
|
|
|
|
|
2019-01-28 20:03:56 +00:00
|
|
|
mod block;
|
2018-11-08 15:42:00 +01:00
|
|
|
|
2020-04-25 00:57:47 +02:00
|
|
|
use std::convert::TryFrom;
|
|
|
|
|
2019-08-20 19:16:57 +03:00
|
|
|
use rustc_lexer::unescape;
|
2019-07-24 11:47:28 +03:00
|
|
|
|
2019-01-07 16:15:47 +03:00
|
|
|
use crate::{
|
2020-02-06 02:33:18 +02:00
|
|
|
ast, match_ast, AstNode, SyntaxError,
|
2019-11-11 11:26:57 +03:00
|
|
|
SyntaxKind::{BYTE, BYTE_STRING, CHAR, CONST_DEF, FN_DEF, INT_NUMBER, STRING, TYPE_ALIAS_DEF},
|
2020-04-24 23:40:41 +02:00
|
|
|
SyntaxNode, SyntaxToken, TextSize, T,
|
2019-01-07 16:15:47 +03:00
|
|
|
};
|
|
|
|
|
2020-02-06 02:33:18 +02:00
|
|
|
fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> &'static str {
|
|
|
|
use unescape::EscapeError as EE;
|
2019-07-24 11:47:28 +03:00
|
|
|
|
2020-02-06 02:33:18 +02:00
|
|
|
#[rustfmt::skip]
|
|
|
|
let err_message = match err {
|
|
|
|
EE::ZeroChars => {
|
|
|
|
"Literal must not be empty"
|
2019-07-24 11:47:28 +03:00
|
|
|
}
|
2020-02-06 02:33:18 +02:00
|
|
|
EE::MoreThanOneChar => {
|
|
|
|
"Literal must be one character long"
|
|
|
|
}
|
|
|
|
EE::LoneSlash => {
|
|
|
|
"Character must be escaped: `\\`"
|
|
|
|
}
|
|
|
|
EE::InvalidEscape => {
|
|
|
|
"Invalid escape"
|
|
|
|
}
|
|
|
|
EE::BareCarriageReturn | EE::BareCarriageReturnInRawString => {
|
|
|
|
"Character must be escaped: `\r`"
|
|
|
|
}
|
|
|
|
EE::EscapeOnlyChar => {
|
|
|
|
"Escape character `\\` must be escaped itself"
|
|
|
|
}
|
|
|
|
EE::TooShortHexEscape => {
|
|
|
|
"ASCII hex escape code must have exactly two digits"
|
|
|
|
}
|
|
|
|
EE::InvalidCharInHexEscape => {
|
|
|
|
"ASCII hex escape code must contain only hex characters"
|
|
|
|
}
|
|
|
|
EE::OutOfRangeHexEscape => {
|
|
|
|
"ASCII hex escape code must be at most 0x7F"
|
|
|
|
}
|
|
|
|
EE::NoBraceInUnicodeEscape => {
|
|
|
|
"Missing `{` to begin the unicode escape"
|
|
|
|
}
|
|
|
|
EE::InvalidCharInUnicodeEscape => {
|
|
|
|
"Unicode escape must contain only hex characters and underscores"
|
|
|
|
}
|
|
|
|
EE::EmptyUnicodeEscape => {
|
|
|
|
"Unicode escape must not be empty"
|
|
|
|
}
|
|
|
|
EE::UnclosedUnicodeEscape => {
|
|
|
|
"Missing '}' to terminate the unicode escape"
|
|
|
|
}
|
|
|
|
EE::LeadingUnderscoreUnicodeEscape => {
|
|
|
|
"Unicode escape code must not begin with an underscore"
|
|
|
|
}
|
|
|
|
EE::OverlongUnicodeEscape => {
|
|
|
|
"Unicode escape code must have at most 6 digits"
|
|
|
|
}
|
|
|
|
EE::LoneSurrogateUnicodeEscape => {
|
|
|
|
"Unicode escape code must not be a surrogate"
|
|
|
|
}
|
|
|
|
EE::OutOfRangeUnicodeEscape => {
|
|
|
|
"Unicode escape code must be at most 0x10FFFF"
|
|
|
|
}
|
|
|
|
EE::UnicodeEscapeInByte => {
|
|
|
|
"Byte literals must not contain unicode escapes"
|
|
|
|
}
|
|
|
|
EE::NonAsciiCharInByte | EE::NonAsciiCharInByteString => {
|
|
|
|
"Byte literals must not contain non-ASCII characters"
|
|
|
|
}
|
|
|
|
};
|
2019-07-24 11:47:28 +03:00
|
|
|
|
2020-02-06 02:33:18 +02:00
|
|
|
err_message
|
2019-07-24 11:47:28 +03:00
|
|
|
}
|
2019-05-07 19:38:26 +03:00
|
|
|
|
2019-07-21 13:34:15 +03:00
|
|
|
pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> {
|
2020-02-01 22:25:01 +02:00
|
|
|
// FIXME:
|
|
|
|
// * Add validation of character literal containing only a single char
|
|
|
|
// * Add validation of `crate` keyword not appearing in the middle of the symbol path
|
|
|
|
// * Add validation of doc comments are being attached to nodes
|
|
|
|
// * Remove validation of unterminated literals (it is already implemented in `tokenize()`)
|
|
|
|
|
2018-11-08 15:42:00 +01:00
|
|
|
let mut errors = Vec::new();
|
2019-07-21 13:34:15 +03:00
|
|
|
for node in root.descendants() {
|
2019-10-05 17:03:03 +03:00
|
|
|
match_ast! {
|
|
|
|
match node {
|
2020-04-06 17:21:33 +03:00
|
|
|
ast::Literal(it) => validate_literal(it, &mut errors),
|
|
|
|
ast::BlockExpr(it) => block::validate_block_expr(it, &mut errors),
|
|
|
|
ast::FieldExpr(it) => validate_numeric_name(it.name_ref(), &mut errors),
|
|
|
|
ast::RecordField(it) => validate_numeric_name(it.name_ref(), &mut errors),
|
|
|
|
ast::Visibility(it) => validate_visibility(it, &mut errors),
|
|
|
|
ast::RangeExpr(it) => validate_range_expr(it, &mut errors),
|
2020-04-27 10:02:47 -07:00
|
|
|
ast::PathSegment(it) => validate_crate_keyword_in_path_segment(it, &mut errors),
|
2019-10-05 17:03:03 +03:00
|
|
|
_ => (),
|
|
|
|
}
|
|
|
|
}
|
2018-11-08 15:42:00 +01:00
|
|
|
}
|
|
|
|
errors
|
|
|
|
}
|
2019-02-21 15:51:22 +03:00
|
|
|
|
2019-07-18 19:23:05 +03:00
|
|
|
fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
|
2020-02-06 02:33:18 +02:00
|
|
|
// FIXME: move this function to outer scope (https://github.com/rust-analyzer/rust-analyzer/pull/2834#discussion_r366196658)
|
2020-01-14 03:34:38 +02:00
|
|
|
fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> {
|
|
|
|
text.rfind(end_delimiter).and_then(|end| text.get(prefix_len..end))
|
|
|
|
}
|
|
|
|
|
2019-05-07 19:38:26 +03:00
|
|
|
let token = literal.token();
|
|
|
|
let text = token.text().as_str();
|
2020-01-14 03:34:38 +02:00
|
|
|
|
2020-02-06 02:33:18 +02:00
|
|
|
// FIXME: lift this lambda refactor to `fn` (https://github.com/rust-analyzer/rust-analyzer/pull/2834#discussion_r366199205)
|
2020-01-14 03:34:38 +02:00
|
|
|
let mut push_err = |prefix_len, (off, err): (usize, unescape::EscapeError)| {
|
2020-04-25 00:57:47 +02:00
|
|
|
let off = token.text_range().start() + TextSize::try_from(off + prefix_len).unwrap();
|
2020-02-06 02:33:18 +02:00
|
|
|
acc.push(SyntaxError::new_at_offset(rustc_unescape_error_to_string(err), off));
|
2020-01-14 03:34:38 +02:00
|
|
|
};
|
|
|
|
|
2019-05-07 19:38:26 +03:00
|
|
|
match token.kind() {
|
|
|
|
BYTE => {
|
2020-01-14 03:34:38 +02:00
|
|
|
if let Some(Err(e)) = unquote(text, 2, '\'').map(unescape::unescape_byte) {
|
|
|
|
push_err(2, e);
|
2019-05-07 19:38:26 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
CHAR => {
|
2020-01-14 03:34:38 +02:00
|
|
|
if let Some(Err(e)) = unquote(text, 1, '\'').map(unescape::unescape_char) {
|
|
|
|
push_err(1, e);
|
2019-05-07 19:38:26 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
BYTE_STRING => {
|
2020-01-14 03:34:38 +02:00
|
|
|
if let Some(without_quotes) = unquote(text, 2, '"') {
|
|
|
|
unescape::unescape_byte_str(without_quotes, &mut |range, char| {
|
|
|
|
if let Err(err) = char {
|
|
|
|
push_err(2, (range.start, err));
|
|
|
|
}
|
|
|
|
})
|
2019-05-07 19:38:26 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
STRING => {
|
2020-01-14 03:34:38 +02:00
|
|
|
if let Some(without_quotes) = unquote(text, 1, '"') {
|
|
|
|
unescape::unescape_str(without_quotes, &mut |range, char| {
|
|
|
|
if let Err(err) = char {
|
|
|
|
push_err(1, (range.start, err));
|
|
|
|
}
|
|
|
|
})
|
2019-05-07 19:38:26 +03:00
|
|
|
}
|
|
|
|
}
|
2019-03-30 13:25:53 +03:00
|
|
|
_ => (),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-21 15:51:22 +03:00
|
|
|
pub(crate) fn validate_block_structure(root: &SyntaxNode) {
|
|
|
|
let mut stack = Vec::new();
|
|
|
|
for node in root.descendants() {
|
|
|
|
match node.kind() {
|
2019-05-15 15:35:47 +03:00
|
|
|
T!['{'] => stack.push(node),
|
|
|
|
T!['}'] => {
|
2019-02-21 15:51:22 +03:00
|
|
|
if let Some(pair) = stack.pop() {
|
|
|
|
assert_eq!(
|
|
|
|
node.parent(),
|
|
|
|
pair.parent(),
|
2019-07-20 12:48:24 +03:00
|
|
|
"\nunpaired curleys:\n{}\n{:#?}\n",
|
2019-02-21 15:51:22 +03:00
|
|
|
root.text(),
|
2019-07-20 12:48:24 +03:00
|
|
|
root,
|
2019-02-21 15:51:22 +03:00
|
|
|
);
|
|
|
|
assert!(
|
|
|
|
node.next_sibling().is_none() && pair.prev_sibling().is_none(),
|
|
|
|
"\nfloating curlys at {:?}\nfile:\n{}\nerror:\n{}\n",
|
|
|
|
node,
|
|
|
|
root.text(),
|
|
|
|
node.text(),
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
_ => (),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-08-09 12:16:47 +02:00
|
|
|
|
|
|
|
fn validate_numeric_name(name_ref: Option<ast::NameRef>, errors: &mut Vec<SyntaxError>) {
|
|
|
|
if let Some(int_token) = int_token(name_ref) {
|
|
|
|
if int_token.text().chars().any(|c| !c.is_digit(10)) {
|
|
|
|
errors.push(SyntaxError::new(
|
2020-02-06 02:33:18 +02:00
|
|
|
"Tuple (struct) field access is only allowed through \
|
|
|
|
decimal integers with no underscores or suffix",
|
2019-08-09 12:16:47 +02:00
|
|
|
int_token.text_range(),
|
|
|
|
));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn int_token(name_ref: Option<ast::NameRef>) -> Option<SyntaxToken> {
|
|
|
|
name_ref?.syntax().first_child_or_token()?.into_token().filter(|it| it.kind() == INT_NUMBER)
|
|
|
|
}
|
|
|
|
}
|
2019-11-11 11:26:57 +03:00
|
|
|
|
|
|
|
fn validate_visibility(vis: ast::Visibility, errors: &mut Vec<SyntaxError>) {
|
|
|
|
let parent = match vis.syntax().parent() {
|
|
|
|
Some(it) => it,
|
|
|
|
None => return,
|
|
|
|
};
|
|
|
|
match parent.kind() {
|
|
|
|
FN_DEF | CONST_DEF | TYPE_ALIAS_DEF => (),
|
|
|
|
_ => return,
|
|
|
|
}
|
2020-02-06 02:33:18 +02:00
|
|
|
|
2020-02-29 21:24:40 +01:00
|
|
|
let impl_def = match parent.parent().and_then(|it| it.parent()).and_then(ast::ImplDef::cast) {
|
2019-11-11 11:26:57 +03:00
|
|
|
Some(it) => it,
|
|
|
|
None => return,
|
|
|
|
};
|
2020-02-29 21:24:40 +01:00
|
|
|
if impl_def.target_trait().is_some() {
|
2020-02-06 02:33:18 +02:00
|
|
|
errors.push(SyntaxError::new("Unnecessary visibility qualifier", vis.syntax.text_range()));
|
2019-11-11 11:26:57 +03:00
|
|
|
}
|
|
|
|
}
|
2019-11-15 01:04:37 -08:00
|
|
|
|
|
|
|
fn validate_range_expr(expr: ast::RangeExpr, errors: &mut Vec<SyntaxError>) {
|
2019-11-15 12:05:29 -08:00
|
|
|
if expr.op_kind() == Some(ast::RangeOp::Inclusive) && expr.end().is_none() {
|
2019-11-15 01:04:37 -08:00
|
|
|
errors.push(SyntaxError::new(
|
2020-02-06 02:33:18 +02:00
|
|
|
"An inclusive range must have an end expression",
|
2019-11-15 12:05:29 -08:00
|
|
|
expr.syntax().text_range(),
|
2019-11-15 01:04:37 -08:00
|
|
|
));
|
|
|
|
}
|
|
|
|
}
|
2020-04-27 10:02:47 -07:00
|
|
|
|
|
|
|
fn validate_crate_keyword_in_path_segment(
|
|
|
|
segment: ast::PathSegment,
|
|
|
|
errors: &mut Vec<SyntaxError>,
|
|
|
|
) {
|
|
|
|
const ERR_MSG: &str = "The `crate` keyword is only allowed as the first segment of a path";
|
|
|
|
|
|
|
|
let crate_token = match segment.crate_token() {
|
|
|
|
None => return,
|
|
|
|
Some(it) => it,
|
|
|
|
};
|
|
|
|
|
|
|
|
// Disallow both ::crate and foo::crate
|
2020-04-30 10:41:24 -07:00
|
|
|
let mut path = segment.parent_path();
|
2020-04-27 10:02:47 -07:00
|
|
|
if segment.coloncolon_token().is_some() || path.qualifier().is_some() {
|
|
|
|
errors.push(SyntaxError::new(ERR_MSG, crate_token.text_range()));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// For expressions and types, validation is complete, but we still have
|
2020-04-30 10:41:24 -07:00
|
|
|
// to handle invalid UseItems like this:
|
|
|
|
//
|
|
|
|
// use foo:{crate::bar::baz};
|
|
|
|
//
|
|
|
|
// To handle this we must inspect the parent `UseItem`s and `UseTree`s
|
|
|
|
// but right now we're looking deep inside the nested `Path` nodes because
|
|
|
|
// `Path`s are left-associative:
|
|
|
|
//
|
|
|
|
// ((crate)::bar)::baz)
|
|
|
|
// ^ current value of path
|
|
|
|
//
|
|
|
|
// So we need to climb to the top
|
|
|
|
while let Some(parent) = path.parent_path() {
|
|
|
|
path = parent;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Now that we've found the whole path we need to see if there's a prefix
|
|
|
|
// somewhere in the UseTree hierarchy. This check is arbitrarily deep
|
|
|
|
// because rust allows arbitrary nesting like so:
|
|
|
|
//
|
|
|
|
// use {foo::{{{{crate::bar::baz}}}}};
|
2020-04-27 10:02:47 -07:00
|
|
|
for node in path.syntax().ancestors().skip(1) {
|
|
|
|
match_ast! {
|
|
|
|
match node {
|
|
|
|
ast::UseTree(it) => if let Some(tree_path) = it.path() {
|
2020-04-30 10:41:24 -07:00
|
|
|
// Even a top-level path exists within a `UseTree` so we must explicitly
|
|
|
|
// allow our path but disallow anything else
|
2020-04-27 10:02:47 -07:00
|
|
|
if tree_path != path {
|
|
|
|
errors.push(SyntaxError::new(ERR_MSG, crate_token.text_range()));
|
|
|
|
}
|
|
|
|
},
|
|
|
|
ast::UseTreeList(_it) => continue,
|
|
|
|
_ => return,
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
}
|