Auto merge of #15744 - pvalletbo:15395/character-byte-literals-diagnose, r=Veykril
fix: add diagnostic messages for char and byte literal errors. This PR adds error messages for different invalid byte or character literals. Fixes #15395.
This commit is contained in:
commit
c3873616d3
@ -9,8 +9,11 @@
|
||||
//! include info about comments and whitespace.
|
||||
|
||||
use rustc_dependencies::lexer as rustc_lexer;
|
||||
|
||||
use std::ops;
|
||||
|
||||
use rustc_lexer::unescape::{EscapeError, Mode};
|
||||
|
||||
use crate::{
|
||||
SyntaxKind::{self, *},
|
||||
T,
|
||||
@ -254,13 +257,28 @@ fn extend_literal(&mut self, len: usize, kind: &rustc_lexer::LiteralKind) {
|
||||
rustc_lexer::LiteralKind::Char { terminated } => {
|
||||
if !terminated {
|
||||
err = "Missing trailing `'` symbol to terminate the character literal";
|
||||
} else {
|
||||
let text = &self.res.text[self.offset + 1..][..len - 1];
|
||||
let i = text.rfind('\'').unwrap();
|
||||
let text = &text[..i];
|
||||
if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
|
||||
err = error_to_diagnostic_message(e, Mode::Char);
|
||||
}
|
||||
}
|
||||
CHAR
|
||||
}
|
||||
rustc_lexer::LiteralKind::Byte { terminated } => {
|
||||
if !terminated {
|
||||
err = "Missing trailing `'` symbol to terminate the byte literal";
|
||||
} else {
|
||||
let text = &self.res.text[self.offset + 2..][..len - 2];
|
||||
let i = text.rfind('\'').unwrap();
|
||||
let text = &text[..i];
|
||||
if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
|
||||
err = error_to_diagnostic_message(e, Mode::Byte);
|
||||
}
|
||||
}
|
||||
|
||||
BYTE
|
||||
}
|
||||
rustc_lexer::LiteralKind::Str { terminated } => {
|
||||
@ -305,3 +323,40 @@ fn extend_literal(&mut self, len: usize, kind: &rustc_lexer::LiteralKind) {
|
||||
self.push(syntax_kind, len, err);
|
||||
}
|
||||
}
|
||||
|
||||
fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
|
||||
match error {
|
||||
EscapeError::ZeroChars => "empty character literal",
|
||||
EscapeError::MoreThanOneChar => "character literal may only contain one codepoint",
|
||||
EscapeError::LoneSlash => "",
|
||||
EscapeError::InvalidEscape if mode == Mode::Byte || mode == Mode::ByteStr => {
|
||||
"unknown byte escape"
|
||||
}
|
||||
EscapeError::InvalidEscape => "unknown character escape",
|
||||
EscapeError::BareCarriageReturn => "",
|
||||
EscapeError::BareCarriageReturnInRawString => "",
|
||||
EscapeError::EscapeOnlyChar if mode == Mode::Byte => "byte constant must be escaped",
|
||||
EscapeError::EscapeOnlyChar => "character constant must be escaped",
|
||||
EscapeError::TooShortHexEscape => "numeric character escape is too short",
|
||||
EscapeError::InvalidCharInHexEscape => "invalid character in numeric character escape",
|
||||
EscapeError::OutOfRangeHexEscape => "out of range hex escape",
|
||||
EscapeError::NoBraceInUnicodeEscape => "incorrect unicode escape sequence",
|
||||
EscapeError::InvalidCharInUnicodeEscape => "invalid character in unicode escape",
|
||||
EscapeError::EmptyUnicodeEscape => "empty unicode escape",
|
||||
EscapeError::UnclosedUnicodeEscape => "unterminated unicode escape",
|
||||
EscapeError::LeadingUnderscoreUnicodeEscape => "invalid start of unicode escape",
|
||||
EscapeError::OverlongUnicodeEscape => "overlong unicode escape",
|
||||
EscapeError::LoneSurrogateUnicodeEscape => "invalid unicode character escape",
|
||||
EscapeError::OutOfRangeUnicodeEscape => "invalid unicode character escape",
|
||||
EscapeError::UnicodeEscapeInByte => "unicode escape in byte string",
|
||||
EscapeError::NonAsciiCharInByte if mode == Mode::Byte => {
|
||||
"non-ASCII character in byte literal"
|
||||
}
|
||||
EscapeError::NonAsciiCharInByte if mode == Mode::ByteStr => {
|
||||
"non-ASCII character in byte string literal"
|
||||
}
|
||||
EscapeError::NonAsciiCharInByte => "non-ASCII character in raw byte string literal",
|
||||
EscapeError::UnskippedWhitespaceWarning => "",
|
||||
EscapeError::MultipleSkippedLinesWarning => "",
|
||||
}
|
||||
}
|
||||
|
92
crates/parser/test_data/lexer/err/byte_char_literals.rast
Normal file
92
crates/parser/test_data/lexer/err/byte_char_literals.rast
Normal file
@ -0,0 +1,92 @@
|
||||
BYTE "b''" error: empty character literal
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\'" error: Missing trailing `'` symbol to terminate the byte literal
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\n'" error: byte constant must be escaped
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'spam'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\x0ff'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\\"a'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\na'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\ra'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\ta'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\\\a'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\'a'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\0a'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{0}x'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{1F63b}}'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\v'" error: unknown byte escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\💩'" error: unknown byte escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\●'" error: unknown byte escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\\\\\r'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\x'" error: numeric character escape is too short
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\x0'" error: numeric character escape is too short
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\xf'" error: numeric character escape is too short
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\xa'" error: numeric character escape is too short
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\xx'" error: invalid character in numeric character escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\xы'" error: invalid character in numeric character escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\x🦀'" error: invalid character in numeric character escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\xtt'" error: invalid character in numeric character escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\xff'" error: out of range hex escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\xFF'" error: out of range hex escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\x80'" error: out of range hex escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u'" error: incorrect unicode escape sequence
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u[0123]'" error: incorrect unicode escape sequence
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{0x}'" error: invalid character in unicode escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{'" error: unterminated unicode escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{0000'" error: unterminated unicode escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{}'" error: empty unicode escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{_0000}'" error: invalid start of unicode escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{0000000}'" error: overlong unicode escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{FFFFFF}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{DC00}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{DDDD}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{DFFF}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{D800}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{DAAA}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
BYTE "b'\\u{DBFF}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
47
crates/parser/test_data/lexer/err/byte_char_literals.rs
Normal file
47
crates/parser/test_data/lexer/err/byte_char_literals.rs
Normal file
@ -0,0 +1,47 @@
|
||||
b''
|
||||
b'\'
|
||||
b'
|
||||
'
|
||||
b'spam'
|
||||
b'\x0ff'
|
||||
b'\"a'
|
||||
b'\na'
|
||||
b'\ra'
|
||||
b'\ta'
|
||||
b'\\a'
|
||||
b'\'a'
|
||||
b'\0a'
|
||||
b'\u{0}x'
|
||||
b'\u{1F63b}}'
|
||||
b'\v'
|
||||
b'\💩'
|
||||
b'\●'
|
||||
b'\\\r'
|
||||
b'\x'
|
||||
b'\x0'
|
||||
b'\xf'
|
||||
b'\xa'
|
||||
b'\xx'
|
||||
b'\xы'
|
||||
b'\x🦀'
|
||||
b'\xtt'
|
||||
b'\xff'
|
||||
b'\xFF'
|
||||
b'\x80'
|
||||
b'\u'
|
||||
b'\u[0123]'
|
||||
b'\u{0x}'
|
||||
b'\u{'
|
||||
b'\u{0000'
|
||||
b'\u{}'
|
||||
b'\u{_0000}'
|
||||
b'\u{0000000}'
|
||||
b'\u{FFFFFF}'
|
||||
b'\u{ffffff}'
|
||||
b'\u{ffffff}'
|
||||
b'\u{DC00}'
|
||||
b'\u{DDDD}'
|
||||
b'\u{DFFF}'
|
||||
b'\u{D800}'
|
||||
b'\u{DAAA}'
|
||||
b'\u{DBFF}'
|
92
crates/parser/test_data/lexer/err/char_literals.rast
Normal file
92
crates/parser/test_data/lexer/err/char_literals.rast
Normal file
@ -0,0 +1,92 @@
|
||||
CHAR "'hello'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
CHAR "''" error: empty character literal
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\n'" error: character constant must be escaped
|
||||
WHITESPACE "\n"
|
||||
CHAR "'spam'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\x0ff'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\\"a'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\na'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\ra'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\ta'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\\\a'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\'a'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\0a'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{0}x'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{1F63b}}'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\v'" error: unknown character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\💩'" error: unknown character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\●'" error: unknown character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\\\\\r'" error: character literal may only contain one codepoint
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\x'" error: numeric character escape is too short
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\x0'" error: numeric character escape is too short
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\xf'" error: numeric character escape is too short
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\xa'" error: numeric character escape is too short
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\xx'" error: invalid character in numeric character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\xы'" error: invalid character in numeric character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\x🦀'" error: invalid character in numeric character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\xtt'" error: invalid character in numeric character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\xff'" error: out of range hex escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\xFF'" error: out of range hex escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\x80'" error: out of range hex escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u'" error: incorrect unicode escape sequence
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u[0123]'" error: incorrect unicode escape sequence
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{0x}'" error: invalid character in unicode escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{'" error: unterminated unicode escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{0000'" error: unterminated unicode escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{}'" error: empty unicode escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{_0000}'" error: invalid start of unicode escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{0000000}'" error: overlong unicode escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{FFFFFF}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{ffffff}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{ffffff}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{DC00}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{DDDD}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{DFFF}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{D800}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{DAAA}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
||||
CHAR "'\\u{DBFF}'" error: invalid unicode character escape
|
||||
WHITESPACE "\n"
|
47
crates/parser/test_data/lexer/err/char_literals.rs
Normal file
47
crates/parser/test_data/lexer/err/char_literals.rs
Normal file
@ -0,0 +1,47 @@
|
||||
'hello'
|
||||
''
|
||||
'
|
||||
'
|
||||
'spam'
|
||||
'\x0ff'
|
||||
'\"a'
|
||||
'\na'
|
||||
'\ra'
|
||||
'\ta'
|
||||
'\\a'
|
||||
'\'a'
|
||||
'\0a'
|
||||
'\u{0}x'
|
||||
'\u{1F63b}}'
|
||||
'\v'
|
||||
'\💩'
|
||||
'\●'
|
||||
'\\\r'
|
||||
'\x'
|
||||
'\x0'
|
||||
'\xf'
|
||||
'\xa'
|
||||
'\xx'
|
||||
'\xы'
|
||||
'\x🦀'
|
||||
'\xtt'
|
||||
'\xff'
|
||||
'\xFF'
|
||||
'\x80'
|
||||
'\u'
|
||||
'\u[0123]'
|
||||
'\u{0x}'
|
||||
'\u{'
|
||||
'\u{0000'
|
||||
'\u{}'
|
||||
'\u{_0000}'
|
||||
'\u{0000000}'
|
||||
'\u{FFFFFF}'
|
||||
'\u{ffffff}'
|
||||
'\u{ffffff}'
|
||||
'\u{DC00}'
|
||||
'\u{DDDD}'
|
||||
'\u{DFFF}'
|
||||
'\u{D800}'
|
||||
'\u{DAAA}'
|
||||
'\u{DBFF}'
|
@ -1,13 +1,9 @@
|
||||
BYTE "b''"
|
||||
WHITESPACE " "
|
||||
BYTE "b'x'"
|
||||
WHITESPACE " "
|
||||
BYTE_STRING "b\"foo\""
|
||||
WHITESPACE " "
|
||||
BYTE_STRING "br\"\""
|
||||
WHITESPACE "\n"
|
||||
BYTE "b''suf"
|
||||
WHITESPACE " "
|
||||
BYTE_STRING "b\"\"ix"
|
||||
WHITESPACE " "
|
||||
BYTE_STRING "br\"\"br"
|
||||
@ -17,6 +13,4 @@ WHITESPACE " "
|
||||
BYTE "b'\\\\'"
|
||||
WHITESPACE " "
|
||||
BYTE "b'\\''"
|
||||
WHITESPACE " "
|
||||
BYTE "b'hello'"
|
||||
WHITESPACE "\n"
|
||||
|
@ -1,3 +1,3 @@
|
||||
b'' b'x' b"foo" br""
|
||||
b''suf b""ix br""br
|
||||
b'\n' b'\\' b'\'' b'hello'
|
||||
b'x' b"foo" br""
|
||||
b""ix br""br
|
||||
b'\n' b'\\' b'\''
|
||||
|
@ -4,8 +4,6 @@ CHAR "' '"
|
||||
WHITESPACE " "
|
||||
CHAR "'0'"
|
||||
WHITESPACE " "
|
||||
CHAR "'hello'"
|
||||
WHITESPACE " "
|
||||
CHAR "'\\x7f'"
|
||||
WHITESPACE " "
|
||||
CHAR "'\\n'"
|
||||
|
@ -1 +1 @@
|
||||
'x' ' ' '0' 'hello' '\x7f' '\n' '\\' '\''
|
||||
'x' ' ' '0' '\x7f' '\n' '\\' '\''
|
||||
|
Loading…
Reference in New Issue
Block a user