From e1aeb7fa794e228ca9099ac7679e8a1d0b22238a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pol=20Valletb=C3=B3?= Date: Wed, 11 Oct 2023 15:25:52 +0200 Subject: [PATCH] fix: handle errors for string byte string and c_string --- crates/parser/src/lexed_str.rs | 42 ++++++++++++++++++- .../test_data/lexer/err/byte_strings.rast | 28 +++++++++++++ .../test_data/lexer/err/byte_strings.rs | 14 +++++++ .../parser/test_data/lexer/err/c_strings.rast | 28 +++++++++++++ .../parser/test_data/lexer/err/c_strings.rs | 14 +++++++ .../parser/test_data/lexer/err/strings.rast | 28 +++++++++++++ crates/parser/test_data/lexer/err/strings.rs | 14 +++++++ 7 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 crates/parser/test_data/lexer/err/byte_strings.rast create mode 100644 crates/parser/test_data/lexer/err/byte_strings.rs create mode 100644 crates/parser/test_data/lexer/err/c_strings.rast create mode 100644 crates/parser/test_data/lexer/err/c_strings.rs create mode 100644 crates/parser/test_data/lexer/err/strings.rast create mode 100644 crates/parser/test_data/lexer/err/strings.rs diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs index 84cedc1fa3f..c2e25daf37f 100644 --- a/crates/parser/src/lexed_str.rs +++ b/crates/parser/src/lexed_str.rs @@ -8,7 +8,10 @@ //! Note that these tokens, unlike the tokens we feed into the parser, do //! include info about comments and whitespace. -use rustc_dependencies::lexer as rustc_lexer; +use rustc_dependencies::lexer::{ + self as rustc_lexer, + unescape::{unescape_c_string, unescape_literal}, +}; use std::ops; @@ -284,18 +287,45 @@ impl<'a> Converter<'a> { rustc_lexer::LiteralKind::Str { terminated } => { if !terminated { err = "Missing trailing `\"` symbol to terminate the string literal"; + } else { + let text = &self.res.text[self.offset + 1..][..len - 1]; + let i = text.rfind('"').unwrap(); + let text = &text[..i]; + rustc_lexer::unescape::unescape_literal(text, Mode::Str, &mut |_, res| { + if let Err(e) = res { + err = error_to_diagnostic_message(e, Mode::Str); + } + }); } STRING } rustc_lexer::LiteralKind::ByteStr { terminated } => { if !terminated { err = "Missing trailing `\"` symbol to terminate the byte string literal"; + } else { + let text = &self.res.text[self.offset + 2..][..len - 2]; + let i = text.rfind('"').unwrap(); + let text = &text[..i]; + rustc_lexer::unescape::unescape_literal(text, Mode::ByteStr, &mut |_, res| { + if let Err(e) = res { + err = error_to_diagnostic_message(e, Mode::ByteStr); + } + }) } BYTE_STRING } rustc_lexer::LiteralKind::CStr { terminated } => { if !terminated { err = "Missing trailing `\"` symbol to terminate the string literal"; + } else { + let text = &self.res.text[self.offset + 2..][..len - 2]; + let i = text.rfind('"').unwrap(); + let text = &text[..i]; + rustc_lexer::unescape::unescape_c_string(text, Mode::CStr, &mut |_, res| { + if let Err(e) = res { + err = error_to_diagnostic_message(e, Mode::CStr); + } + }) } C_STRING } @@ -360,3 +390,13 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str { EscapeError::MultipleSkippedLinesWarning => "", } } + +fn fill_unescape_string_error(text: &str, mode: Mode, mut error_message: &str) { + + rustc_lexer::unescape::unescape_c_string(text, mode, &mut |_, res| { + if let Err(e) = res { + error_message = error_to_diagnostic_message(e, mode); + } + }); +} + diff --git a/crates/parser/test_data/lexer/err/byte_strings.rast b/crates/parser/test_data/lexer/err/byte_strings.rast new file mode 100644 index 00000000000..e8d8ff8cefb --- /dev/null +++ b/crates/parser/test_data/lexer/err/byte_strings.rast @@ -0,0 +1,28 @@ +BYTE_STRING "b\"\\💩\"" error: unknown byte escape +WHITESPACE "\n" +BYTE_STRING "b\"\\●\"" error: unknown byte escape +WHITESPACE "\n" +BYTE_STRING "b\"\\u{_0000}\"" error: invalid start of unicode escape +WHITESPACE "\n" +BYTE_STRING "b\"\\u{0000000}\"" error: overlong unicode escape +WHITESPACE "\n" +BYTE_STRING "b\"\\u{FFFFFF}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DC00}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DDDD}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DFFF}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{D800}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DAAA}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DBFF}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\xы\"" error: invalid character in numeric character escape +WHITESPACE "\n" diff --git a/crates/parser/test_data/lexer/err/byte_strings.rs b/crates/parser/test_data/lexer/err/byte_strings.rs new file mode 100644 index 00000000000..e74847137b1 --- /dev/null +++ b/crates/parser/test_data/lexer/err/byte_strings.rs @@ -0,0 +1,14 @@ +b"\💩" +b"\●" +b"\u{_0000}" +b"\u{0000000}" +b"\u{FFFFFF}" +b"\u{ffffff}" +b"\u{ffffff}" +b"\u{DC00}" +b"\u{DDDD}" +b"\u{DFFF}" +b"\u{D800}" +b"\u{DAAA}" +b"\u{DBFF}" +b"\xы" diff --git a/crates/parser/test_data/lexer/err/c_strings.rast b/crates/parser/test_data/lexer/err/c_strings.rast new file mode 100644 index 00000000000..1b4424ba5c7 --- /dev/null +++ b/crates/parser/test_data/lexer/err/c_strings.rast @@ -0,0 +1,28 @@ +C_STRING "c\"\\💩\"" error: unknown character escape +WHITESPACE "\n" +C_STRING "c\"\\●\"" error: unknown character escape +WHITESPACE "\n" +C_STRING "c\"\\u{_0000}\"" error: invalid start of unicode escape +WHITESPACE "\n" +C_STRING "c\"\\u{0000000}\"" error: overlong unicode escape +WHITESPACE "\n" +C_STRING "c\"\\u{FFFFFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DC00}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DDDD}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DFFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{D800}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DAAA}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DBFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\xы\"" error: invalid character in numeric character escape +WHITESPACE "\n" diff --git a/crates/parser/test_data/lexer/err/c_strings.rs b/crates/parser/test_data/lexer/err/c_strings.rs new file mode 100644 index 00000000000..1b78ffc28a0 --- /dev/null +++ b/crates/parser/test_data/lexer/err/c_strings.rs @@ -0,0 +1,14 @@ +c"\💩" +c"\●" +c"\u{_0000}" +c"\u{0000000}" +c"\u{FFFFFF}" +c"\u{ffffff}" +c"\u{ffffff}" +c"\u{DC00}" +c"\u{DDDD}" +c"\u{DFFF}" +c"\u{D800}" +c"\u{DAAA}" +c"\u{DBFF}" +c"\xы" diff --git a/crates/parser/test_data/lexer/err/strings.rast b/crates/parser/test_data/lexer/err/strings.rast new file mode 100644 index 00000000000..0cd1747208e --- /dev/null +++ b/crates/parser/test_data/lexer/err/strings.rast @@ -0,0 +1,28 @@ +STRING "\"\\💩\"" error: unknown character escape +WHITESPACE "\n" +STRING "\"\\●\"" error: unknown character escape +WHITESPACE "\n" +STRING "\"\\u{_0000}\"" error: invalid start of unicode escape +WHITESPACE "\n" +STRING "\"\\u{0000000}\"" error: overlong unicode escape +WHITESPACE "\n" +STRING "\"\\u{FFFFFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{ffffff}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{ffffff}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DC00}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DDDD}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DFFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{D800}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DAAA}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DBFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\xы\"" error: invalid character in numeric character escape +WHITESPACE "\n" diff --git a/crates/parser/test_data/lexer/err/strings.rs b/crates/parser/test_data/lexer/err/strings.rs new file mode 100644 index 00000000000..2499516d3fa --- /dev/null +++ b/crates/parser/test_data/lexer/err/strings.rs @@ -0,0 +1,14 @@ +"\💩" +"\●" +"\u{_0000}" +"\u{0000000}" +"\u{FFFFFF}" +"\u{ffffff}" +"\u{ffffff}" +"\u{DC00}" +"\u{DDDD}" +"\u{DFFF}" +"\u{D800}" +"\u{DAAA}" +"\u{DBFF}" +"\xы"