Auto merge of #14836 - Veykril:rustc-lexer, r=Veykril

internal: Bump rustc_lexer
2023-05-18 08:50:49 +00:00 · 2023-05-18 08:50:49 +00:00 · e4977e74de
commit e4977e74de
parent 1c0235e3ff 099b5b3b15
26 changed files with 134 additions and 73 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1193,7 +1193,7 @@ dependencies = [
 "drop_bomb",
 "expect-test",
 "limit",
- "rustc-ap-rustc_lexer",
+ "ra-ap-rustc_lexer",
 "sourcegen",
 "stdx",
 ]
@ -1396,6 +1396,16 @@ dependencies = [
 "proc-macro2",
 ]
 [[package]]
 name = "ra-ap-rustc_lexer"
 version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e1c145702ed3f237918e512685185dc8a4d0edc3a5326c63d20361d8ba9b45b3"
 dependencies = [
 "unic-emoji-char",
 "unicode-xid",
 ]
 [[package]]
 name = "rayon"
 version = "1.7.0"
@ -1524,15 +1534,6 @@ dependencies = [
 "xshell",
 ]
 [[package]]
 name = "rustc-ap-rustc_lexer"
 version = "727.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8f40f26e7abdcd3b982f36c09a634cc6187988fbf6ec466c91f8d30a12ac0237"
 dependencies = [
 "unicode-xid",
 ]
 [[package]]
 name = "rustc-demangle"
 version = "0.1.22"
@ -1753,9 +1754,9 @@ dependencies = [
 "proc-macro2",
 "profile",
 "quote",
+ "ra-ap-rustc_lexer",
 "rayon",
 "rowan",
- "rustc-ap-rustc_lexer",
 "rustc-hash",
 "smol_str",
 "sourcegen",
@ -1998,6 +1999,47 @@ version = "1.16.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a3e5df347f0bf3ec1d670aad6ca5c6a1859cd9ea61d2113125794654ccced68f"
 [[package]]
 name = "unic-char-property"
 version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221"
 dependencies = [
 "unic-char-range",
 ]
 [[package]]
 name = "unic-char-range"
 version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc"
 [[package]]
 name = "unic-common"
 version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc"
 [[package]]
 name = "unic-emoji-char"
 version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d"
 dependencies = [
 "unic-char-property",
 "unic-char-range",
 "unic-ucd-version",
 ]
 [[package]]
 name = "unic-ucd-version"
 version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4"
 dependencies = [
 "unic-common",
 ]
 [[package]]
 name = "unicase"
 version = "2.6.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -89,3 +89,5 @@ text-size = "1.1.0"
 serde = { version = "=1.0.156", features = ["derive"] }
 serde_json = "1.0.94"
 triomphe = { version = "0.1.8", default-features = false, features = ["std"] }
+rustc_lexer = { version = "0.1.0", package = "ra-ap-rustc_lexer" }
--- a/crates/parser/Cargo.toml
+++ b/crates/parser/Cargo.toml
@ -13,7 +13,7 @@ doctest = false
 [dependencies]
 drop_bomb = "0.1.5"
-rustc_lexer = { version = "727.0.0", package = "rustc-ap-rustc_lexer" }
+rustc_lexer.workspace = true
 limit.workspace = true
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@ -36,7 +36,7 @@ pub fn new(text: &'a str) -> LexedStr<'a> {
        };
        for token in rustc_lexer::tokenize(&text[conv.offset..]) {
-            let token_text = &text[conv.offset..][..token.len];
+            let token_text = &text[conv.offset..][..token.len as usize];
            conv.extend_token(&token.kind, token_text);
        }
@ -49,8 +49,8 @@ pub fn single_token(text: &'a str) -> Option<(SyntaxKind, Option<String>)> {
            return None;
        }
-        let token = rustc_lexer::first_token(text);
+        let token = rustc_lexer::tokenize(text).next()?;
-        if token.len != text.len() {
+        if token.len as usize != text.len() {
            return None;
        }
@ -175,6 +175,10 @@ fn extend_token(&mut self, kind: &rustc_lexer::TokenKind, token_text: &str) {
                rustc_lexer::TokenKind::Ident => {
                    SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
                }
+                rustc_lexer::TokenKind::InvalidIdent => {
+                    err = "Ident contains invalid characters";
+                    IDENT
+                }
                rustc_lexer::TokenKind::RawIdent => IDENT,
                rustc_lexer::TokenKind::Literal { kind, .. } => {
@ -221,6 +225,7 @@ fn extend_token(&mut self, kind: &rustc_lexer::TokenKind, token_text: &str) {
                    err = "unknown literal prefix";
                    IDENT
                }
+                rustc_lexer::TokenKind::Eof => EOF,
            }
        };
@ -268,35 +273,30 @@ fn extend_literal(&mut self, len: usize, kind: &rustc_lexer::LiteralKind) {
                }
                BYTE_STRING
            }
-            rustc_lexer::LiteralKind::RawStr { err: raw_str_err, .. } => {
+            rustc_lexer::LiteralKind::CStr { terminated } => {
-                if let Some(raw_str_err) = raw_str_err {
+                if !terminated {
-                    err = match raw_str_err {
+                    err = "Missing trailing `\"` symbol to terminate the string literal";
-                        rustc_lexer::RawStrError::InvalidStarter { .. } => "Missing `\"` symbol after `#` symbols to begin the raw string literal",
+                }
-                        rustc_lexer::RawStrError::NoTerminator { expected, found, .. } => if expected == found {
-                            "Missing trailing `\"` to terminate the raw string literal"
-                        } else {
-                            "Missing trailing `\"` with `#` symbols to terminate the raw string literal"
-                        },
-                        rustc_lexer::RawStrError::TooManyDelimiters { .. } => "Too many `#` symbols: raw strings may be delimited by up to 65535 `#` symbols",
-                    };
-                };
                STRING
            }
-            rustc_lexer::LiteralKind::RawByteStr { err: raw_str_err, .. } => {
+            rustc_lexer::LiteralKind::RawStr { n_hashes } => {
-                if let Some(raw_str_err) = raw_str_err {
+                if n_hashes.is_none() {
-                    err = match raw_str_err {
+                    err = "Invalid raw string literal";
-                        rustc_lexer::RawStrError::InvalidStarter { .. } => "Missing `\"` symbol after `#` symbols to begin the raw byte string literal",
+                }
-                        rustc_lexer::RawStrError::NoTerminator { expected, found, .. } => if expected == found {
+                STRING
-                            "Missing trailing `\"` to terminate the raw byte string literal"
+            }
-                        } else {
+            rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
-                            "Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"
+                if n_hashes.is_none() {
-                        },
+                    err = "Invalid raw string literal";
-                        rustc_lexer::RawStrError::TooManyDelimiters { .. } => "Too many `#` symbols: raw byte strings may be delimited by up to 65535 `#` symbols",
+                }
-                    };
-                };
                BYTE_STRING
            }
+            rustc_lexer::LiteralKind::RawCStr { n_hashes } => {
+                if n_hashes.is_none() {
+                    err = "Invalid raw string literal";
+                }
+                STRING
+            }
        };
        let err = if err.is_empty() { None } else { Some(err) };
--- a/crates/parser/src/syntax_kind/generated.rs
+++ b/crates/parser/src/syntax_kind/generated.rs
@ -117,6 +117,7 @@ pub enum SyntaxKind {
    BYTE,
    STRING,
    BYTE_STRING,
+    C_STRING,
    ERROR,
    IDENT,
    WHITESPACE,
@ -379,7 +380,7 @@ pub fn is_punct(self) -> bool {
        )
    }
    pub fn is_literal(self) -> bool {
-        matches!(self, INT_NUMBER | FLOAT_NUMBER | CHAR | BYTE | STRING | BYTE_STRING)
+        matches!(self, INT_NUMBER | FLOAT_NUMBER | CHAR | BYTE | STRING | BYTE_STRING | C_STRING)
    }
    pub fn from_keyword(ident: &str) -> Option<SyntaxKind> {
        let kw = match ident {
--- a/crates/parser/test_data/lexer/err/unclosed_raw_byte_string_at_eof.rast
+++ b/crates/parser/test_data/lexer/err/unclosed_raw_byte_string_at_eof.rast
@ -1 +1 @@
-BYTE_STRING "br##\"" error: Missing trailing `"` with `#` symbols to terminate the raw byte string literal
+BYTE_STRING "br##\"" error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unclosed_raw_byte_string_with_ascii_escape.rast
+++ b/crates/parser/test_data/lexer/err/unclosed_raw_byte_string_with_ascii_escape.rast
@ -1 +1 @@
-BYTE_STRING "br##\"\\x7f" error: Missing trailing `"` with `#` symbols to terminate the raw byte string literal
+BYTE_STRING "br##\"\\x7f" error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unclosed_raw_byte_string_with_ferris.rast
+++ b/crates/parser/test_data/lexer/err/unclosed_raw_byte_string_with_ferris.rast
@ -1 +1 @@
-BYTE_STRING "br##\"🦀" error: Missing trailing `"` with `#` symbols to terminate the raw byte string literal
+BYTE_STRING "br##\"🦀" error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unclosed_raw_byte_string_with_slash.rast
+++ b/crates/parser/test_data/lexer/err/unclosed_raw_byte_string_with_slash.rast
@ -1 +1 @@
-BYTE_STRING "br##\"\\" error: Missing trailing `"` with `#` symbols to terminate the raw byte string literal
+BYTE_STRING "br##\"\\" error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unclosed_raw_byte_string_with_slash_n.rast
+++ b/crates/parser/test_data/lexer/err/unclosed_raw_byte_string_with_slash_n.rast
@ -1 +1 @@
-BYTE_STRING "br##\"\\n" error: Missing trailing `"` with `#` symbols to terminate the raw byte string literal
+BYTE_STRING "br##\"\\n" error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unclosed_raw_byte_string_with_space.rast
+++ b/crates/parser/test_data/lexer/err/unclosed_raw_byte_string_with_space.rast
@ -1 +1 @@
-BYTE_STRING "br##\" " error: Missing trailing `"` with `#` symbols to terminate the raw byte string literal
+BYTE_STRING "br##\" " error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unclosed_raw_byte_string_with_unicode_escape.rast
+++ b/crates/parser/test_data/lexer/err/unclosed_raw_byte_string_with_unicode_escape.rast
@ -1 +1 @@
-BYTE_STRING "br##\"\\u{20AA}" error: Missing trailing `"` with `#` symbols to terminate the raw byte string literal
+BYTE_STRING "br##\"\\u{20AA}" error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unclosed_raw_string_at_eof.rast
+++ b/crates/parser/test_data/lexer/err/unclosed_raw_string_at_eof.rast
@ -1 +1 @@
-STRING "r##\"" error: Missing trailing `"` with `#` symbols to terminate the raw string literal
+STRING "r##\"" error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unclosed_raw_string_with_ascii_escape.rast
+++ b/crates/parser/test_data/lexer/err/unclosed_raw_string_with_ascii_escape.rast
@ -1 +1 @@
-STRING "r##\"\\x7f" error: Missing trailing `"` with `#` symbols to terminate the raw string literal
+STRING "r##\"\\x7f" error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unclosed_raw_string_with_ferris.rast
+++ b/crates/parser/test_data/lexer/err/unclosed_raw_string_with_ferris.rast
@ -1 +1 @@
-STRING "r##\"🦀" error: Missing trailing `"` with `#` symbols to terminate the raw string literal
+STRING "r##\"🦀" error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unclosed_raw_string_with_slash.rast
+++ b/crates/parser/test_data/lexer/err/unclosed_raw_string_with_slash.rast
@ -1 +1 @@
-STRING "r##\"\\" error: Missing trailing `"` with `#` symbols to terminate the raw string literal
+STRING "r##\"\\" error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unclosed_raw_string_with_slash_n.rast
+++ b/crates/parser/test_data/lexer/err/unclosed_raw_string_with_slash_n.rast
@ -1 +1 @@
-STRING "r##\"\\n" error: Missing trailing `"` with `#` symbols to terminate the raw string literal
+STRING "r##\"\\n" error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unclosed_raw_string_with_space.rast
+++ b/crates/parser/test_data/lexer/err/unclosed_raw_string_with_space.rast
@ -1 +1 @@
-STRING "r##\" " error: Missing trailing `"` with `#` symbols to terminate the raw string literal
+STRING "r##\" " error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unclosed_raw_string_with_unicode_escape.rast
+++ b/crates/parser/test_data/lexer/err/unclosed_raw_string_with_unicode_escape.rast
@ -1 +1 @@
-STRING "r##\"\\u{20AA}" error: Missing trailing `"` with `#` symbols to terminate the raw string literal
+STRING "r##\"\\u{20AA}" error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unstarted_raw_byte_string_at_eof.rast
+++ b/crates/parser/test_data/lexer/err/unstarted_raw_byte_string_at_eof.rast
@ -1 +1 @@
-BYTE_STRING "br##" error: Missing `"` symbol after `#` symbols to begin the raw byte string literal
+BYTE_STRING "br##" error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unstarted_raw_byte_string_with_ascii.rast
+++ b/crates/parser/test_data/lexer/err/unstarted_raw_byte_string_with_ascii.rast
@ -1,4 +1,4 @@
-BYTE_STRING "br## " error: Missing `"` symbol after `#` symbols to begin the raw byte string literal
+BYTE_STRING "br## " error: Invalid raw string literal
 IDENT "I"
 WHITESPACE " "
 IDENT "lack"
--- a/crates/parser/test_data/lexer/err/unstarted_raw_string_at_eof.rast
+++ b/crates/parser/test_data/lexer/err/unstarted_raw_string_at_eof.rast
@ -1 +1 @@
-STRING "r##" error: Missing `"` symbol after `#` symbols to begin the raw string literal
+STRING "r##" error: Invalid raw string literal
--- a/crates/parser/test_data/lexer/err/unstarted_raw_string_with_ascii.rast
+++ b/crates/parser/test_data/lexer/err/unstarted_raw_string_with_ascii.rast
@ -1,4 +1,4 @@
-STRING "r## " error: Missing `"` symbol after `#` symbols to begin the raw string literal
+STRING "r## " error: Invalid raw string literal
 IDENT "I"
 WHITESPACE " "
 IDENT "lack"
--- a/crates/syntax/Cargo.toml
+++ b/crates/syntax/Cargo.toml
@ -17,13 +17,14 @@ cov-mark = "2.0.0-pre.1"
 either = "1.7.0"
 itertools = "0.10.5"
 rowan = "0.15.11"
-rustc_lexer = { version = "727.0.0", package = "rustc-ap-rustc_lexer" }
 rustc-hash = "1.1.0"
 once_cell = "1.17.0"
 indexmap = "1.9.1"
 smol_str.workspace = true
 triomphe.workspace = true
+rustc_lexer.workspace = true
 parser.workspace = true
 profile.workspace = true
 stdx.workspace = true
--- a/crates/syntax/src/tests/ast_src.rs
+++ b/crates/syntax/src/tests/ast_src.rs
@ -71,7 +71,7 @@ pub(crate) struct KindsSrc<'a> {
        "super", "trait", "true", "try", "type", "unsafe", "use", "where", "while", "yield",
    ],
    contextual_keywords: &["auto", "default", "existential", "union", "raw", "macro_rules", "yeet"],
-    literals: &["INT_NUMBER", "FLOAT_NUMBER", "CHAR", "BYTE", "STRING", "BYTE_STRING"],
+    literals: &["INT_NUMBER", "FLOAT_NUMBER", "CHAR", "BYTE", "STRING", "BYTE_STRING", "C_STRING"],
    tokens: &["ERROR", "IDENT", "WHITESPACE", "LIFETIME_IDENT", "COMMENT", "SHEBANG"],
    nodes: &[
        "SOURCE_FILE",
--- a/crates/syntax/src/validation.rs
+++ b/crates/syntax/src/validation.rs
@ -5,7 +5,7 @@
 mod block;
 use rowan::Direction;
-use rustc_lexer::unescape::{self, unescape_byte, unescape_char, unescape_literal, Mode};
+use rustc_lexer::unescape::{self, unescape_literal, Mode};
 use crate::{
    algo,
@ -44,7 +44,7 @@ pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> {
    errors
 }
-fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> &'static str {
+fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> (&'static str, bool) {
    use unescape::EscapeError as EE;
    #[rustfmt::skip]
@ -103,12 +103,15 @@ fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> &'static str {
        EE::UnicodeEscapeInByte => {
            "Byte literals must not contain unicode escapes"
        }
-        EE::NonAsciiCharInByte | EE::NonAsciiCharInByteString => {
+        EE::NonAsciiCharInByte  => {
            "Byte literals must not contain non-ASCII characters"
        }
        EE::UnskippedWhitespaceWarning => "Whitespace after this escape is not skipped",
        EE::MultipleSkippedLinesWarning => "Multiple lines are skipped by this escape",
    };
-    err_message
+    (err_message, err.is_fatal())
 }
 fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
@ -121,9 +124,13 @@ fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> {
    let text = token.text();
    // FIXME: lift this lambda refactor to `fn` (https://github.com/rust-lang/rust-analyzer/pull/2834#discussion_r366199205)
-    let mut push_err = |prefix_len, (off, err): (usize, unescape::EscapeError)| {
+    let mut push_err = |prefix_len, off, err: unescape::EscapeError| {
        let off = token.text_range().start() + TextSize::try_from(off + prefix_len).unwrap();
-        acc.push(SyntaxError::new_at_offset(rustc_unescape_error_to_string(err), off));
+        let (message, is_err) = rustc_unescape_error_to_string(err);
+        // FIXME: Emit lexer warnings
+        if is_err {
+            acc.push(SyntaxError::new_at_offset(message, off));
+        }
    };
    match literal.kind() {
@ -132,7 +139,7 @@ fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> {
                if let Some(without_quotes) = unquote(text, 1, '"') {
                    unescape_literal(without_quotes, Mode::Str, &mut |range, char| {
                        if let Err(err) = char {
-                            push_err(1, (range.start, err));
+                            push_err(1, range.start, err);
                        }
                    });
                }
@ -143,20 +150,28 @@ fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> {
                if let Some(without_quotes) = unquote(text, 2, '"') {
                    unescape_literal(without_quotes, Mode::ByteStr, &mut |range, char| {
                        if let Err(err) = char {
-                            push_err(2, (range.start, err));
+                            push_err(1, range.start, err);
                        }
                    });
                }
            }
        }
        ast::LiteralKind::Char(_) => {
-            if let Some(Err(e)) = unquote(text, 1, '\'').map(unescape_char) {
+            if let Some(without_quotes) = unquote(text, 1, '\'') {
-                push_err(1, e);
+                unescape_literal(without_quotes, Mode::Char, &mut |range, char| {
+                    if let Err(err) = char {
+                        push_err(1, range.start, err);
+                    }
+                });
            }
        }
        ast::LiteralKind::Byte(_) => {
-            if let Some(Err(e)) = unquote(text, 2, '\'').map(unescape_byte) {
+            if let Some(without_quotes) = unquote(text, 2, '\'') {
-                push_err(2, e);
+                unescape_literal(without_quotes, Mode::Byte, &mut |range, char| {
+                    if let Err(err) = char {
+                        push_err(2, range.start, err);
+                    }
+                });
            }
        }
        ast::LiteralKind::IntNumber(_)
@ -1 +1 @@
-BYTE_STRING "br##\"" error: Missing trailing `"` with `#` symbols to terminate the raw byte string literal
+BYTE_STRING "br##\"" error: Invalid raw string literal
@ -1 +1 @@
-BYTE_STRING "br##\"\\x7f" error: Missing trailing `"` with `#` symbols to terminate the raw byte string literal
+BYTE_STRING "br##\"\\x7f" error: Invalid raw string literal
@ -1 +1 @@
-BYTE_STRING "br##\"🦀" error: Missing trailing `"` with `#` symbols to terminate the raw byte string literal
+BYTE_STRING "br##\"🦀" error: Invalid raw string literal
@ -1 +1 @@
-BYTE_STRING "br##\"\\" error: Missing trailing `"` with `#` symbols to terminate the raw byte string literal
+BYTE_STRING "br##\"\\" error: Invalid raw string literal
@ -1 +1 @@
-BYTE_STRING "br##\"\\n" error: Missing trailing `"` with `#` symbols to terminate the raw byte string literal
+BYTE_STRING "br##\"\\n" error: Invalid raw string literal
@ -1 +1 @@
-BYTE_STRING "br##\" " error: Missing trailing `"` with `#` symbols to terminate the raw byte string literal
+BYTE_STRING "br##\" " error: Invalid raw string literal
@ -1 +1 @@
-BYTE_STRING "br##\"\\u{20AA}" error: Missing trailing `"` with `#` symbols to terminate the raw byte string literal
+BYTE_STRING "br##\"\\u{20AA}" error: Invalid raw string literal
@ -1 +1 @@
-STRING "r##\"" error: Missing trailing `"` with `#` symbols to terminate the raw string literal
+STRING "r##\"" error: Invalid raw string literal
@ -1 +1 @@
-STRING "r##\"\\x7f" error: Missing trailing `"` with `#` symbols to terminate the raw string literal
+STRING "r##\"\\x7f" error: Invalid raw string literal
@ -1 +1 @@
-STRING "r##\"🦀" error: Missing trailing `"` with `#` symbols to terminate the raw string literal
+STRING "r##\"🦀" error: Invalid raw string literal