Handle `str` literals written with `'` lexed as lifetime

Given `'hello world'` and `'1 str'`, provide a structured suggestion for a valid string literal:

```
error[E0762]: unterminated character literal
  --> $DIR/lex-bad-str-literal-as-char-3.rs:2:26
   |
LL |     println!('hello world');
   |                          ^^^^
   |
help: if you meant to write a `str` literal, use double quotes
   |
LL |     println!("hello world");
   |              ~           ~
```

```
error[E0762]: unterminated character literal
  --> $DIR/lex-bad-str-literal-as-char-1.rs:2:20
   |
LL |     println!('1 + 1');
   |                    ^^^^
   |
help: if you meant to write a `str` literal, use double quotes
   |
LL |     println!("1 + 1");
   |              ~     ~
```

Fix #119685.
2024-03-09 00:35:57 +00:00 · 2024-03-09 00:35:57 +00:00 · 982918f493
commit 982918f493
parent 22e241e32e
13 changed files with 130 additions and 5 deletions
--- a/compiler/rustc_lexer/src/cursor.rs
+++ b/compiler/rustc_lexer/src/cursor.rs
@ -46,7 +46,7 @@ pub(crate) fn prev(&self) -> char {
    /// If requested position doesn't exist, `EOF_CHAR` is returned.
    /// However, getting `EOF_CHAR` doesn't always mean actual end of file,
    /// it should be checked with `is_eof` method.
-    pub(crate) fn first(&self) -> char {
+    pub fn first(&self) -> char {
        // `.next()` optimizes better than `.nth(0)`
        self.chars.clone().next().unwrap_or(EOF_CHAR)
    }
--- a/compiler/rustc_parse/messages.ftl
+++ b/compiler/rustc_parse/messages.ftl
@ -835,6 +835,7 @@ parse_unknown_prefix = prefix `{$prefix}` is unknown
    .label = unknown prefix
    .note =  prefixed identifiers and literals are reserved since Rust 2021
    .suggestion_br = use `br` for a raw byte string
    .suggestion_str = if you meant to write a `str` literal, use double quotes
    .suggestion_whitespace = consider inserting whitespace here
 parse_unknown_start_of_token = unknown start of token: {$escaped}
--- a/compiler/rustc_parse/src/errors.rs
+++ b/compiler/rustc_parse/src/errors.rs
@ -1994,6 +1994,17 @@ pub enum UnknownPrefixSugg {
        style = "verbose"
    )]
    Whitespace(#[primary_span] Span),
    #[multipart_suggestion(
        parse_suggestion_str,
        applicability = "maybe-incorrect",
        style = "verbose"
    )]
    MeantStr {
        #[suggestion_part(code = "\"")]
        start: Span,
        #[suggestion_part(code = "\"")]
        end: Span,
    },
 }
 #[derive(Diagnostic)]
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@ -63,6 +63,7 @@ pub(crate) fn parse_token_trees<'psess, 'src>(
        cursor,
        override_span,
        nbsp_is_whitespace: false,
        last_lifetime: None,
    };
    let (stream, res, unmatched_delims) =
        tokentrees::TokenTreesReader::parse_all_token_trees(string_reader);
@ -105,6 +106,10 @@ struct StringReader<'psess, 'src> {
    /// in this file, it's safe to treat further occurrences of the non-breaking
    /// space character as whitespace.
    nbsp_is_whitespace: bool,
    /// Track the `Span` for the leading `'` of the last lifetime. Used for
    /// diagnostics to detect possible typo where `"` was meant.
    last_lifetime: Option<Span>,
 }
 impl<'psess, 'src> StringReader<'psess, 'src> {
@ -130,6 +135,18 @@ fn next_token(&mut self) -> (Token, bool) {
            debug!("next_token: {:?}({:?})", token.kind, self.str_from(start));
            if let rustc_lexer::TokenKind::Semi
            | rustc_lexer::TokenKind::LineComment { .. }
            | rustc_lexer::TokenKind::BlockComment { .. }
            | rustc_lexer::TokenKind::CloseParen
            | rustc_lexer::TokenKind::CloseBrace
            | rustc_lexer::TokenKind::CloseBracket = token.kind
            {
                // Heuristic: we assume that it is unlikely we're dealing with an unterminated
                // string surrounded by single quotes.
                self.last_lifetime = None;
            }
            // Now "cook" the token, converting the simple `rustc_lexer::TokenKind` enum into a
            // rich `rustc_ast::TokenKind`. This turns strings into interned symbols and runs
            // additional validation.
@ -247,6 +264,7 @@ fn next_token(&mut self) -> (Token, bool) {
                    // expansion purposes. See #12512 for the gory details of why
                    // this is necessary.
                    let lifetime_name = self.str_from(start);
                    self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
                    if starts_with_number {
                        let span = self.mk_sp(start, self.pos);
                        self.dcx().struct_err("lifetimes cannot start with a number")
@ -395,10 +413,21 @@ fn cook_lexer_literal(
        match kind {
            rustc_lexer::LiteralKind::Char { terminated } => {
                if !terminated {
-                    self.dcx()
+                    let mut err = self
                        .dcx()
                        .struct_span_fatal(self.mk_sp(start, end), "unterminated character literal")
-                        .with_code(E0762)
+                        .with_code(E0762);
-                        .emit()
+                    if let Some(lt_sp) = self.last_lifetime {
                        err.multipart_suggestion(
                            "if you meant to write a `str` literal, use double quotes",
                            vec![
                                (lt_sp, "\"".to_string()),
                                (self.mk_sp(start, start + BytePos(1)), "\"".to_string()),
                            ],
                            Applicability::MaybeIncorrect,
                        );
                    }
                    err.emit()
                }
                self.cook_unicode(token::Char, Mode::Char, start, end, 1, 1) // ' '
            }
@ -673,7 +702,16 @@ fn report_unknown_prefix(&self, start: BytePos) {
            let sugg = if prefix == "rb" {
                Some(errors::UnknownPrefixSugg::UseBr(prefix_span))
            } else if expn_data.is_root() {
-                Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi()))
+                if self.cursor.first() == '\''
                    && let Some(start) = self.last_lifetime
                {
                    Some(errors::UnknownPrefixSugg::MeantStr {
                        start,
                        end: self.mk_sp(self.pos, self.pos + BytePos(1)),
                    })
                } else {
                    Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi()))
                }
            } else {
                None
            };
--- a/tests/ui/lexer/lex-bad-str-literal-as-char-1.fixed
+++ b/tests/ui/lexer/lex-bad-str-literal-as-char-1.fixed
@ -0,0 +1,6 @@
 //@ run-rustfix
 fn main() {
    println!("1 + 1");
    //~^ ERROR unterminated character literal
    //~| ERROR lifetimes cannot start with a number
 }
--- a/tests/ui/lexer/lex-bad-str-literal-as-char-1.rs
+++ b/tests/ui/lexer/lex-bad-str-literal-as-char-1.rs
@ -0,0 +1,6 @@
 //@ run-rustfix
 fn main() {
    println!('1 + 1');
    //~^ ERROR unterminated character literal
    //~| ERROR lifetimes cannot start with a number
 }
--- a/tests/ui/lexer/lex-bad-str-literal-as-char-1.stderr
+++ b/tests/ui/lexer/lex-bad-str-literal-as-char-1.stderr
@ -0,0 +1,20 @@
 error[E0762]: unterminated character literal
  --> $DIR/lex-bad-str-literal-as-char-1.rs:3:20
   |
 LL |     println!('1 + 1');
   |                    ^^^
   |
 help: if you meant to write a `str` literal, use double quotes
   |
 LL |     println!("1 + 1");
   |              ~     ~
 error: lifetimes cannot start with a number
  --> $DIR/lex-bad-str-literal-as-char-1.rs:3:14
   |
 LL |     println!('1 + 1');
   |              ^^
 error: aborting due to 2 previous errors
 For more information about this error, try `rustc --explain E0762`.
--- a/tests/ui/lexer/lex-bad-str-literal-as-char-2.fixed
+++ b/tests/ui/lexer/lex-bad-str-literal-as-char-2.fixed
@ -0,0 +1,4 @@
 //@ run-rustfix
 fn main() {
    println!(" 1 + 1"); //~ ERROR character literal may only contain one codepoint
 }
--- a/tests/ui/lexer/lex-bad-str-literal-as-char-2.rs
+++ b/tests/ui/lexer/lex-bad-str-literal-as-char-2.rs
@ -0,0 +1,4 @@
 //@ run-rustfix
 fn main() {
    println!(' 1 + 1'); //~ ERROR character literal may only contain one codepoint
 }
--- a/tests/ui/lexer/lex-bad-str-literal-as-char-2.stderr
+++ b/tests/ui/lexer/lex-bad-str-literal-as-char-2.stderr
@ -0,0 +1,13 @@
 error: character literal may only contain one codepoint
  --> $DIR/lex-bad-str-literal-as-char-2.rs:3:14
   |
 LL |     println!(' 1 + 1');
   |              ^^^^^^^^
   |
 help: if you meant to write a `str` literal, use double quotes
   |
 LL |     println!(" 1 + 1");
   |              ~~~~~~~~
 error: aborting due to 1 previous error
--- a/tests/ui/lexer/lex-bad-str-literal-as-char-3.fixed
+++ b/tests/ui/lexer/lex-bad-str-literal-as-char-3.fixed
@ -0,0 +1,4 @@
 //@ run-rustfix
 fn main() {
    println!("hello world"); //~ ERROR unterminated character literal
 }
--- a/tests/ui/lexer/lex-bad-str-literal-as-char-3.rs
+++ b/tests/ui/lexer/lex-bad-str-literal-as-char-3.rs
@ -0,0 +1,4 @@
 //@ run-rustfix
 fn main() {
    println!('hello world'); //~ ERROR unterminated character literal
 }
--- a/tests/ui/lexer/lex-bad-str-literal-as-char-3.stderr
+++ b/tests/ui/lexer/lex-bad-str-literal-as-char-3.stderr
@ -0,0 +1,14 @@
 error[E0762]: unterminated character literal
  --> $DIR/lex-bad-str-literal-as-char-3.rs:3:26
   |
 LL |     println!('hello world');
   |                          ^^^^
   |
 help: if you meant to write a `str` literal, use double quotes
   |
 LL |     println!("hello world");
   |              ~           ~
 error: aborting due to 1 previous error
 For more information about this error, try `rustc --explain E0762`.