diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl index c39ada95a4e..96765c296e7 100644 --- a/compiler/rustc_parse/messages.ftl +++ b/compiler/rustc_parse/messages.ftl @@ -336,7 +336,7 @@ parse_expected_identifier_found_reserved_keyword = expected identifier, found re parse_expected_identifier_found_doc_comment = expected identifier, found doc comment parse_expected_identifier = expected identifier -parse_sugg_escape_to_use_as_identifier = escape `{$ident_name}` to use it as an identifier +parse_sugg_escape_identifier = escape `{$ident_name}` to use it as an identifier parse_sugg_remove_comma = remove this comma diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index af0c3026c66..a9d116012ae 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -888,12 +888,12 @@ pub(crate) struct InvalidMetaItem { #[derive(Subdiagnostic)] #[suggestion( - parse_sugg_escape_to_use_as_identifier, + parse_sugg_escape_identifier, style = "verbose", applicability = "maybe-incorrect", code = "r#" )] -pub(crate) struct SuggEscapeToUseAsIdentifier { +pub(crate) struct SuggEscapeIdentifier { #[primary_span] pub span: Span, pub ident_name: String, @@ -937,7 +937,7 @@ impl ExpectedIdentifierFound { pub(crate) struct ExpectedIdentifier { pub span: Span, pub token: Token, - pub suggest_raw: Option, + pub suggest_raw: Option, pub suggest_remove_comma: Option, pub help_cannot_start_number: Option, } @@ -986,7 +986,10 @@ impl<'a, G: EmissionGuarantee> IntoDiagnostic<'a, G> for ExpectedIdentifier { #[derive(Subdiagnostic)] #[help(parse_invalid_identifier_with_leading_number)] -pub(crate) struct HelpIdentifierStartsWithNumber; +pub(crate) struct HelpIdentifierStartsWithNumber { + #[primary_span] + pub num_span: Span, +} pub(crate) struct ExpectedSemi { pub span: Span, diff --git a/compiler/rustc_parse/src/parser/diagnostics.rs b/compiler/rustc_parse/src/parser/diagnostics.rs index 5b12bcc1822..9544afd3d6d 100644 --- a/compiler/rustc_parse/src/parser/diagnostics.rs +++ b/compiler/rustc_parse/src/parser/diagnostics.rs @@ -6,14 +6,14 @@ use super::{ use crate::errors::{ AmbiguousPlus, AttributeOnParamType, BadQPathStage2, BadTypePlus, BadTypePlusSub, ComparisonOperatorsCannotBeChained, ComparisonOperatorsCannotBeChainedSugg, - ConstGenericWithoutBraces, ConstGenericWithoutBracesSugg, DocCommentOnParamType, - DoubleColonInBound, ExpectedIdentifier, ExpectedSemi, ExpectedSemiSugg, + ConstGenericWithoutBraces, ConstGenericWithoutBracesSugg, DocCommentDoesNotDocumentAnything, + DocCommentOnParamType, DoubleColonInBound, ExpectedIdentifier, ExpectedSemi, ExpectedSemiSugg, GenericParamsWithoutAngleBrackets, GenericParamsWithoutAngleBracketsSugg, HelpIdentifierStartsWithNumber, InInTypo, IncorrectAwait, IncorrectSemicolon, IncorrectUseOfAwait, ParenthesesInForHead, ParenthesesInForHeadSugg, PatternMethodParamWithoutBody, QuestionMarkInType, QuestionMarkInTypeSugg, SelfParamNotFirst, StructLiteralBodyWithoutPath, StructLiteralBodyWithoutPathSugg, StructLiteralNeedingParens, - StructLiteralNeedingParensSugg, SuggEscapeToUseAsIdentifier, SuggRemoveComma, + StructLiteralNeedingParensSugg, SuggEscapeIdentifier, SuggRemoveComma, UnexpectedConstInGenericParam, UnexpectedConstParamDeclaration, UnexpectedConstParamDeclarationSugg, UnmatchedAngleBrackets, UseEqInstead, }; @@ -38,7 +38,7 @@ use rustc_errors::{ use rustc_session::errors::ExprParenthesesNeeded; use rustc_span::source_map::Spanned; use rustc_span::symbol::{kw, sym, Ident}; -use rustc_span::{Span, SpanSnippetError, DUMMY_SP}; +use rustc_span::{Span, SpanSnippetError, Symbol, DUMMY_SP}; use std::mem::take; use std::ops::{Deref, DerefMut}; use thin_vec::{thin_vec, ThinVec}; @@ -268,7 +268,21 @@ impl<'a> Parser<'a> { self.sess.source_map().span_to_snippet(span) } - pub(super) fn expected_ident_found(&mut self) -> DiagnosticBuilder<'a, ErrorGuaranteed> { + /// Emits an error with suggestions if an identifier was expected but not found. + /// + /// Returns a possibly recovered identifier. + pub(super) fn expected_ident_found( + &mut self, + recover: bool, + ) -> PResult<'a, (Ident, /* is_raw */ bool)> { + if let TokenKind::DocComment(..) = self.prev_token.kind { + return Err(DocCommentDoesNotDocumentAnything { + span: self.prev_token.span, + missing_comma: None, + } + .into_diagnostic(&self.sess.span_diagnostic)); + } + let valid_follow = &[ TokenKind::Eq, TokenKind::Colon, @@ -281,31 +295,51 @@ impl<'a> Parser<'a> { TokenKind::CloseDelim(Delimiter::Parenthesis), ]; - let suggest_raw = match self.token.ident() { - Some((ident, false)) - if ident.is_raw_guess() - && self.look_ahead(1, |t| valid_follow.contains(&t.kind)) => - { - Some(SuggEscapeToUseAsIdentifier { - span: ident.span.shrink_to_lo(), - // `Symbol::to_string()` is different from `Symbol::into_diagnostic_arg()`, - // which uses `Symbol::to_ident_string()` and "helpfully" adds an implicit `r#` - ident_name: ident.name.to_string(), - }) - } - _ => None, - }; + let mut recovered_ident = None; + // we take this here so that the correct original token is retained in + // the diagnostic, regardless of eager recovery. + let bad_token = self.token.clone(); - let suggest_remove_comma = (self.token == token::Comma - && self.look_ahead(1, |t| t.is_ident())) - .then_some(SuggRemoveComma { span: self.token.span }); + // suggest prepending a keyword in identifier position with `r#` + let suggest_raw = if let Some((ident, false)) = self.token.ident() + && ident.is_raw_guess() + && self.look_ahead(1, |t| valid_follow.contains(&t.kind)) + { + recovered_ident = Some((ident, true)); - let help_cannot_start_number = - self.is_lit_bad_ident().then_some(HelpIdentifierStartsWithNumber); + // `Symbol::to_string()` is different from `Symbol::into_diagnostic_arg()`, + // which uses `Symbol::to_ident_string()` and "helpfully" adds an implicit `r#` + let ident_name = ident.name.to_string(); + + Some(SuggEscapeIdentifier { + span: ident.span.shrink_to_lo(), + ident_name + }) + } else { None }; + + let suggest_remove_comma = + if self.token == token::Comma && self.look_ahead(1, |t| t.is_ident()) { + if recover { + self.bump(); + recovered_ident = self.ident_or_err(false).ok(); + }; + + Some(SuggRemoveComma { span: bad_token.span }) + } else { + None + }; + + let help_cannot_start_number = self.is_lit_bad_ident().map(|(len, valid_portion)| { + let (invalid, valid) = self.token.span.split_at(len as u32); + + recovered_ident = Some((Ident::new(valid_portion, valid), false)); + + HelpIdentifierStartsWithNumber { num_span: invalid } + }); let err = ExpectedIdentifier { - span: self.token.span, - token: self.token.clone(), + span: bad_token.span, + token: bad_token, suggest_raw, suggest_remove_comma, help_cannot_start_number, @@ -314,6 +348,7 @@ impl<'a> Parser<'a> { // if the token we have is a `<` // it *might* be a misplaced generic + // FIXME: could we recover with this? if self.token == token::Lt { // all keywords that could have generic applied let valid_prev_keywords = @@ -364,18 +399,38 @@ impl<'a> Parser<'a> { } } - err + if let Some(recovered_ident) = recovered_ident && recover { + err.emit(); + Ok(recovered_ident) + } else { + Err(err) + } + } + + pub(super) fn expected_ident_found_err(&mut self) -> DiagnosticBuilder<'a, ErrorGuaranteed> { + self.expected_ident_found(false).unwrap_err() } /// Checks if the current token is a integer or float literal and looks like /// it could be a invalid identifier with digits at the start. - pub(super) fn is_lit_bad_ident(&mut self) -> bool { - matches!(self.token.uninterpolate().kind, token::Literal(Lit { kind: token::LitKind::Integer | token::LitKind::Float, .. }) - // ensure that the integer literal is followed by a *invalid* - // suffix: this is how we know that it is a identifier with an - // invalid beginning. - if rustc_ast::MetaItemLit::from_token(&self.token).is_none() - ) + /// + /// Returns the number of characters (bytes) composing the invalid portion + /// of the identifier and the valid portion of the identifier. + pub(super) fn is_lit_bad_ident(&mut self) -> Option<(usize, Symbol)> { + // ensure that the integer literal is followed by a *invalid* + // suffix: this is how we know that it is a identifier with an + // invalid beginning. + if let token::Literal(Lit { + kind: token::LitKind::Integer | token::LitKind::Float, + symbol, + suffix, + }) = self.token.kind + && rustc_ast::MetaItemLit::from_token(&self.token).is_none() + { + Some((symbol.as_str().len(), suffix.unwrap())) + } else { + None + } } pub(super) fn expected_one_of_not_found( diff --git a/compiler/rustc_parse/src/parser/item.rs b/compiler/rustc_parse/src/parser/item.rs index 85cc8ca02a9..ae8fe90e9d6 100644 --- a/compiler/rustc_parse/src/parser/item.rs +++ b/compiler/rustc_parse/src/parser/item.rs @@ -1181,7 +1181,7 @@ impl<'a> Parser<'a> { defaultness: Defaultness, ) -> PResult<'a, ItemInfo> { let impl_span = self.token.span; - let mut err = self.expected_ident_found(); + let mut err = self.expected_ident_found_err(); // Only try to recover if this is implementing a trait for a type let mut impl_info = match self.parse_item_impl(attrs, defaultness) { @@ -1744,7 +1744,7 @@ impl<'a> Parser<'a> { /// Parses a field identifier. Specialized version of `parse_ident_common` /// for better diagnostics and suggestions. fn parse_field_ident(&mut self, adt_ty: &str, lo: Span) -> PResult<'a, Ident> { - let (ident, is_raw) = self.ident_or_err()?; + let (ident, is_raw) = self.ident_or_err(true)?; if !is_raw && ident.is_reserved() { let snapshot = self.create_snapshot_for_diagnostic(); let err = if self.check_fn_front_matter(false, Case::Sensitive) { @@ -1776,7 +1776,7 @@ impl<'a> Parser<'a> { Err(err) => { err.cancel(); self.restore_snapshot(snapshot); - self.expected_ident_found() + self.expected_ident_found_err() } } } else if self.eat_keyword(kw::Struct) { @@ -1792,11 +1792,11 @@ impl<'a> Parser<'a> { Err(err) => { err.cancel(); self.restore_snapshot(snapshot); - self.expected_ident_found() + self.expected_ident_found_err() } } } else { - let mut err = self.expected_ident_found(); + let mut err = self.expected_ident_found_err(); if self.eat_keyword_noexpect(kw::Let) && let removal_span = self.prev_token.span.until(self.token.span) && let Ok(ident) = self.parse_ident_common(false) diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 3251dd6d0c6..53c25a80c4b 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -42,8 +42,7 @@ use thin_vec::ThinVec; use tracing::debug; use crate::errors::{ - DocCommentDoesNotDocumentAnything, IncorrectVisibilityRestriction, MismatchedClosingDelimiter, - NonStringAbiLiteral, + IncorrectVisibilityRestriction, MismatchedClosingDelimiter, NonStringAbiLiteral, }; bitflags::bitflags! { @@ -552,21 +551,11 @@ impl<'a> Parser<'a> { self.parse_ident_common(true) } - fn ident_or_err(&mut self) -> PResult<'a, (Ident, /* is_raw */ bool)> { - self.token.ident().ok_or_else(|| match self.prev_token.kind { - TokenKind::DocComment(..) => DocCommentDoesNotDocumentAnything { - span: self.prev_token.span, - missing_comma: None, - } - .into_diagnostic(&self.sess.span_diagnostic), - _ => self.expected_ident_found(), - }) - } - fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, Ident> { - let (ident, is_raw) = self.ident_or_err()?; + let (ident, is_raw) = self.ident_or_err(recover)?; + if !is_raw && ident.is_reserved() { - let mut err = self.expected_ident_found(); + let mut err = self.expected_ident_found_err(); if recover { err.emit(); } else { @@ -577,6 +566,21 @@ impl<'a> Parser<'a> { Ok(ident) } + fn ident_or_err(&mut self, recover: bool) -> PResult<'a, (Ident, /* is_raw */ bool)> { + let result = self.token.ident().ok_or_else(|| self.expected_ident_found(recover)); + + let (ident, is_raw) = match result { + Ok(ident) => ident, + Err(err) => match err { + // we recovered! + Ok(ident) => ident, + Err(err) => return Err(err), + }, + }; + + Ok((ident, is_raw)) + } + /// Checks if the next token is `tok`, and returns `true` if so. /// /// This method will automatically add `tok` to `expected_tokens` if `tok` is not diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs index fc9f1d1330a..2246002f5d3 100644 --- a/compiler/rustc_parse/src/parser/pat.rs +++ b/compiler/rustc_parse/src/parser/pat.rs @@ -348,10 +348,6 @@ impl<'a> Parser<'a> { lo = self.token.span; } - if self.is_lit_bad_ident() { - return Err(self.expected_ident_found()); - } - let pat = if self.check(&token::BinOp(token::And)) || self.token.kind == token::AndAnd { self.parse_pat_deref(expected)? } else if self.check(&token::OpenDelim(Delimiter::Parenthesis)) { @@ -395,7 +391,13 @@ impl<'a> Parser<'a> { } else { PatKind::Lit(const_expr) } - } else if self.can_be_ident_pat() { + // Don't eagerly error on semantically invalid tokens when matching + // declarative macros, as the input to those doesn't have to be + // semantically valid. For attribute/derive proc macros this is not the + // case, so doing the recovery for them is fine. + } else if self.can_be_ident_pat() + || (self.is_lit_bad_ident().is_some() && self.may_recover()) + { // Parse `ident @ pat` // This can give false positives and parse nullary enums, // they are dealt with later in resolve. @@ -594,7 +596,7 @@ impl<'a> Parser<'a> { // Make sure we don't allow e.g. `let mut $p;` where `$p:pat`. if let token::Interpolated(nt) = &self.token.kind { if let token::NtPat(_) = **nt { - self.expected_ident_found().emit(); + self.expected_ident_found_err().emit(); } } diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index 873cd33f6a4..02cffc762be 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -795,6 +795,18 @@ impl Span { }) } + /// Splits a span into two composite spans around a certain position. + pub fn split_at(self, pos: u32) -> (Span, Span) { + let len = self.hi().0 - self.lo().0; + debug_assert!(pos <= len); + + let split_pos = BytePos(self.lo().0 + pos); + ( + Span::new(self.lo(), split_pos, self.ctxt(), self.parent()), + Span::new(split_pos, self.hi(), self.ctxt(), self.parent()), + ) + } + /// Returns a `Span` that would enclose both `self` and `end`. /// /// Note that this can also be used to extend the span "backwards": diff --git a/tests/ui/parser/ident-recovery.rs b/tests/ui/parser/ident-recovery.rs new file mode 100644 index 00000000000..7575372b940 --- /dev/null +++ b/tests/ui/parser/ident-recovery.rs @@ -0,0 +1,16 @@ +fn ,comma() { + //~^ ERROR expected identifier, found `,` + struct Foo { + x: i32,, + //~^ ERROR expected identifier, found `,` + y: u32, + } +} + +fn break() { +//~^ ERROR expected identifier, found keyword `break` + let continue = 5; + //~^ ERROR expected identifier, found keyword `continue` +} + +fn main() {} diff --git a/tests/ui/parser/ident-recovery.stderr b/tests/ui/parser/ident-recovery.stderr new file mode 100644 index 00000000000..e9a55026d12 --- /dev/null +++ b/tests/ui/parser/ident-recovery.stderr @@ -0,0 +1,42 @@ +error: expected identifier, found `,` + --> $DIR/ident-recovery.rs:1:4 + | +LL | fn ,comma() { + | ^ + | | + | expected identifier + | help: remove this comma + +error: expected identifier, found `,` + --> $DIR/ident-recovery.rs:4:16 + | +LL | x: i32,, + | ^ + | | + | expected identifier + | help: remove this comma + +error: expected identifier, found keyword `break` + --> $DIR/ident-recovery.rs:10:4 + | +LL | fn break() { + | ^^^^^ expected identifier, found keyword + | +help: escape `break` to use it as an identifier + | +LL | fn r#break() { + | ++ + +error: expected identifier, found keyword `continue` + --> $DIR/ident-recovery.rs:12:9 + | +LL | let continue = 5; + | ^^^^^^^^ expected identifier, found keyword + | +help: escape `continue` to use it as an identifier + | +LL | let r#continue = 5; + | ++ + +error: aborting due to 4 previous errors + diff --git a/tests/ui/parser/integer-literal-start-ident.stderr b/tests/ui/parser/integer-literal-start-ident.stderr index 51c37a0d24c..b2c66129656 100644 --- a/tests/ui/parser/integer-literal-start-ident.stderr +++ b/tests/ui/parser/integer-literal-start-ident.stderr @@ -4,7 +4,11 @@ error: expected identifier, found `1main` LL | fn 1main() {} | ^^^^^ expected identifier | - = help: identifiers cannot start with a number +help: identifiers cannot start with a number + --> $DIR/integer-literal-start-ident.rs:1:4 + | +LL | fn 1main() {} + | ^ error: aborting due to previous error diff --git a/tests/ui/parser/issues/issue-104088.rs b/tests/ui/parser/issues/issue-104088.rs index 86988c8cd21..3dc636b6a33 100644 --- a/tests/ui/parser/issues/issue-104088.rs +++ b/tests/ui/parser/issues/issue-104088.rs @@ -1,26 +1,19 @@ -fn test() { +fn 1234test() { +//~^ ERROR expected identifier, found `1234test` if let 123 = 123 { println!("yes"); } -} -fn test_2() { + if let 2e1 = 123 { + //~^ ERROR mismatched types + } + + let 23name = 123; + //~^ ERROR expected identifier, found `23name` + + let 2x: i32 = 123; + //~^ ERROR expected identifier, found `2x` + let 1x = 123; //~^ ERROR expected identifier, found `1x` } -fn test_3() { - let 2x: i32 = 123; - //~^ ERROR expected identifier, found `2x` -} - -fn test_4() { - if let 2e1 = 123 { - //~^ ERROR mismatched types - } -} - -fn test_5() { - let 23name = 123; - //~^ ERROR expected identifier, found `23name` -} - fn main() {} diff --git a/tests/ui/parser/issues/issue-104088.stderr b/tests/ui/parser/issues/issue-104088.stderr index 6511a313149..8b751759d69 100644 --- a/tests/ui/parser/issues/issue-104088.stderr +++ b/tests/ui/parser/issues/issue-104088.stderr @@ -1,35 +1,59 @@ -error: expected identifier, found `1x` - --> $DIR/issue-104088.rs:6:9 +error: expected identifier, found `1234test` + --> $DIR/issue-104088.rs:1:4 | -LL | let 1x = 123; - | ^^ expected identifier +LL | fn 1234test() { + | ^^^^^^^^ expected identifier | - = help: identifiers cannot start with a number - -error: expected identifier, found `2x` - --> $DIR/issue-104088.rs:11:9 +help: identifiers cannot start with a number + --> $DIR/issue-104088.rs:1:4 | -LL | let 2x: i32 = 123; - | ^^ expected identifier - | - = help: identifiers cannot start with a number +LL | fn 1234test() { + | ^^^^ error: expected identifier, found `23name` - --> $DIR/issue-104088.rs:22:9 + --> $DIR/issue-104088.rs:9:9 | LL | let 23name = 123; | ^^^^^^ expected identifier | - = help: identifiers cannot start with a number +help: identifiers cannot start with a number + --> $DIR/issue-104088.rs:9:9 + | +LL | let 23name = 123; + | ^^ + +error: expected identifier, found `2x` + --> $DIR/issue-104088.rs:12:9 + | +LL | let 2x: i32 = 123; + | ^^ expected identifier + | +help: identifiers cannot start with a number + --> $DIR/issue-104088.rs:12:9 + | +LL | let 2x: i32 = 123; + | ^ + +error: expected identifier, found `1x` + --> $DIR/issue-104088.rs:15:9 + | +LL | let 1x = 123; + | ^^ expected identifier + | +help: identifiers cannot start with a number + --> $DIR/issue-104088.rs:15:9 + | +LL | let 1x = 123; + | ^ error[E0308]: mismatched types - --> $DIR/issue-104088.rs:16:12 + --> $DIR/issue-104088.rs:5:12 | LL | if let 2e1 = 123 { | ^^^ --- this expression has type `{integer}` | | | expected integer, found floating-point number -error: aborting due to 4 previous errors +error: aborting due to 5 previous errors For more information about this error, try `rustc --explain E0308`.