Rollup merge of #125117 - dev-ardi:improve-parser, r=wesleywiser,fmease

Improve parser

Fixes #124935.

- Add a few more help diagnostics to incorrect semicolons
- Overall cleanup of `maybe_consume_incorrect_semicolon`
- Added a few comments
- Renamed the `diff_marker` fns to `vcs_conflict_marker`
This commit is contained in:
Matthias Krüger 2024-05-18 18:44:14 +02:00 committed by GitHub
commit f9bf759e83
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 81 additions and 70 deletions

View File

@ -3262,6 +3262,7 @@ pub enum ItemKind {
} }
impl ItemKind { impl ItemKind {
/// "a" or "an"
pub fn article(&self) -> &'static str { pub fn article(&self) -> &'static str {
use ItemKind::*; use ItemKind::*;
match self { match self {

View File

@ -83,7 +83,7 @@ pub(crate) struct IncorrectSemicolon<'a> {
#[suggestion(style = "short", code = "", applicability = "machine-applicable")] #[suggestion(style = "short", code = "", applicability = "machine-applicable")]
pub span: Span, pub span: Span,
#[help] #[help]
pub opt_help: Option<()>, pub show_help: bool,
pub name: &'a str, pub name: &'a str,
} }

View File

@ -241,7 +241,7 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
// we have no way of tracking this in the lexer itself, so we piggyback on the parser // we have no way of tracking this in the lexer itself, so we piggyback on the parser
let mut in_cond = false; let mut in_cond = false;
while parser.token != token::Eof { while parser.token != token::Eof {
if let Err(diff_err) = parser.err_diff_marker() { if let Err(diff_err) = parser.err_vcs_conflict_marker() {
diff_errs.push(diff_err); diff_errs.push(diff_err);
} else if parser.is_keyword_ahead(0, &[kw::If, kw::While]) { } else if parser.is_keyword_ahead(0, &[kw::If, kw::While]) {
in_cond = true; in_cond = true;

View File

@ -1817,34 +1817,31 @@ impl<'a> Parser<'a> {
Ok(P(T::recovered(Some(P(QSelf { ty, path_span, position: 0 })), path))) Ok(P(T::recovered(Some(P(QSelf { ty, path_span, position: 0 })), path)))
} }
pub fn maybe_consume_incorrect_semicolon(&mut self, items: &[P<Item>]) -> bool { /// This function gets called in places where a semicolon is NOT expected and if there's a
if self.token.kind == TokenKind::Semi { /// semicolon it emits the appropriate error and returns true.
self.bump(); pub fn maybe_consume_incorrect_semicolon(&mut self, previous_item: Option<&Item>) -> bool {
if self.token.kind != TokenKind::Semi {
return false;
}
let mut err = // Check previous item to add it to the diagnostic, for example to say
IncorrectSemicolon { span: self.prev_token.span, opt_help: None, name: "" }; // `enum declarations are not followed by a semicolon`
let err = match previous_item {
if !items.is_empty() { Some(previous_item) => {
let previous_item = &items[items.len() - 1]; let name = match previous_item.kind {
let previous_item_kind_name = match previous_item.kind {
// Say "braced struct" because tuple-structs and // Say "braced struct" because tuple-structs and
// braceless-empty-struct declarations do take a semicolon. // braceless-empty-struct declarations do take a semicolon.
ItemKind::Struct(..) => Some("braced struct"), ItemKind::Struct(..) => "braced struct",
ItemKind::Enum(..) => Some("enum"), _ => previous_item.kind.descr(),
ItemKind::Trait(..) => Some("trait"),
ItemKind::Union(..) => Some("union"),
_ => None,
}; };
if let Some(name) = previous_item_kind_name { IncorrectSemicolon { span: self.token.span, name, show_help: true }
err.opt_help = Some(());
err.name = name;
}
} }
self.dcx().emit_err(err); None => IncorrectSemicolon { span: self.token.span, name: "", show_help: false },
true };
} else { self.dcx().emit_err(err);
false
} self.bump();
true
} }
/// Creates a `Diag` for an unexpected token `t` and tries to recover if it is a /// Creates a `Diag` for an unexpected token `t` and tries to recover if it is a
@ -2957,13 +2954,23 @@ impl<'a> Parser<'a> {
err err
} }
pub fn is_diff_marker(&mut self, long_kind: &TokenKind, short_kind: &TokenKind) -> bool { /// This checks if this is a conflict marker, depending of the parameter passed.
///
/// * `>>>>>`
/// * `=====`
/// * `<<<<<`
///
pub fn is_vcs_conflict_marker(
&mut self,
long_kind: &TokenKind,
short_kind: &TokenKind,
) -> bool {
(0..3).all(|i| self.look_ahead(i, |tok| tok == long_kind)) (0..3).all(|i| self.look_ahead(i, |tok| tok == long_kind))
&& self.look_ahead(3, |tok| tok == short_kind) && self.look_ahead(3, |tok| tok == short_kind)
} }
fn diff_marker(&mut self, long_kind: &TokenKind, short_kind: &TokenKind) -> Option<Span> { fn conflict_marker(&mut self, long_kind: &TokenKind, short_kind: &TokenKind) -> Option<Span> {
if self.is_diff_marker(long_kind, short_kind) { if self.is_vcs_conflict_marker(long_kind, short_kind) {
let lo = self.token.span; let lo = self.token.span;
for _ in 0..4 { for _ in 0..4 {
self.bump(); self.bump();
@ -2973,15 +2980,16 @@ impl<'a> Parser<'a> {
None None
} }
pub fn recover_diff_marker(&mut self) { pub fn recover_vcs_conflict_marker(&mut self) {
if let Err(err) = self.err_diff_marker() { if let Err(err) = self.err_vcs_conflict_marker() {
err.emit(); err.emit();
FatalError.raise(); FatalError.raise();
} }
} }
pub fn err_diff_marker(&mut self) -> PResult<'a, ()> { pub fn err_vcs_conflict_marker(&mut self) -> PResult<'a, ()> {
let Some(start) = self.diff_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) else { let Some(start) = self.conflict_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt)
else {
return Ok(()); return Ok(());
}; };
let mut spans = Vec::with_capacity(3); let mut spans = Vec::with_capacity(3);
@ -2993,13 +3001,15 @@ impl<'a> Parser<'a> {
if self.token.kind == TokenKind::Eof { if self.token.kind == TokenKind::Eof {
break; break;
} }
if let Some(span) = self.diff_marker(&TokenKind::OrOr, &TokenKind::BinOp(token::Or)) { if let Some(span) = self.conflict_marker(&TokenKind::OrOr, &TokenKind::BinOp(token::Or))
{
middlediff3 = Some(span); middlediff3 = Some(span);
} }
if let Some(span) = self.diff_marker(&TokenKind::EqEq, &TokenKind::Eq) { if let Some(span) = self.conflict_marker(&TokenKind::EqEq, &TokenKind::Eq) {
middle = Some(span); middle = Some(span);
} }
if let Some(span) = self.diff_marker(&TokenKind::BinOp(token::Shr), &TokenKind::Gt) { if let Some(span) = self.conflict_marker(&TokenKind::BinOp(token::Shr), &TokenKind::Gt)
{
spans.push(span); spans.push(span);
end = Some(span); end = Some(span);
break; break;

View File

@ -3734,7 +3734,7 @@ impl<'a> Parser<'a> {
/// Parses `ident (COLON expr)?`. /// Parses `ident (COLON expr)?`.
fn parse_expr_field(&mut self) -> PResult<'a, ExprField> { fn parse_expr_field(&mut self) -> PResult<'a, ExprField> {
let attrs = self.parse_outer_attributes()?; let attrs = self.parse_outer_attributes()?;
self.recover_diff_marker(); self.recover_vcs_conflict_marker();
self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| { self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| {
let lo = this.token.span; let lo = this.token.span;

View File

@ -49,6 +49,7 @@ impl<'a> Parser<'a> {
} }
/// Parses the contents of a module (inner attributes followed by module items). /// Parses the contents of a module (inner attributes followed by module items).
/// We exit once we hit `term`
pub fn parse_mod( pub fn parse_mod(
&mut self, &mut self,
term: &TokenKind, term: &TokenKind,
@ -59,13 +60,13 @@ impl<'a> Parser<'a> {
let post_attr_lo = self.token.span; let post_attr_lo = self.token.span;
let mut items = ThinVec::new(); let mut items = ThinVec::new();
while let Some(item) = self.parse_item(ForceCollect::No)? { while let Some(item) = self.parse_item(ForceCollect::No)? {
self.maybe_consume_incorrect_semicolon(Some(&item));
items.push(item); items.push(item);
self.maybe_consume_incorrect_semicolon(&items);
} }
if !self.eat(term) { if !self.eat(term) {
let token_str = super::token_descr(&self.token); let token_str = super::token_descr(&self.token);
if !self.maybe_consume_incorrect_semicolon(&items) { if !self.maybe_consume_incorrect_semicolon(items.last().map(|x| &**x)) {
let msg = format!("expected item, found {token_str}"); let msg = format!("expected item, found {token_str}");
let mut err = self.dcx().struct_span_err(self.token.span, msg); let mut err = self.dcx().struct_span_err(self.token.span, msg);
let span = self.token.span; let span = self.token.span;
@ -101,9 +102,9 @@ impl<'a> Parser<'a> {
fn_parse_mode: FnParseMode, fn_parse_mode: FnParseMode,
force_collect: ForceCollect, force_collect: ForceCollect,
) -> PResult<'a, Option<Item>> { ) -> PResult<'a, Option<Item>> {
self.recover_diff_marker(); self.recover_vcs_conflict_marker();
let attrs = self.parse_outer_attributes()?; let attrs = self.parse_outer_attributes()?;
self.recover_diff_marker(); self.recover_vcs_conflict_marker();
self.parse_item_common(attrs, true, false, fn_parse_mode, force_collect) self.parse_item_common(attrs, true, false, fn_parse_mode, force_collect)
} }
@ -194,12 +195,12 @@ impl<'a> Parser<'a> {
fn_parse_mode: FnParseMode, fn_parse_mode: FnParseMode,
case: Case, case: Case,
) -> PResult<'a, Option<ItemInfo>> { ) -> PResult<'a, Option<ItemInfo>> {
let def_final = def == &Defaultness::Final; let check_pub = def == &Defaultness::Final;
let mut def_ = || mem::replace(def, Defaultness::Final); let mut def_ = || mem::replace(def, Defaultness::Final);
let info = if self.eat_keyword_case(kw::Use, case) { let info = if self.eat_keyword_case(kw::Use, case) {
self.parse_use_item()? self.parse_use_item()?
} else if self.check_fn_front_matter(def_final, case) { } else if self.check_fn_front_matter(check_pub, case) {
// FUNCTION ITEM // FUNCTION ITEM
let (ident, sig, generics, body) = let (ident, sig, generics, body) =
self.parse_fn(attrs, fn_parse_mode, lo, vis, case)?; self.parse_fn(attrs, fn_parse_mode, lo, vis, case)?;
@ -310,7 +311,7 @@ impl<'a> Parser<'a> {
Ok(Some(info)) Ok(Some(info))
} }
fn recover_import_as_use(&mut self) -> PResult<'a, Option<(Ident, ItemKind)>> { fn recover_import_as_use(&mut self) -> PResult<'a, Option<ItemInfo>> {
let span = self.token.span; let span = self.token.span;
let token_name = super::token_descr(&self.token); let token_name = super::token_descr(&self.token);
let snapshot = self.create_snapshot_for_diagnostic(); let snapshot = self.create_snapshot_for_diagnostic();
@ -328,7 +329,7 @@ impl<'a> Parser<'a> {
} }
} }
fn parse_use_item(&mut self) -> PResult<'a, (Ident, ItemKind)> { fn parse_use_item(&mut self) -> PResult<'a, ItemInfo> {
let tree = self.parse_use_tree()?; let tree = self.parse_use_tree()?;
if let Err(mut e) = self.expect_semi() { if let Err(mut e) = self.expect_semi() {
match tree.kind { match tree.kind {
@ -738,7 +739,7 @@ impl<'a> Parser<'a> {
if self.recover_doc_comment_before_brace() { if self.recover_doc_comment_before_brace() {
continue; continue;
} }
self.recover_diff_marker(); self.recover_vcs_conflict_marker();
match parse_item(self) { match parse_item(self) {
Ok(None) => { Ok(None) => {
let mut is_unnecessary_semicolon = !items.is_empty() let mut is_unnecessary_semicolon = !items.is_empty()
@ -1085,7 +1086,7 @@ impl<'a> Parser<'a> {
/// ``` /// ```
fn parse_use_tree_list(&mut self) -> PResult<'a, ThinVec<(UseTree, ast::NodeId)>> { fn parse_use_tree_list(&mut self) -> PResult<'a, ThinVec<(UseTree, ast::NodeId)>> {
self.parse_delim_comma_seq(Delimiter::Brace, |p| { self.parse_delim_comma_seq(Delimiter::Brace, |p| {
p.recover_diff_marker(); p.recover_vcs_conflict_marker();
Ok((p.parse_use_tree()?, DUMMY_NODE_ID)) Ok((p.parse_use_tree()?, DUMMY_NODE_ID))
}) })
.map(|(r, _)| r) .map(|(r, _)| r)
@ -1512,9 +1513,9 @@ impl<'a> Parser<'a> {
} }
fn parse_enum_variant(&mut self, span: Span) -> PResult<'a, Option<Variant>> { fn parse_enum_variant(&mut self, span: Span) -> PResult<'a, Option<Variant>> {
self.recover_diff_marker(); self.recover_vcs_conflict_marker();
let variant_attrs = self.parse_outer_attributes()?; let variant_attrs = self.parse_outer_attributes()?;
self.recover_diff_marker(); self.recover_vcs_conflict_marker();
let help = "enum variants can be `Variant`, `Variant = <integer>`, \ let help = "enum variants can be `Variant`, `Variant = <integer>`, \
`Variant(Type, ..., TypeN)` or `Variant { fields: Types }`"; `Variant(Type, ..., TypeN)` or `Variant { fields: Types }`";
self.collect_tokens_trailing_token( self.collect_tokens_trailing_token(
@ -1703,6 +1704,10 @@ impl<'a> Parser<'a> {
Ok((class_name, ItemKind::Union(vdata, generics))) Ok((class_name, ItemKind::Union(vdata, generics)))
} }
/// This function parses the fields of record structs:
///
/// - `struct S { ... }`
/// - `enum E { Variant { ... } }`
pub(crate) fn parse_record_struct_body( pub(crate) fn parse_record_struct_body(
&mut self, &mut self,
adt_ty: &str, adt_ty: &str,
@ -1729,19 +1734,10 @@ impl<'a> Parser<'a> {
self.eat(&token::CloseDelim(Delimiter::Brace)); self.eat(&token::CloseDelim(Delimiter::Brace));
} else { } else {
let token_str = super::token_descr(&self.token); let token_str = super::token_descr(&self.token);
let msg = format!( let where_str = if parsed_where { "" } else { "`where`, or " };
"expected {}`{{` after struct name, found {}", let msg = format!("expected {where_str}`{{` after struct name, found {token_str}");
if parsed_where { "" } else { "`where`, or " },
token_str
);
let mut err = self.dcx().struct_span_err(self.token.span, msg); let mut err = self.dcx().struct_span_err(self.token.span, msg);
err.span_label( err.span_label(self.token.span, format!("expected {where_str}`{{` after struct name",));
self.token.span,
format!(
"expected {}`{{` after struct name",
if parsed_where { "" } else { "`where`, or " }
),
);
return Err(err); return Err(err);
} }
@ -1755,7 +1751,7 @@ impl<'a> Parser<'a> {
let attrs = p.parse_outer_attributes()?; let attrs = p.parse_outer_attributes()?;
p.collect_tokens_trailing_token(attrs, ForceCollect::No, |p, attrs| { p.collect_tokens_trailing_token(attrs, ForceCollect::No, |p, attrs| {
let mut snapshot = None; let mut snapshot = None;
if p.is_diff_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) { if p.is_vcs_conflict_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) {
// Account for `<<<<<<<` diff markers. We can't proactively error here because // Account for `<<<<<<<` diff markers. We can't proactively error here because
// that can be a valid type start, so we snapshot and reparse only we've // that can be a valid type start, so we snapshot and reparse only we've
// encountered another parse error. // encountered another parse error.
@ -1766,7 +1762,7 @@ impl<'a> Parser<'a> {
Ok(vis) => vis, Ok(vis) => vis,
Err(err) => { Err(err) => {
if let Some(ref mut snapshot) = snapshot { if let Some(ref mut snapshot) = snapshot {
snapshot.recover_diff_marker(); snapshot.recover_vcs_conflict_marker();
} }
return Err(err); return Err(err);
} }
@ -1775,7 +1771,7 @@ impl<'a> Parser<'a> {
Ok(ty) => ty, Ok(ty) => ty,
Err(err) => { Err(err) => {
if let Some(ref mut snapshot) = snapshot { if let Some(ref mut snapshot) = snapshot {
snapshot.recover_diff_marker(); snapshot.recover_vcs_conflict_marker();
} }
return Err(err); return Err(err);
} }
@ -1800,9 +1796,9 @@ impl<'a> Parser<'a> {
/// Parses an element of a struct declaration. /// Parses an element of a struct declaration.
fn parse_field_def(&mut self, adt_ty: &str) -> PResult<'a, FieldDef> { fn parse_field_def(&mut self, adt_ty: &str) -> PResult<'a, FieldDef> {
self.recover_diff_marker(); self.recover_vcs_conflict_marker();
let attrs = self.parse_outer_attributes()?; let attrs = self.parse_outer_attributes()?;
self.recover_diff_marker(); self.recover_vcs_conflict_marker();
self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| { self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| {
let lo = this.token.span; let lo = this.token.span;
let vis = this.parse_visibility(FollowedByType::No)?; let vis = this.parse_visibility(FollowedByType::No)?;
@ -2662,7 +2658,7 @@ impl<'a> Parser<'a> {
} }
let (mut params, _) = self.parse_paren_comma_seq(|p| { let (mut params, _) = self.parse_paren_comma_seq(|p| {
p.recover_diff_marker(); p.recover_vcs_conflict_marker();
let snapshot = p.create_snapshot_for_diagnostic(); let snapshot = p.create_snapshot_for_diagnostic();
let param = p.parse_param_general(req_name, first_param).or_else(|e| { let param = p.parse_param_general(req_name, first_param).or_else(|e| {
let guar = e.emit(); let guar = e.emit();

View File

@ -567,7 +567,7 @@ impl<'a> Parser<'a> {
if self.token == token::Eof { if self.token == token::Eof {
break; break;
} }
if self.is_diff_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) { if self.is_vcs_conflict_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) {
// Account for `<<<<<<<` diff markers. We can't proactively error here because // Account for `<<<<<<<` diff markers. We can't proactively error here because
// that can be a valid path start, so we snapshot and reparse only we've // that can be a valid path start, so we snapshot and reparse only we've
// encountered another parse error. // encountered another parse error.
@ -576,7 +576,7 @@ impl<'a> Parser<'a> {
let stmt = match self.parse_full_stmt(recover) { let stmt = match self.parse_full_stmt(recover) {
Err(mut err) if recover.yes() => { Err(mut err) if recover.yes() => {
if let Some(ref mut snapshot) = snapshot { if let Some(ref mut snapshot) = snapshot {
snapshot.recover_diff_marker(); snapshot.recover_vcs_conflict_marker();
} }
if self.token == token::Colon { if self.token == token::Colon {
// if a previous and next token of the current one is // if a previous and next token of the current one is

View File

@ -684,9 +684,9 @@ pub(crate) fn make_test(
} }
} }
// The supplied slice is only used for diagnostics, // The supplied item is only used for diagnostics,
// which are swallowed here anyway. // which are swallowed here anyway.
parser.maybe_consume_incorrect_semicolon(&[]); parser.maybe_consume_incorrect_semicolon(None);
} }
// Reset errors so that they won't be reported as compiler bugs when dropping the // Reset errors so that they won't be reported as compiler bugs when dropping the

View File

@ -3,6 +3,8 @@ error: expected item, found `;`
| |
LL | mod M {}; LL | mod M {};
| ^ help: remove this semicolon | ^ help: remove this semicolon
|
= help: module declarations are not followed by a semicolon
error: expected item, found `;` error: expected item, found `;`
--> $DIR/recover-from-semicolon-trailing-item.rs:4:12 --> $DIR/recover-from-semicolon-trailing-item.rs:4:12
@ -17,6 +19,8 @@ error: expected item, found `;`
| |
LL | fn foo(a: usize) {}; LL | fn foo(a: usize) {};
| ^ help: remove this semicolon | ^ help: remove this semicolon
|
= help: function declarations are not followed by a semicolon
error[E0308]: mismatched types error[E0308]: mismatched types
--> $DIR/recover-from-semicolon-trailing-item.rs:10:20 --> $DIR/recover-from-semicolon-trailing-item.rs:10:20