Auto merge of #114915 - nnethercote:Nonterminal-cleanups, r=petrochenkov

`Nonterminal`-related cleanups In #114647 I am trying to remove `Nonterminal`. It has a number of preliminary cleanups that are worth merging even if #114647 doesn't merge, so let's do them in this PR. r? `@petrochenkov`
2023-08-18 16:07:40 +00:00 · 2023-08-18 16:07:40 +00:00 · ee5cb9e3a6
commit ee5cb9e3a6
parent 2ceed0b6cb 9e22351c74
11 changed files with 120 additions and 92 deletions
--- a/compiler/rustc_ast_pretty/src/pprust/state.rs
+++ b/compiler/rustc_ast_pretty/src/pprust/state.rs
@ -150,6 +150,8 @@ pub fn print_crate<'a>(
 /// and also addresses some specific regressions described in #63896 and #73345.
 fn tt_prepend_space(tt: &TokenTree, prev: &TokenTree) -> bool {
    if let TokenTree::Token(token, _) = prev {
+        // No space after these tokens, e.g. `x.y`, `$e`
+        // (The carets point to `prev`.)       ^     ^
        if matches!(token.kind, token::Dot | token::Dollar) {
            return false;
        }
@ -158,10 +160,19 @@ fn tt_prepend_space(tt: &TokenTree, prev: &TokenTree) -> bool {
        }
    }
    match tt {
+        // No space before these tokens, e.g. `foo,`, `println!`, `x.y`
+        // (The carets point to `token`.)         ^           ^     ^
+        //
+        // FIXME: having `Not` here works well for macro invocations like
+        // `println!()`, but is bad when `!` means "logical not" or "the never
+        // type", where the lack of space causes ugliness like this:
+        // `Fn() ->!`, `x =! y`, `if! x { f(); }`.
        TokenTree::Token(token, _) => !matches!(token.kind, token::Comma | token::Not | token::Dot),
+        // No space before parentheses if preceded by these tokens, e.g. `foo(...)`
        TokenTree::Delimited(_, Delimiter::Parenthesis, _) => {
            !matches!(prev, TokenTree::Token(Token { kind: token::Ident(..), .. }, _))
        }
+        // No space before brackets if preceded by these tokens, e.g. `#[...]`
        TokenTree::Delimited(_, Delimiter::Bracket, _) => {
            !matches!(prev, TokenTree::Token(Token { kind: token::Pound, .. }, _))
        }
--- a/compiler/rustc_expand/src/mbe/macro_parser.rs
+++ b/compiler/rustc_expand/src/mbe/macro_parser.rs
@ -81,7 +81,7 @@ use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sync::Lrc;
 use rustc_errors::ErrorGuaranteed;
 use rustc_lint_defs::pluralize;
-use rustc_parse::parser::{NtOrTt, Parser};
+use rustc_parse::parser::{ParseNtResult, Parser};
 use rustc_span::symbol::Ident;
 use rustc_span::symbol::MacroRulesNormalizedIdent;
 use rustc_span::Span;
@ -692,8 +692,8 @@ impl TtParser {
                            Ok(nt) => nt,
                        };
                        let m = match nt {
-                            NtOrTt::Nt(nt) => MatchedNonterminal(Lrc::new(nt)),
-                            NtOrTt::Tt(tt) => MatchedTokenTree(tt),
+                            ParseNtResult::Nt(nt) => MatchedNonterminal(Lrc::new(nt)),
+                            ParseNtResult::Tt(tt) => MatchedTokenTree(tt),
                        };
                        mp.push_match(next_metavar, seq_depth, m);
                        mp.idx += 1;
--- a/compiler/rustc_expand/src/mbe/macro_rules.rs
+++ b/compiler/rustc_expand/src/mbe/macro_rules.rs
@ -1328,7 +1328,7 @@ fn is_in_follow(tok: &mbe::TokenTree, kind: NonterminalKind) -> IsInFollow {
                    _ => IsInFollow::No(TOKENS),
                }
            }
-            NonterminalKind::PatWithOr { .. } => {
+            NonterminalKind::PatWithOr => {
                const TOKENS: &[&str] = &["`=>`", "`,`", "`=`", "`if`", "`in`"];
                match tok {
                    TokenTree::Token(token) => match token.kind {
--- a/compiler/rustc_expand/src/mbe/transcribe.rs
+++ b/compiler/rustc_expand/src/mbe/transcribe.rs
@ -220,16 +220,15 @@ pub(super) fn transcribe<'a>(
                        MatchedTokenTree(tt) => {
                            // `tt`s are emitted into the output stream directly as "raw tokens",
                            // without wrapping them into groups.
-                            let token = tt.clone();
-                            result.push(token);
+                            result.push(tt.clone());
                        }
                        MatchedNonterminal(nt) => {
                            // Other variables are emitted into the output stream as groups with
                            // `Delimiter::Invisible` to maintain parsing priorities.
                            // `Interpolated` is currently used for such groups in rustc parser.
                            marker.visit_span(&mut sp);
-                            let token = TokenTree::token_alone(token::Interpolated(nt.clone()), sp);
-                            result.push(token);
+                            result
+                                .push(TokenTree::token_alone(token::Interpolated(nt.clone()), sp));
                        }
                        MatchedSeq(..) => {
                            // We were unable to descend far enough. This is an error.
--- a/compiler/rustc_macros/src/serialize.rs
+++ b/compiler/rustc_macros/src/serialize.rs
@ -59,14 +59,14 @@ fn decodable_body(
                })
                .collect();
            let message = format!(
-                "invalid enum variant tag while decoding `{}`, expected 0..{}",
+                "invalid enum variant tag while decoding `{}`, expected 0..{}, actual {{}}",
                ty_name,
                variants.len()
            );
            quote! {
                match ::rustc_serialize::Decoder::read_usize(__decoder) {
                    #match_inner
-                    _ => panic!(#message),
+                    n => panic!(#message, n),
                }
            }
        }
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@ -193,13 +193,7 @@ impl<'a> Parser<'a> {

        self.expected_tokens.push(TokenType::Operator);
        while let Some(op) = self.check_assoc_op() {
-            // Adjust the span for interpolated LHS to point to the `$lhs` token
-            // and not to what it refers to.
-            let lhs_span = match self.prev_token.kind {
-                TokenKind::Interpolated(..) => self.prev_token.span,
-                _ => lhs.span,
-            };
-
+            let lhs_span = self.interpolated_or_expr_span(&lhs);
            let cur_op_span = self.token.span;
            let restrictions = if op.node.is_assign_like() {
                self.restrictions & Restrictions::NO_STRUCT_LITERAL
@ -626,8 +620,8 @@ impl<'a> Parser<'a> {

    fn parse_expr_prefix_common(&mut self, lo: Span) -> PResult<'a, (Span, P<Expr>)> {
        self.bump();
-        let expr = self.parse_expr_prefix(None);
-        let (span, expr) = self.interpolated_or_expr_span(expr)?;
+        let expr = self.parse_expr_prefix(None)?;
+        let span = self.interpolated_or_expr_span(&expr);
        Ok((lo.to(span), expr))
    }

@ -702,20 +696,12 @@ impl<'a> Parser<'a> {
        self.parse_expr_unary(lo, UnOp::Not)
    }

-    /// Returns the span of expr, if it was not interpolated or the span of the interpolated token.
-    fn interpolated_or_expr_span(
-        &self,
-        expr: PResult<'a, P<Expr>>,
-    ) -> PResult<'a, (Span, P<Expr>)> {
-        expr.map(|e| {
-            (
-                match self.prev_token.kind {
-                    TokenKind::Interpolated(..) => self.prev_token.span,
-                    _ => e.span,
-                },
-                e,
-            )
-        })
+    /// Returns the span of expr if it was not interpolated, or the span of the interpolated token.
+    fn interpolated_or_expr_span(&self, expr: &Expr) -> Span {
+        match self.prev_token.kind {
+            TokenKind::Interpolated(..) => self.prev_token.span,
+            _ => expr.span,
+        }
    }

    fn parse_assoc_op_cast(
@ -898,8 +884,8 @@ impl<'a> Parser<'a> {
            self.parse_expr_prefix_range(None)
        } else {
            self.parse_expr_prefix(None)
-        };
-        let (hi, expr) = self.interpolated_or_expr_span(expr)?;
+        }?;
+        let hi = self.interpolated_or_expr_span(&expr);
        let span = lo.to(hi);
        if let Some(lt) = lifetime {
            self.error_remove_borrow_lifetime(span, lt.ident.span);
@ -930,8 +916,8 @@ impl<'a> Parser<'a> {
    fn parse_expr_dot_or_call(&mut self, attrs: Option<AttrWrapper>) -> PResult<'a, P<Expr>> {
        let attrs = self.parse_or_use_outer_attributes(attrs)?;
        self.collect_tokens_for_expr(attrs, |this, attrs| {
-            let base = this.parse_expr_bottom();
-            let (span, base) = this.interpolated_or_expr_span(base)?;
+            let base = this.parse_expr_bottom()?;
+            let span = this.interpolated_or_expr_span(&base);
            this.parse_expr_dot_or_call_with(base, span, attrs)
        })
    }
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@ -1052,33 +1052,48 @@ impl<'a> Parser<'a> {
    }

    /// Look-ahead `dist` tokens of `self.token` and get access to that token there.
-    /// When `dist == 0` then the current token is looked at.
+    /// When `dist == 0` then the current token is looked at. `Eof` will be
+    /// returned if the look-ahead is any distance past the end of the tokens.
    pub fn look_ahead<R>(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R {
        if dist == 0 {
            return looker(&self.token);
        }

-        let tree_cursor = &self.token_cursor.tree_cursor;
        if let Some(&(_, delim, span)) = self.token_cursor.stack.last()
            && delim != Delimiter::Invisible
        {
+            // We are not in the outermost token stream, and the token stream
+            // we are in has non-skipped delimiters. Look for skipped
+            // delimiters in the lookahead range.
+            let tree_cursor = &self.token_cursor.tree_cursor;
            let all_normal = (0..dist).all(|i| {
                let token = tree_cursor.look_ahead(i);
                !matches!(token, Some(TokenTree::Delimited(_, Delimiter::Invisible, _)))
            });
            if all_normal {
+                // There were no skipped delimiters. Do lookahead by plain indexing.
                return match tree_cursor.look_ahead(dist - 1) {
-                    Some(tree) => match tree {
-                        TokenTree::Token(token, _) => looker(token),
-                        TokenTree::Delimited(dspan, delim, _) => {
-                            looker(&Token::new(token::OpenDelim(*delim), dspan.open))
+                    Some(tree) => {
+                        // Indexing stayed within the current token stream.
+                        match tree {
+                            TokenTree::Token(token, _) => looker(token),
+                            TokenTree::Delimited(dspan, delim, _) => {
+                                looker(&Token::new(token::OpenDelim(*delim), dspan.open))
+                            }
                        }
-                    },
-                    None => looker(&Token::new(token::CloseDelim(delim), span.close)),
+                    }
+                    None => {
+                        // Indexing went past the end of the current token
+                        // stream. Use the close delimiter, no matter how far
+                        // ahead `dist` went.
+                        looker(&Token::new(token::CloseDelim(delim), span.close))
+                    }
                };
            }
        }

+        // We are in a more complex case. Just clone the token cursor and use
+        // `next`, skipping delimiters as necessary. Slow but simple.
        let mut cursor = self.token_cursor.clone();
        let mut i = 0;
        let mut token = Token::dummy();
@ -1476,7 +1491,7 @@ pub enum FlatToken {
 }

 #[derive(Debug)]
-pub enum NtOrTt {
+pub enum ParseNtResult {
    Nt(Nonterminal),
    Tt(TokenTree),
 }
--- a/compiler/rustc_parse/src/parser/nonterminal.rs
+++ b/compiler/rustc_parse/src/parser/nonterminal.rs
@ -1,5 +1,5 @@
 use rustc_ast::ptr::P;
-use rustc_ast::token::{self, Delimiter, NonterminalKind, Token};
+use rustc_ast::token::{self, Delimiter, Nonterminal::*, NonterminalKind, Token};
 use rustc_ast::HasTokens;
 use rustc_ast_pretty::pprust;
 use rustc_errors::IntoDiagnostic;
@ -8,7 +8,7 @@ use rustc_span::symbol::{kw, Ident};

 use crate::errors::UnexpectedNonterminal;
 use crate::parser::pat::{CommaRecoveryMode, RecoverColon, RecoverComma};
-use crate::parser::{FollowedByType, ForceCollect, NtOrTt, Parser, PathStyle};
+use crate::parser::{FollowedByType, ForceCollect, ParseNtResult, Parser, PathStyle};

 impl<'a> Parser<'a> {
    /// Checks whether a non-terminal may begin with a particular token.
@ -20,10 +20,21 @@ impl<'a> Parser<'a> {
    pub fn nonterminal_may_begin_with(kind: NonterminalKind, token: &Token) -> bool {
        /// Checks whether the non-terminal may contain a single (non-keyword) identifier.
        fn may_be_ident(nt: &token::Nonterminal) -> bool {
-            !matches!(
-                *nt,
-                token::NtItem(_) | token::NtBlock(_) | token::NtVis(_) | token::NtLifetime(_)
-            )
+            match nt {
+                NtStmt(_)
+                | NtPat(_)
+                | NtExpr(_)
+                | NtTy(_)
+                | NtIdent(..)
+                | NtLiteral(_) // `true`, `false`
+                | NtMeta(_)
+                | NtPath(_) => true,
+
+                NtItem(_)
+                | NtBlock(_)
+                | NtVis(_)
+                | NtLifetime(_) => false,
+            }
        }

        match kind {
@ -44,27 +55,19 @@ impl<'a> Parser<'a> {
            },
            NonterminalKind::Block => match &token.kind {
                token::OpenDelim(Delimiter::Brace) => true,
-                token::Interpolated(nt) => !matches!(
-                    **nt,
-                    token::NtItem(_)
-                        | token::NtPat(_)
-                        | token::NtTy(_)
-                        | token::NtIdent(..)
-                        | token::NtMeta(_)
-                        | token::NtPath(_)
-                        | token::NtVis(_)
-                ),
+                token::Interpolated(nt) => match **nt {
+                    NtBlock(_) | NtLifetime(_) | NtStmt(_) | NtExpr(_) | NtLiteral(_) => true,
+                    NtItem(_) | NtPat(_) | NtTy(_) | NtIdent(..) | NtMeta(_) | NtPath(_)
+                    | NtVis(_) => false,
+                },
                _ => false,
            },
            NonterminalKind::Path | NonterminalKind::Meta => match &token.kind {
                token::ModSep | token::Ident(..) => true,
-                token::Interpolated(nt) => match **nt {
-                    token::NtPath(_) | token::NtMeta(_) => true,
-                    _ => may_be_ident(&nt),
-                },
+                token::Interpolated(nt) => may_be_ident(nt),
                _ => false,
            },
-            NonterminalKind::PatParam { .. } | NonterminalKind::PatWithOr { .. } => {
+            NonterminalKind::PatParam { .. } | NonterminalKind::PatWithOr => {
                match &token.kind {
                token::Ident(..) |                          // box, ref, mut, and other identifiers (can stricten)
                token::OpenDelim(Delimiter::Parenthesis) |  // tuple pattern
@ -79,7 +82,7 @@ impl<'a> Parser<'a> {
                token::Lt |                                 // path (UFCS constant)
                token::BinOp(token::Shl) => true,           // path (double UFCS)
                // leading vert `|` or-pattern
-                token::BinOp(token::Or) =>  matches!(kind, NonterminalKind::PatWithOr {..}),
+                token::BinOp(token::Or) => matches!(kind, NonterminalKind::PatWithOr),
                token::Interpolated(nt) => may_be_ident(nt),
                _ => false,
            }
@ -87,7 +90,7 @@ impl<'a> Parser<'a> {
            NonterminalKind::Lifetime => match &token.kind {
                token::Lifetime(_) => true,
                token::Interpolated(nt) => {
-                    matches!(**nt, token::NtLifetime(_))
+                    matches!(**nt, NtLifetime(_))
                }
                _ => false,
            },
@ -100,18 +103,16 @@ impl<'a> Parser<'a> {
    /// Parse a non-terminal (e.g. MBE `:pat` or `:ident`). Inlined because there is only one call
    /// site.
    #[inline]
-    pub fn parse_nonterminal(&mut self, kind: NonterminalKind) -> PResult<'a, NtOrTt> {
-        // Any `Nonterminal` which stores its tokens (currently `NtItem` and `NtExpr`)
-        // needs to have them force-captured here.
+    pub fn parse_nonterminal(&mut self, kind: NonterminalKind) -> PResult<'a, ParseNtResult> {
        // A `macro_rules!` invocation may pass a captured item/expr to a proc-macro,
        // which requires having captured tokens available. Since we cannot determine
        // in advance whether or not a proc-macro will be (transitively) invoked,
        // we always capture tokens for any `Nonterminal` which needs them.
        let mut nt = match kind {
            // Note that TT is treated differently to all the others.
-            NonterminalKind::TT => return Ok(NtOrTt::Tt(self.parse_token_tree())),
+            NonterminalKind::TT => return Ok(ParseNtResult::Tt(self.parse_token_tree())),
            NonterminalKind::Item => match self.parse_item(ForceCollect::Yes)? {
-                Some(item) => token::NtItem(item),
+                Some(item) => NtItem(item),
                None => {
                    return Err(UnexpectedNonterminal::Item(self.token.span)
                               .into_diagnostic(&self.sess.span_diagnostic));
@ -120,19 +121,19 @@ impl<'a> Parser<'a> {
            NonterminalKind::Block => {
                // While a block *expression* may have attributes (e.g. `#[my_attr] { ... }`),
                // the ':block' matcher does not support them
-                token::NtBlock(self.collect_tokens_no_attrs(|this| this.parse_block())?)
+                NtBlock(self.collect_tokens_no_attrs(|this| this.parse_block())?)
            }
            NonterminalKind::Stmt => match self.parse_stmt(ForceCollect::Yes)? {
-                Some(s) => token::NtStmt(P(s)),
+                Some(s) => NtStmt(P(s)),
                None => {
                    return Err(UnexpectedNonterminal::Statement(self.token.span)
                               .into_diagnostic(&self.sess.span_diagnostic));
                }
            },
-            NonterminalKind::PatParam { .. } | NonterminalKind::PatWithOr { .. } => {
-                token::NtPat(self.collect_tokens_no_attrs(|this| match kind {
+            NonterminalKind::PatParam { .. } | NonterminalKind::PatWithOr => {
+                NtPat(self.collect_tokens_no_attrs(|this| match kind {
                    NonterminalKind::PatParam { .. } => this.parse_pat_no_top_alt(None, None),
-                    NonterminalKind::PatWithOr { .. } => this.parse_pat_allow_top_alt(
+                    NonterminalKind::PatWithOr => this.parse_pat_allow_top_alt(
                        None,
                        RecoverComma::No,
                        RecoverColon::No,
@ -142,16 +143,16 @@ impl<'a> Parser<'a> {
                })?)
            }

-            NonterminalKind::Expr => token::NtExpr(self.parse_expr_force_collect()?),
+            NonterminalKind::Expr => NtExpr(self.parse_expr_force_collect()?),
            NonterminalKind::Literal => {
                // The `:literal` matcher does not support attributes
-                token::NtLiteral(
+                NtLiteral(
                    self.collect_tokens_no_attrs(|this| this.parse_literal_maybe_minus())?,
                )
            }

-            NonterminalKind::Ty => token::NtTy(
-                self.collect_tokens_no_attrs(|this| this.parse_no_question_mark_recover())?,
+            NonterminalKind::Ty => NtTy(
+                self.collect_tokens_no_attrs(|this| this.parse_ty_no_question_mark_recover())?,
            ),

            // this could be handled like a token, since it is one
@ -159,7 +160,7 @@ impl<'a> Parser<'a> {
                if let Some((ident, is_raw)) = get_macro_ident(&self.token) =>
            {
                self.bump();
-                token::NtIdent(ident, is_raw)
+                NtIdent(ident, is_raw)
            }
            NonterminalKind::Ident => {
                return Err(UnexpectedNonterminal::Ident {
@ -167,16 +168,16 @@ impl<'a> Parser<'a> {
                    token: self.token.clone(),
                }.into_diagnostic(&self.sess.span_diagnostic));
            }
-            NonterminalKind::Path => token::NtPath(
+            NonterminalKind::Path => NtPath(
                P(self.collect_tokens_no_attrs(|this| this.parse_path(PathStyle::Type))?),
            ),
-            NonterminalKind::Meta => token::NtMeta(P(self.parse_attr_item(true)?)),
-            NonterminalKind::Vis => token::NtVis(
+            NonterminalKind::Meta => NtMeta(P(self.parse_attr_item(true)?)),
+            NonterminalKind::Vis => NtVis(
                P(self.collect_tokens_no_attrs(|this| this.parse_visibility(FollowedByType::Yes))?),
            ),
            NonterminalKind::Lifetime => {
                if self.check_lifetime() {
-                    token::NtLifetime(self.expect_lifetime().ident)
+                    NtLifetime(self.expect_lifetime().ident)
                } else {
                    return Err(UnexpectedNonterminal::Lifetime {
                        span: self.token.span,
@ -196,7 +197,7 @@ impl<'a> Parser<'a> {
            );
        }

-        Ok(NtOrTt::Nt(nt))
+        Ok(ParseNtResult::Nt(nt))
    }
 }

--- a/compiler/rustc_parse/src/parser/ty.rs
+++ b/compiler/rustc_parse/src/parser/ty.rs
@ -180,7 +180,7 @@ impl<'a> Parser<'a> {
        )
    }

-    pub(super) fn parse_no_question_mark_recover(&mut self) -> PResult<'a, P<Ty>> {
+    pub(super) fn parse_ty_no_question_mark_recover(&mut self) -> PResult<'a, P<Ty>> {
        self.parse_ty_common(
            AllowPlus::Yes,
            AllowCVariadic::No,
--- a/tests/ui/macros/macro-interpolation.rs
+++ b/tests/ui/macros/macro-interpolation.rs
@ -1,5 +1,3 @@
-// run-pass
-
 macro_rules! overly_complicated {
    ($fnname:ident, $arg:ident, $ty:ty, $body:block, $val:expr, $pat:pat, $res:path) =>
    ({
@ -21,12 +19,14 @@ macro_rules! qpath {

    (ty, <$type:ty as $trait:ty>::$name:ident) => {
        <$type as $trait>::$name
+        //~^ ERROR expected identifier, found `!`
    };
 }

 pub fn main() {
    let _: qpath!(path, <str as ToOwned>::Owned);
    let _: qpath!(ty, <str as ToOwned>::Owned);
+    let _: qpath!(ty, <str as !>::Owned);

    assert!(overly_complicated!(f, x, Option<usize>, { return Some(x); },
                               Some(8), Some(y), y) == 8)
--- a/tests/ui/macros/macro-interpolation.stderr
+++ b/tests/ui/macros/macro-interpolation.stderr
@ -0,0 +1,16 @@
+error: expected identifier, found `!`
+  --> $DIR/macro-interpolation.rs:21:19
+   |
+LL |         <$type as $trait>::$name
+   |                   ^^^^^^ expected identifier
+...
+LL |     let _: qpath!(ty, <str as !>::Owned);
+   |            -----------------------------
+   |            |
+   |            this macro call doesn't expand to a type
+   |            in this macro invocation
+   |
+   = note: this error originates in the macro `qpath` (in Nightly builds, run with -Z macro-backtrace for more info)
+
+error: aborting due to previous error
+