From e1d12c8caf4c3dad0a28c88e5d454a3eb6e1f955 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Fri, 1 Dec 2023 18:23:16 +0300 Subject: [PATCH] macro_rules: Less hacky heuristic for using `tt` metavariable spans --- compiler/rustc_ast/src/tokenstream.rs | 23 +------ compiler/rustc_expand/src/mbe/macro_rules.rs | 35 +--------- compiler/rustc_expand/src/mbe/transcribe.rs | 64 ++++++++++++++++++- tests/coverage/macro_name_span.cov-map | 8 +-- tests/coverage/macro_name_span.coverage | 19 +----- tests/ui/macros/issue-118786.stderr | 4 +- .../issue-68091-unicode-ident-after-if.stderr | 8 +-- ...unicode-ident-after-incomplete-expr.stderr | 4 +- .../capture-macro-rules-invoke.stdout | 2 +- tests/ui/proc-macro/expand-expr.rs | 4 +- tests/ui/proc-macro/expand-expr.stderr | 6 +- 11 files changed, 85 insertions(+), 92 deletions(-) diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 4c0c496584e..053468ff936 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -26,7 +26,7 @@ use smallvec::{smallvec, SmallVec}; use std::borrow::Cow; -use std::{cmp, fmt, iter, mem}; +use std::{cmp, fmt, iter}; /// When the main Rust parser encounters a syntax-extension invocation, it /// parses the arguments to the invocation as a token tree. This is a very @@ -81,14 +81,6 @@ pub fn span(&self) -> Span { } } - /// Modify the `TokenTree`'s span in-place. - pub fn set_span(&mut self, span: Span) { - match self { - TokenTree::Token(token, _) => token.span = span, - TokenTree::Delimited(dspan, ..) => *dspan = DelimSpan::from_single(span), - } - } - /// Create a `TokenTree::Token` with alone spacing. pub fn token_alone(kind: TokenKind, span: Span) -> TokenTree { TokenTree::Token(Token::new(kind, span), Spacing::Alone) @@ -461,19 +453,6 @@ pub fn eq_unspanned(&self, other: &TokenStream) -> bool { t1.next().is_none() && t2.next().is_none() } - /// Applies the supplied function to each `TokenTree` and its index in `self`, returning a new `TokenStream` - /// - /// It is equivalent to `TokenStream::new(self.trees().cloned().enumerate().map(|(i, tt)| f(i, tt)).collect())`. - pub fn map_enumerated_owned( - mut self, - mut f: impl FnMut(usize, TokenTree) -> TokenTree, - ) -> TokenStream { - let owned = Lrc::make_mut(&mut self.0); // clone if necessary - // rely on vec's in-place optimizations to avoid another allocation - *owned = mem::take(owned).into_iter().enumerate().map(|(i, tree)| f(i, tree)).collect(); - self - } - /// Create a token stream containing a single token with alone spacing. The /// spacing used for the final token in a constructed stream doesn't matter /// because it's never used. In practice we arbitrarily use diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs index e9736d6f2c8..e9797abcbdf 100644 --- a/compiler/rustc_expand/src/mbe/macro_rules.rs +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -10,7 +10,7 @@ use rustc_ast as ast; use rustc_ast::token::{self, Delimiter, NonterminalKind, Token, TokenKind, TokenKind::*}; -use rustc_ast::tokenstream::{DelimSpan, TokenStream, TokenTree}; +use rustc_ast::tokenstream::{DelimSpan, TokenStream}; use rustc_ast::{NodeId, DUMMY_NODE_ID}; use rustc_ast_pretty::pprust; use rustc_attr::{self as attr, TransparencyError}; @@ -213,7 +213,7 @@ fn expand_macro<'cx>( let arm_span = rhses[i].span(); // rhs has holes ( `$id` and `$(...)` that need filled) - let mut tts = match transcribe(cx, &named_matches, rhs, rhs_span, transparency) { + let tts = match transcribe(cx, &named_matches, rhs, rhs_span, transparency) { Ok(tts) => tts, Err(mut err) => { err.emit(); @@ -221,37 +221,6 @@ fn expand_macro<'cx>( } }; - // Replace all the tokens for the corresponding positions in the macro, to maintain - // proper positions in error reporting, while maintaining the macro_backtrace. - if tts.len() == rhs.tts.len() { - tts = tts.map_enumerated_owned(|i, mut tt| { - let rhs_tt = &rhs.tts[i]; - let ctxt = tt.span().ctxt(); - match (&mut tt, rhs_tt) { - // preserve the delim spans if able - ( - TokenTree::Delimited(target_sp, ..), - mbe::TokenTree::Delimited(source_sp, ..), - ) => { - target_sp.open = source_sp.open.with_ctxt(ctxt); - target_sp.close = source_sp.close.with_ctxt(ctxt); - } - ( - TokenTree::Delimited(target_sp, ..), - mbe::TokenTree::MetaVar(source_sp, ..), - ) => { - target_sp.open = source_sp.with_ctxt(ctxt); - target_sp.close = source_sp.with_ctxt(ctxt).shrink_to_hi(); - } - _ => { - let sp = rhs_tt.span().with_ctxt(ctxt); - tt.set_span(sp); - } - } - tt - }); - } - if cx.trace_macros() { let msg = format!("to `{}`", pprust::tts_to_string(&tts)); trace_macros_note(&mut cx.expansions, sp, msg); diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs index f2a9875ffd2..c969ca7ef89 100644 --- a/compiler/rustc_expand/src/mbe/transcribe.rs +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -4,7 +4,7 @@ NoSyntaxVarsExprRepeat, VarStillRepeating, }; use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, MatchedTokenTree, NamedMatch}; -use crate::mbe::{self, MetaVarExpr}; +use crate::mbe::{self, KleeneOp, MetaVarExpr}; use rustc_ast::mut_visit::{self, MutVisitor}; use rustc_ast::token::{self, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree}; @@ -42,6 +42,7 @@ enum Frame<'a> { tts: &'a [mbe::TokenTree], idx: usize, sep: Option, + kleene_op: KleeneOp, }, } @@ -207,7 +208,7 @@ pub(super) fn transcribe<'a>( // Is the repetition empty? if len == 0 { - if seq.kleene.op == mbe::KleeneOp::OneOrMore { + if seq.kleene.op == KleeneOp::OneOrMore { // FIXME: this really ought to be caught at macro definition // time... It happens when the Kleene operator in the matcher and // the body for the same meta-variable do not match. @@ -227,6 +228,7 @@ pub(super) fn transcribe<'a>( idx: 0, sep: seq.separator.clone(), tts: &delimited.tts, + kleene_op: seq.kleene.op, }); } } @@ -243,7 +245,7 @@ pub(super) fn transcribe<'a>( MatchedTokenTree(tt) => { // `tt`s are emitted into the output stream directly as "raw tokens", // without wrapping them into groups. - result.push(tt.clone()); + result.push(maybe_use_metavar_location(cx, &stack, sp, tt)); } MatchedNonterminal(nt) => { // Other variables are emitted into the output stream as groups with @@ -308,6 +310,62 @@ pub(super) fn transcribe<'a>( } } +/// Usually metavariables `$var` produce interpolated tokens, which have an additional place for +/// keeping both the original span and the metavariable span. For `tt` metavariables that's not the +/// case however, and there's no place for keeping a second span. So we try to give the single +/// produced span a location that would be most useful in practice (the hygiene part of the span +/// must not be changed). +/// +/// Different locations are useful for different purposes: +/// - The original location is useful when we need to report a diagnostic for the original token in +/// isolation, without combining it with any surrounding tokens. This case occurs, but it is not +/// very common in practice. +/// - The metavariable location is useful when we need to somehow combine the token span with spans +/// of its surrounding tokens. This is the most common way to use token spans. +/// +/// So this function replaces the original location with the metavariable location in all cases +/// except these two: +/// - The metavariable is an element of undelimited sequence `$($tt)*`. +/// These are typically used for passing larger amounts of code, and tokens in that code usually +/// combine with each other and not with tokens outside of the sequence. +/// - The metavariable span comes from a different crate, then we prefer the more local span. +/// +/// FIXME: Find a way to keep both original and metavariable spans for all tokens without +/// regressing compilation time too much. Several experiments for adding such spans were made in +/// the past (PR #95580, #118517, #118671) and all showed some regressions. +fn maybe_use_metavar_location( + cx: &ExtCtxt<'_>, + stack: &[Frame<'_>], + metavar_span: Span, + orig_tt: &TokenTree, +) -> TokenTree { + let undelimited_seq = matches!( + stack.last(), + Some(Frame::Sequence { + tts: [_], + sep: None, + kleene_op: KleeneOp::ZeroOrMore | KleeneOp::OneOrMore, + .. + }) + ); + if undelimited_seq || cx.source_map().is_imported(metavar_span) { + return orig_tt.clone(); + } + + match orig_tt { + TokenTree::Token(Token { kind, span }, spacing) => { + let span = metavar_span.with_ctxt(span.ctxt()); + TokenTree::Token(Token { kind: kind.clone(), span }, *spacing) + } + TokenTree::Delimited(dspan, dspacing, delimiter, tts) => { + let open = metavar_span.shrink_to_lo().with_ctxt(dspan.open.ctxt()); + let close = metavar_span.shrink_to_hi().with_ctxt(dspan.close.ctxt()); + let dspan = DelimSpan::from_pair(open, close); + TokenTree::Delimited(dspan, *dspacing, *delimiter, tts.clone()) + } + } +} + /// Lookup the meta-var named `ident` and return the matched token tree from the invocation using /// the set of matches `interpolations`. /// diff --git a/tests/coverage/macro_name_span.cov-map b/tests/coverage/macro_name_span.cov-map index b84628fc788..a18e5f14861 100644 --- a/tests/coverage/macro_name_span.cov-map +++ b/tests/coverage/macro_name_span.cov-map @@ -1,15 +1,15 @@ Function name: macro_name_span::affected_function -Raw bytes (9): 0x[01, 01, 00, 01, 01, 06, 1b, 00, 20] +Raw bytes (9): 0x[01, 01, 00, 01, 01, 16, 1c, 02, 06] Number of files: 1 - file 0 => global file 1 Number of expressions: 0 Number of file 0 mappings: 1 -- Code(Counter(0)) at (prev + 6, 27) to (start + 0, 32) +- Code(Counter(0)) at (prev + 22, 28) to (start + 2, 6) Function name: macro_name_span::main -Raw bytes (9): 0x[01, 02, 00, 01, 01, 0b, 01, 02, 02] +Raw bytes (9): 0x[01, 01, 00, 01, 01, 0b, 01, 02, 02] Number of files: 1 -- file 0 => global file 2 +- file 0 => global file 1 Number of expressions: 0 Number of file 0 mappings: 1 - Code(Counter(0)) at (prev + 11, 1) to (start + 2, 2) diff --git a/tests/coverage/macro_name_span.coverage b/tests/coverage/macro_name_span.coverage index cadf7024657..28c88b1defa 100644 --- a/tests/coverage/macro_name_span.coverage +++ b/tests/coverage/macro_name_span.coverage @@ -1,16 +1,3 @@ -$DIR/auxiliary/macro_name_span_helper.rs: - LL| |// edition: 2021 - LL| | - LL| |#[macro_export] - LL| |macro_rules! macro_that_defines_a_function { - LL| | (fn $name:ident () $body:tt) => { - LL| 1| fn $name () -> () $body - LL| | } - LL| |} - LL| | - LL| |// Non-executable comment. - -$DIR/macro_name_span.rs: LL| |// edition: 2021 LL| | LL| |// Regression test for . @@ -32,8 +19,8 @@ $DIR/macro_name_span.rs: LL| |} LL| | LL| |macro_name_span_helper::macro_that_defines_a_function! { - LL| | fn affected_function() { - LL| | macro_with_an_unreasonably_and_egregiously_long_name!(); - LL| | } + LL| 1| fn affected_function() { + LL| 1| macro_with_an_unreasonably_and_egregiously_long_name!(); + LL| 1| } LL| |} diff --git a/tests/ui/macros/issue-118786.stderr b/tests/ui/macros/issue-118786.stderr index ca3a40f31c1..1a8ac9340da 100644 --- a/tests/ui/macros/issue-118786.stderr +++ b/tests/ui/macros/issue-118786.stderr @@ -6,8 +6,8 @@ LL | macro_rules! $macro_name { | help: change the delimiters to curly braces | -LL | macro_rules! {} { - | ~ + +LL | macro_rules! {$macro_name} { + | + + help: add a semicolon | LL | macro_rules! $macro_name; { diff --git a/tests/ui/parser/issues/issue-68091-unicode-ident-after-if.stderr b/tests/ui/parser/issues/issue-68091-unicode-ident-after-if.stderr index 2423a7526be..8e125864b8b 100644 --- a/tests/ui/parser/issues/issue-68091-unicode-ident-after-if.stderr +++ b/tests/ui/parser/issues/issue-68091-unicode-ident-after-if.stderr @@ -1,10 +1,10 @@ error: missing condition for `if` expression - --> $DIR/issue-68091-unicode-ident-after-if.rs:3:14 + --> $DIR/issue-68091-unicode-ident-after-if.rs:3:13 | LL | $($c)ö* {} - | ^ - if this block is the condition of the `if` expression, then it must be followed by another block - | | - | expected condition here + | ^ - if this block is the condition of the `if` expression, then it must be followed by another block + | | + | expected condition here error: aborting due to 1 previous error diff --git a/tests/ui/parser/issues/issue-68092-unicode-ident-after-incomplete-expr.stderr b/tests/ui/parser/issues/issue-68092-unicode-ident-after-incomplete-expr.stderr index 43047ff8802..15aa62e0810 100644 --- a/tests/ui/parser/issues/issue-68092-unicode-ident-after-incomplete-expr.stderr +++ b/tests/ui/parser/issues/issue-68092-unicode-ident-after-incomplete-expr.stderr @@ -1,8 +1,8 @@ error: macro expansion ends with an incomplete expression: expected expression - --> $DIR/issue-68092-unicode-ident-after-incomplete-expr.rs:3:14 + --> $DIR/issue-68092-unicode-ident-after-incomplete-expr.rs:3:13 | LL | $($c)ö* - | ^ expected expression + | ^ expected expression error: aborting due to 1 previous error diff --git a/tests/ui/proc-macro/capture-macro-rules-invoke.stdout b/tests/ui/proc-macro/capture-macro-rules-invoke.stdout index 71e34119ba7..bbab08bca49 100644 --- a/tests/ui/proc-macro/capture-macro-rules-invoke.stdout +++ b/tests/ui/proc-macro/capture-macro-rules-invoke.stdout @@ -271,7 +271,7 @@ PRINT-BANG INPUT (DEBUG): TokenStream [ span: $DIR/capture-macro-rules-invoke.rs:47:19: 47:20 (#0), }, ], - span: $DIR/capture-macro-rules-invoke.rs:47:13: 47:22 (#0), + span: $DIR/capture-macro-rules-invoke.rs:15:60: 15:63 (#0), }, Punct { ch: ',', diff --git a/tests/ui/proc-macro/expand-expr.rs b/tests/ui/proc-macro/expand-expr.rs index 700aac41c44..89cd1d767a5 100644 --- a/tests/ui/proc-macro/expand-expr.rs +++ b/tests/ui/proc-macro/expand-expr.rs @@ -37,7 +37,7 @@ expand_expr_is!("10 + 20", stringify!(10 + 20)); macro_rules! echo_tts { - ($($t:tt)*) => { $($t)* }; //~ ERROR: expected expression, found `$` + ($($t:tt)*) => { $($t)* }; } macro_rules! echo_lit { @@ -109,7 +109,7 @@ macro_rules! mac { // Invalid expressions produce errors in addition to returning `Err(())`. expand_expr_fail!($); //~ ERROR: expected expression, found `$` -expand_expr_fail!(echo_tts!($)); +expand_expr_fail!(echo_tts!($)); //~ ERROR: expected expression, found `$` expand_expr_fail!(echo_pm!($)); //~ ERROR: expected expression, found `$` // We get errors reported and recover during macro expansion if the macro diff --git a/tests/ui/proc-macro/expand-expr.stderr b/tests/ui/proc-macro/expand-expr.stderr index df61e997289..2b92472e5ab 100644 --- a/tests/ui/proc-macro/expand-expr.stderr +++ b/tests/ui/proc-macro/expand-expr.stderr @@ -11,10 +11,10 @@ LL | expand_expr_fail!($); | ^ expected expression error: expected expression, found `$` - --> $DIR/expand-expr.rs:40:23 + --> $DIR/expand-expr.rs:112:29 | -LL | ($($t:tt)*) => { $($t)* }; - | ^^^^ expected expression +LL | expand_expr_fail!(echo_tts!($)); + | ^ expected expression error: expected expression, found `$` --> $DIR/expand-expr.rs:113:28