Properly allow macro expanded format_args
invocations to uses captures
Originally, this was kinda half-allowed. There were some primitive checks in place that looked at the span to see whether the input was likely a literal. These "source literal" checks are needed because the spans created during `format_args` parsing only make sense when it is indeed a literal that was written in the source code directly. This is orthogonal to the restriction that the first argument must be a "direct literal", not being exanpanded from macros. This restriction was imposed by [RFC 2795] on the basis of being too confusing. But this was only concerned with the argument of the invocation being a literal, not whether it was a source literal (maybe in spirit it meant it being a source literal, this is not clear to me). Since the original check only really cared about source literals (which is good enough to deny the `format_args!(concat!())` example), macros expanding to `format_args` invocations were able to use implicit captures if they spanned the string in a way that lead back to a source string. The "source literal" checks were not strict enough and caused ICEs in certain cases (see # 106191 (the space is intended to avoid spammy backreferences)). So I tightened it up in # 106195 to really only work if it's a direct source literal. This caused the `indoc` crate to break. `indoc` transformed the source literal by removing whitespace, which made it not a "source literal" anymore (which is required to fix the ICE). But since `indoc` spanned the literal in ways that made the old check think that it's a literal, it was able to use implicit captures (which is useful and nice for the users of `indoc`). This commit properly seperates the previously introduced concepts of "source literal" and "direct literal" and therefore allows `indoc` invocations, which don't create "source literals" to use implicit captures again. [RFC 2795]: https://rust-lang.github.io/rfcs/2795-format-args-implicit-identifiers.html#macro-hygiene
This commit is contained in:
parent
0058748944
commit
729185338f
@ -36,6 +36,21 @@ enum PositionUsedAs {
|
||||
}
|
||||
use PositionUsedAs::*;
|
||||
|
||||
struct MacroInput {
|
||||
fmtstr: P<Expr>,
|
||||
args: FormatArguments,
|
||||
/// Whether the first argument was a string literal or a result from eager macro expansion.
|
||||
/// If it's not a string literal, we disallow implicit arugment capturing.
|
||||
///
|
||||
/// This does not correspond to whether we can treat spans to the literal normally, as the whole
|
||||
/// invocation might be the result of another macro expansion, in which case this flag may still be true.
|
||||
///
|
||||
/// See [RFC 2795] for more information.
|
||||
///
|
||||
/// [RFC 2795]: https://rust-lang.github.io/rfcs/2795-format-args-implicit-identifiers.html#macro-hygiene
|
||||
is_direct_literal: bool,
|
||||
}
|
||||
|
||||
/// Parses the arguments from the given list of tokens, returning the diagnostic
|
||||
/// if there's a parse error so we can continue parsing other format!
|
||||
/// expressions.
|
||||
@ -45,11 +60,7 @@ use PositionUsedAs::*;
|
||||
/// ```text
|
||||
/// Ok((fmtstr, parsed arguments))
|
||||
/// ```
|
||||
fn parse_args<'a>(
|
||||
ecx: &mut ExtCtxt<'a>,
|
||||
sp: Span,
|
||||
tts: TokenStream,
|
||||
) -> PResult<'a, (P<Expr>, FormatArguments)> {
|
||||
fn parse_args<'a>(ecx: &mut ExtCtxt<'a>, sp: Span, tts: TokenStream) -> PResult<'a, MacroInput> {
|
||||
let mut args = FormatArguments::new();
|
||||
|
||||
let mut p = ecx.new_parser_from_tts(tts);
|
||||
@ -59,25 +70,21 @@ fn parse_args<'a>(
|
||||
}
|
||||
|
||||
let first_token = &p.token;
|
||||
let fmtstr = match first_token.kind {
|
||||
token::TokenKind::Literal(token::Lit {
|
||||
kind: token::LitKind::Str | token::LitKind::StrRaw(_),
|
||||
..
|
||||
}) => {
|
||||
// If the first token is a string literal, then a format expression
|
||||
// is constructed from it.
|
||||
//
|
||||
// This allows us to properly handle cases when the first comma
|
||||
// after the format string is mistakenly replaced with any operator,
|
||||
// which cause the expression parser to eat too much tokens.
|
||||
p.parse_literal_maybe_minus()?
|
||||
}
|
||||
_ => {
|
||||
// Otherwise, we fall back to the expression parser.
|
||||
p.parse_expr()?
|
||||
}
|
||||
|
||||
let fmtstr = if let token::Literal(lit) = first_token.kind && matches!(lit.kind, token::Str | token::StrRaw(_)) {
|
||||
// This allows us to properly handle cases when the first comma
|
||||
// after the format string is mistakenly replaced with any operator,
|
||||
// which cause the expression parser to eat too much tokens.
|
||||
p.parse_literal_maybe_minus()?
|
||||
} else {
|
||||
// Otherwise, we fall back to the expression parser.
|
||||
p.parse_expr()?
|
||||
};
|
||||
|
||||
// Only allow implicit captures to be used when the argument is a direct literal
|
||||
// instead of a macro expanding to one.
|
||||
let is_direct_literal = matches!(fmtstr.kind, ExprKind::Lit(_));
|
||||
|
||||
let mut first = true;
|
||||
|
||||
while p.token != token::Eof {
|
||||
@ -147,17 +154,19 @@ fn parse_args<'a>(
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok((fmtstr, args))
|
||||
Ok(MacroInput { fmtstr, args, is_direct_literal })
|
||||
}
|
||||
|
||||
pub fn make_format_args(
|
||||
fn make_format_args(
|
||||
ecx: &mut ExtCtxt<'_>,
|
||||
efmt: P<Expr>,
|
||||
mut args: FormatArguments,
|
||||
input: MacroInput,
|
||||
append_newline: bool,
|
||||
) -> Result<FormatArgs, ()> {
|
||||
let msg = "format argument must be a string literal";
|
||||
let unexpanded_fmt_span = efmt.span;
|
||||
let unexpanded_fmt_span = input.fmtstr.span;
|
||||
|
||||
let MacroInput { fmtstr: efmt, mut args, is_direct_literal } = input;
|
||||
|
||||
let (fmt_str, fmt_style, fmt_span) = match expr_to_spanned_string(ecx, efmt, msg) {
|
||||
Ok(mut fmt) if append_newline => {
|
||||
fmt.0 = Symbol::intern(&format!("{}\n", fmt.0));
|
||||
@ -208,11 +217,11 @@ pub fn make_format_args(
|
||||
}
|
||||
}
|
||||
|
||||
let is_literal = parser.is_literal;
|
||||
let is_source_literal = parser.is_source_literal;
|
||||
|
||||
if !parser.errors.is_empty() {
|
||||
let err = parser.errors.remove(0);
|
||||
let sp = if is_literal {
|
||||
let sp = if is_source_literal {
|
||||
fmt_span.from_inner(InnerSpan::new(err.span.start, err.span.end))
|
||||
} else {
|
||||
// The format string could be another macro invocation, e.g.:
|
||||
@ -230,7 +239,7 @@ pub fn make_format_args(
|
||||
if let Some(note) = err.note {
|
||||
e.note(¬e);
|
||||
}
|
||||
if let Some((label, span)) = err.secondary_label && is_literal {
|
||||
if let Some((label, span)) = err.secondary_label && is_source_literal {
|
||||
e.span_label(fmt_span.from_inner(InnerSpan::new(span.start, span.end)), label);
|
||||
}
|
||||
if err.should_be_replaced_with_positional_argument {
|
||||
@ -256,7 +265,7 @@ pub fn make_format_args(
|
||||
}
|
||||
|
||||
let to_span = |inner_span: rustc_parse_format::InnerSpan| {
|
||||
is_literal.then(|| {
|
||||
is_source_literal.then(|| {
|
||||
fmt_span.from_inner(InnerSpan { start: inner_span.start, end: inner_span.end })
|
||||
})
|
||||
};
|
||||
@ -304,7 +313,7 @@ pub fn make_format_args(
|
||||
// Name not found in `args`, so we add it as an implicitly captured argument.
|
||||
let span = span.unwrap_or(fmt_span);
|
||||
let ident = Ident::new(name, span);
|
||||
let expr = if is_literal {
|
||||
let expr = if is_direct_literal {
|
||||
ecx.expr_ident(span, ident)
|
||||
} else {
|
||||
// For the moment capturing variables from format strings expanded from macros is
|
||||
@ -814,7 +823,7 @@ fn report_invalid_references(
|
||||
// for `println!("{7:7$}", 1);`
|
||||
indexes.sort();
|
||||
indexes.dedup();
|
||||
let span: MultiSpan = if !parser.is_literal || parser.arg_places.is_empty() {
|
||||
let span: MultiSpan = if !parser.is_source_literal || parser.arg_places.is_empty() {
|
||||
MultiSpan::from_span(fmt_span)
|
||||
} else {
|
||||
MultiSpan::from_spans(invalid_refs.iter().filter_map(|&(_, span, _, _)| span).collect())
|
||||
@ -855,8 +864,8 @@ fn expand_format_args_impl<'cx>(
|
||||
) -> Box<dyn base::MacResult + 'cx> {
|
||||
sp = ecx.with_def_site_ctxt(sp);
|
||||
match parse_args(ecx, sp, tts) {
|
||||
Ok((efmt, args)) => {
|
||||
if let Ok(format_args) = make_format_args(ecx, efmt, args, nl) {
|
||||
Ok(input) => {
|
||||
if let Ok(format_args) = make_format_args(ecx, input, nl) {
|
||||
MacEager::expr(ecx.expr(sp, ExprKind::FormatArgs(P(format_args))))
|
||||
} else {
|
||||
MacEager::expr(DummyResult::raw_expr(sp, true))
|
||||
|
@ -234,8 +234,10 @@ pub struct Parser<'a> {
|
||||
last_opening_brace: Option<InnerSpan>,
|
||||
/// Whether the source string is comes from `println!` as opposed to `format!` or `print!`
|
||||
append_newline: bool,
|
||||
/// Whether this formatting string is a literal or it comes from a macro.
|
||||
pub is_literal: bool,
|
||||
/// Whether this formatting string was written directly in the source. This controls whether we
|
||||
/// can use spans to refer into it and give better error messages.
|
||||
/// N.B: This does _not_ control whether implicit argument captures can be used.
|
||||
pub is_source_literal: bool,
|
||||
/// Start position of the current line.
|
||||
cur_line_start: usize,
|
||||
/// Start and end byte offset of every line of the format string. Excludes
|
||||
@ -262,7 +264,7 @@ impl<'a> Iterator for Parser<'a> {
|
||||
} else {
|
||||
let arg = self.argument(lbrace_end);
|
||||
if let Some(rbrace_pos) = self.must_consume('}') {
|
||||
if self.is_literal {
|
||||
if self.is_source_literal {
|
||||
let lbrace_byte_pos = self.to_span_index(pos);
|
||||
let rbrace_byte_pos = self.to_span_index(rbrace_pos);
|
||||
|
||||
@ -302,7 +304,7 @@ impl<'a> Iterator for Parser<'a> {
|
||||
_ => Some(String(self.string(pos))),
|
||||
}
|
||||
} else {
|
||||
if self.is_literal {
|
||||
if self.is_source_literal {
|
||||
let span = self.span(self.cur_line_start, self.input.len());
|
||||
if self.line_spans.last() != Some(&span) {
|
||||
self.line_spans.push(span);
|
||||
@ -323,7 +325,7 @@ impl<'a> Parser<'a> {
|
||||
mode: ParseMode,
|
||||
) -> Parser<'a> {
|
||||
let input_string_kind = find_width_map_from_snippet(snippet, style);
|
||||
let (width_map, is_literal) = match input_string_kind {
|
||||
let (width_map, is_source_literal) = match input_string_kind {
|
||||
InputStringKind::Literal { width_mappings } => (width_mappings, true),
|
||||
InputStringKind::NotALiteral => (Vec::new(), false),
|
||||
};
|
||||
@ -339,7 +341,7 @@ impl<'a> Parser<'a> {
|
||||
width_map,
|
||||
last_opening_brace: None,
|
||||
append_newline,
|
||||
is_literal,
|
||||
is_source_literal,
|
||||
cur_line_start: 0,
|
||||
line_spans: vec![],
|
||||
}
|
||||
@ -532,13 +534,13 @@ impl<'a> Parser<'a> {
|
||||
'{' | '}' => {
|
||||
return &self.input[start..pos];
|
||||
}
|
||||
'\n' if self.is_literal => {
|
||||
'\n' if self.is_source_literal => {
|
||||
self.line_spans.push(self.span(self.cur_line_start, pos));
|
||||
self.cur_line_start = pos + 1;
|
||||
self.cur.next();
|
||||
}
|
||||
_ => {
|
||||
if self.is_literal && pos == self.cur_line_start && c.is_whitespace() {
|
||||
if self.is_source_literal && pos == self.cur_line_start && c.is_whitespace() {
|
||||
self.cur_line_start = pos + c.len_utf8();
|
||||
}
|
||||
self.cur.next();
|
||||
|
@ -28,25 +28,41 @@ pub fn err_with_input_span(input: TokenStream) -> TokenStream {
|
||||
TokenStream::from(TokenTree::Literal(lit))
|
||||
}
|
||||
|
||||
fn build_format(args: impl Into<TokenStream>) -> TokenStream {
|
||||
TokenStream::from_iter([
|
||||
TokenTree::from(Ident::new("format", Span::call_site())),
|
||||
TokenTree::from(Punct::new('!', Spacing::Alone)),
|
||||
TokenTree::from(Group::new(Delimiter::Parenthesis, args.into())),
|
||||
])
|
||||
}
|
||||
|
||||
#[proc_macro]
|
||||
pub fn respan_to_invalid_format_literal(input: TokenStream) -> TokenStream {
|
||||
let mut s = Literal::string("{");
|
||||
s.set_span(input.into_iter().next().unwrap().span());
|
||||
TokenStream::from_iter([
|
||||
TokenTree::from(Ident::new("format", Span::call_site())),
|
||||
TokenTree::from(Punct::new('!', Spacing::Alone)),
|
||||
TokenTree::from(Group::new(Delimiter::Parenthesis, TokenTree::from(s).into())),
|
||||
])
|
||||
|
||||
build_format(TokenTree::from(s))
|
||||
}
|
||||
|
||||
#[proc_macro]
|
||||
pub fn capture_a_with_prepended_space_preserve_span(input: TokenStream) -> TokenStream {
|
||||
let mut s = Literal::string(" {a}");
|
||||
s.set_span(input.into_iter().next().unwrap().span());
|
||||
TokenStream::from_iter([
|
||||
TokenTree::from(Ident::new("format", Span::call_site())),
|
||||
TokenTree::from(Punct::new('!', Spacing::Alone)),
|
||||
TokenTree::from(Group::new(Delimiter::Parenthesis, TokenTree::from(s).into())),
|
||||
])
|
||||
|
||||
build_format(TokenTree::from(s))
|
||||
}
|
||||
|
||||
#[proc_macro]
|
||||
pub fn format_args_captures(_: TokenStream) -> TokenStream {
|
||||
r#"{ let x = 5; format!("{x}") }"#.parse().unwrap()
|
||||
}
|
||||
|
||||
#[proc_macro]
|
||||
pub fn bad_format_args_captures(_: TokenStream) -> TokenStream {
|
||||
r#"{ let x = 5; format!(concat!("{x}")) }"#.parse().unwrap()
|
||||
}
|
||||
|
||||
#[proc_macro]
|
||||
pub fn identity_pm(input: TokenStream) -> TokenStream {
|
||||
input
|
||||
}
|
||||
|
21
tests/ui/fmt/format-args-capture-first-literal-is-macro.rs
Normal file
21
tests/ui/fmt/format-args-capture-first-literal-is-macro.rs
Normal file
@ -0,0 +1,21 @@
|
||||
// aux-build:format-string-proc-macro.rs
|
||||
|
||||
#[macro_use]
|
||||
extern crate format_string_proc_macro;
|
||||
|
||||
macro_rules! identity_mbe {
|
||||
($tt:tt) => {
|
||||
$tt
|
||||
//~^ ERROR there is no argument named `a`
|
||||
};
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let a = 0;
|
||||
|
||||
format!(identity_pm!("{a}"));
|
||||
//~^ ERROR there is no argument named `a`
|
||||
format!(identity_mbe!("{a}"));
|
||||
format!(concat!("{a}"));
|
||||
//~^ ERROR there is no argument named `a`
|
||||
}
|
@ -0,0 +1,30 @@
|
||||
error: there is no argument named `a`
|
||||
--> $DIR/format-args-capture-first-literal-is-macro.rs:16:26
|
||||
|
|
||||
LL | format!(identity_pm!("{a}"));
|
||||
| ^^^^^
|
||||
|
|
||||
= note: did you intend to capture a variable `a` from the surrounding scope?
|
||||
= note: to avoid ambiguity, `format_args!` cannot capture variables when the format string is expanded from a macro
|
||||
|
||||
error: there is no argument named `a`
|
||||
--> $DIR/format-args-capture-first-literal-is-macro.rs:8:9
|
||||
|
|
||||
LL | $tt
|
||||
| ^^^
|
||||
|
|
||||
= note: did you intend to capture a variable `a` from the surrounding scope?
|
||||
= note: to avoid ambiguity, `format_args!` cannot capture variables when the format string is expanded from a macro
|
||||
|
||||
error: there is no argument named `a`
|
||||
--> $DIR/format-args-capture-first-literal-is-macro.rs:19:13
|
||||
|
|
||||
LL | format!(concat!("{a}"));
|
||||
| ^^^^^^^^^^^^^^
|
||||
|
|
||||
= note: did you intend to capture a variable `a` from the surrounding scope?
|
||||
= note: to avoid ambiguity, `format_args!` cannot capture variables when the format string is expanded from a macro
|
||||
= note: this error originates in the macro `concat` (in Nightly builds, run with -Z macro-backtrace for more info)
|
||||
|
||||
error: aborting due to 3 previous errors
|
||||
|
@ -0,0 +1,8 @@
|
||||
// aux-build:format-string-proc-macro.rs
|
||||
|
||||
extern crate format_string_proc_macro;
|
||||
|
||||
fn main() {
|
||||
format_string_proc_macro::bad_format_args_captures!();
|
||||
//~^ ERROR there is no argument named `x`
|
||||
}
|
@ -0,0 +1,12 @@
|
||||
error: there is no argument named `x`
|
||||
--> $DIR/format-args-capture-from-pm-first-arg-macro.rs:6:5
|
||||
|
|
||||
LL | format_string_proc_macro::bad_format_args_captures!();
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
|
||||
= note: did you intend to capture a variable `x` from the surrounding scope?
|
||||
= note: to avoid ambiguity, `format_args!` cannot capture variables when the format string is expanded from a macro
|
||||
= note: this error originates in the macro `concat` (in Nightly builds, run with -Z macro-backtrace for more info)
|
||||
|
||||
error: aborting due to previous error
|
||||
|
10
tests/ui/fmt/format-args-capture-issue-106408.rs
Normal file
10
tests/ui/fmt/format-args-capture-issue-106408.rs
Normal file
@ -0,0 +1,10 @@
|
||||
// check-pass
|
||||
// aux-build:format-string-proc-macro.rs
|
||||
|
||||
extern crate format_string_proc_macro;
|
||||
|
||||
fn main() {
|
||||
// While literal macros like `format_args!(concat!())` are not supposed to work with implicit
|
||||
// captures, it should work if the whole invocation comes from a macro expansion (#106408).
|
||||
format_string_proc_macro::format_args_captures!();
|
||||
}
|
16
tests/ui/fmt/format-args-capture-macro-hygiene-pass.rs
Normal file
16
tests/ui/fmt/format-args-capture-macro-hygiene-pass.rs
Normal file
@ -0,0 +1,16 @@
|
||||
// run-pass
|
||||
|
||||
macro_rules! format_mbe {
|
||||
($tt:tt) => {
|
||||
{
|
||||
#[allow(unused_variables)]
|
||||
let a = 123;
|
||||
format!($tt)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let a = 0;
|
||||
assert_eq!(format_mbe!("{a}"), "0");
|
||||
}
|
@ -1,15 +1,10 @@
|
||||
// aux-build:format-string-proc-macro.rs
|
||||
// check-fail
|
||||
// known-bug: #106191
|
||||
// unset-rustc-env:RUST_BACKTRACE
|
||||
// had to be reverted
|
||||
// error-pattern:unexpectedly panicked
|
||||
// failure-status:101
|
||||
// dont-check-compiler-stderr
|
||||
|
||||
extern crate format_string_proc_macro;
|
||||
|
||||
fn main() {
|
||||
format_string_proc_macro::respan_to_invalid_format_literal!("¡");
|
||||
//~^ ERROR invalid format string: expected `'}'` but string was terminated
|
||||
format_args!(r#concat!("¡ {"));
|
||||
//~^ ERROR invalid format string: expected `'}'` but string was terminated
|
||||
}
|
||||
|
@ -1,2 +1,19 @@
|
||||
query stack during panic:
|
||||
end of query stack
|
||||
error: invalid format string: expected `'}'` but string was terminated
|
||||
--> $DIR/respanned-literal-issue-106191.rs:6:65
|
||||
|
|
||||
LL | format_string_proc_macro::respan_to_invalid_format_literal!("¡");
|
||||
| ^^^ expected `'}'` in format string
|
||||
|
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
|
||||
error: invalid format string: expected `'}'` but string was terminated
|
||||
--> $DIR/respanned-literal-issue-106191.rs:8:18
|
||||
|
|
||||
LL | format_args!(r#concat!("¡ {"));
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^ expected `'}'` in format string
|
||||
|
|
||||
= note: if you intended to print `{`, you can escape it using `{{`
|
||||
= note: this error originates in the macro `concat` (in Nightly builds, run with -Z macro-backtrace for more info)
|
||||
|
||||
error: aborting due to 2 previous errors
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user