Auto merge of #13854 - lowr:fix/mbe-glue-punct, r=Veykril

Support multi-character punct tokens in MBE

Fixes #11497

In the context of MBE, consecutive puncts are parsed as multi-character punct tokens whenever possible. For example, `:::` is parsed as ``[Punct(`::`), Punct(`:`)]`` and shouldn't get matched to patterns like `: : :` or `: ::`.

Until now, this behavior was implemented only when matching puncts against `tt` fragments, not when matching puncts literally. This PR extracts the multi-character punct handling into a separate method and extends it to literal matching.

For good measure, this PR adds support for the `<-` token, which is still considered a single token in rustc (see `compiler/rustc_ast/src/token.rs`, line 249, at commit `e396186407`) even though the placement syntax has been removed.
This commit is contained in:
bors 2023-01-02 13:07:42 +00:00
commit 790759fb11
6 changed files with 258 additions and 126 deletions

View File

@ -1630,3 +1630,48 @@ macro_rules! m {
"#]],
);
}
#[test]
fn test_punct_without_space() {
// Puncts are "glued" greedily: consecutive puncts written without spaces are combined into
// multi-character tokens whenever possible, and literal patterns must match those glued
// tokens rather than the individual characters.
//
// Expected results below:
// - `:::`   is glued as `::` `:`      and selects the `(:: :)` arm   => "2 1"
// - `: :::` is glued as `:` `::` `:`  and selects the `(: :: :)` arm => "1 2 1"
// - `::::`  is glued as `::` `::`     and selects the `(:: ::)` arm  => "2 2"
check(
r#"
macro_rules! foo {
(: : :) => { "1 1 1" };
(: ::) => { "1 2" };
(:: :) => { "2 1" };
(: : : :) => { "1 1 1 1" };
(:: : :) => { "2 1 1" };
(: :: :) => { "1 2 1" };
(: : ::) => { "1 1 2" };
(:: ::) => { "2 2" };
}
fn test() {
foo!(:::);
foo!(: :::);
foo!(::::);
}
"#,
expect![[r#"
macro_rules! foo {
(: : :) => { "1 1 1" };
(: ::) => { "1 2" };
(:: :) => { "2 1" };
(: : : :) => { "1 1 1 1" };
(:: : :) => { "2 1 1" };
(: :: :) => { "1 2 1" };
(: : ::) => { "1 1 2" };
(:: ::) => { "2 2" };
}
fn test() {
"2 1";
"1 2 1";
"2 2";
}
"#]],
);
}

View File

@ -141,7 +141,13 @@ fn collect_from_op(op: &Op, parent: &mut tt::Subtree, seed: &mut usize) {
None => (),
Some(kind) => panic!("Unhandled kind {kind:?}"),
},
Op::Leaf(leaf) => parent.token_trees.push(leaf.clone().into()),
Op::Literal(it) => parent.token_trees.push(tt::Leaf::from(it.clone()).into()),
Op::Ident(it) => parent.token_trees.push(tt::Leaf::from(it.clone()).into()),
Op::Punct(puncts) => {
for punct in puncts {
parent.token_trees.push(tt::Leaf::from(punct.clone()).into());
}
}
Op::Repeat { tokens, kind, separator } => {
let max = 10;
let cnt = match kind {

View File

@ -68,7 +68,7 @@
expander::{Binding, Bindings, ExpandResult, Fragment},
parser::{MetaVarKind, Op, RepeatKind, Separator},
tt_iter::TtIter,
ExpandError, MetaTemplate,
ExpandError, MetaTemplate, ValueResult,
};
impl Bindings {
@ -321,8 +321,8 @@ struct MatchState<'t> {
/// The KleeneOp of this sequence if we are in a repetition.
sep_kind: Option<RepeatKind>,
/// Number of tokens of separator parsed
sep_parsed: Option<usize>,
/// Whether we already matched separator token.
sep_matched: bool,
/// Matched meta variables bindings
bindings: BindingsIdx,
@ -387,7 +387,7 @@ macro_rules! try_push {
None => {
// We are at or past the end of the matcher of `item`.
if let Some(up) = &item.up {
if item.sep_parsed.is_none() {
if !item.sep_matched {
// Get the `up` matcher
let mut new_pos = (**up).clone();
new_pos.bindings = bindings_builder.copy(&new_pos.bindings);
@ -401,14 +401,17 @@ macro_rules! try_push {
}
// Check if we need a separator.
// We check the separator one by one
let sep_idx = item.sep_parsed.unwrap_or(0);
let sep_len = item.sep.as_ref().map_or(0, Separator::tt_count);
if item.sep.is_some() && sep_idx != sep_len {
if item.sep.is_some() && !item.sep_matched {
let sep = item.sep.as_ref().unwrap();
if src.clone().expect_separator(sep, sep_idx) {
let mut fork = src.clone();
if fork.expect_separator(sep) {
// HACK: here we use `meta_result` to pass `TtIter` back to caller because
// it might have been advanced multiple times. `ValueResult` is
// insignificant.
item.meta_result = Some((fork, ValueResult::ok(None)));
item.dot.next();
item.sep_parsed = Some(sep_idx + 1);
// item.sep_parsed = Some(sep_len);
item.sep_matched = true;
try_push!(next_items, item);
}
}
@ -416,7 +419,7 @@ macro_rules! try_push {
// and try to match again UNLESS we are only allowed to have _one_ repetition.
else if item.sep_kind != Some(RepeatKind::ZeroOrOne) {
item.dot = item.dot.reset();
item.sep_parsed = None;
item.sep_matched = false;
bindings_builder.push_default(&mut item.bindings);
cur_items.push(item);
}
@ -451,7 +454,7 @@ macro_rules! try_push {
up: Some(Box::new(item)),
sep: separator.clone(),
sep_kind: Some(*kind),
sep_parsed: None,
sep_matched: false,
bindings: bindings_builder.alloc(),
meta_result: None,
is_error: false,
@ -500,18 +503,69 @@ macro_rules! try_push {
}
}
}
OpDelimited::Op(Op::Leaf(leaf)) => {
if let Err(err) = match_leaf(leaf, &mut src.clone()) {
res.add_err(err);
item.is_error = true;
OpDelimited::Op(Op::Literal(lhs)) => {
// Match a literal in the pattern against the next input leaf. The input is inspected
// through a clone, so `src` itself is not advanced by this arm.
if let Ok(rhs) = src.clone().expect_leaf() {
if matches!(rhs, tt::Leaf::Literal(it) if it.text == lhs.text) {
// Only a successful match moves the matcher position forward.
item.dot.next();
} else {
res.add_err(ExpandError::UnexpectedToken);
item.is_error = true;
}
} else {
// No leaf is available. Record the error and leave `item.dot` where it is, as the
// `Op::Ident` arm does: advancing here would make the errored item skip past a
// pattern element that was never matched.
res.add_err(ExpandError::binding_error(format!("expected literal: `{lhs}`")));
item.is_error = true;
}
try_push!(next_items, item);
}
OpDelimited::Op(Op::Ident(lhs)) => {
if let Ok(rhs) = src.clone().expect_leaf() {
if matches!(rhs, tt::Leaf::Ident(it) if it.text == lhs.text) {
item.dot.next();
} else {
res.add_err(ExpandError::UnexpectedToken);
item.is_error = true;
}
} else {
res.add_err(ExpandError::binding_error(format!("expected ident: `{lhs}`")));
item.is_error = true;
}
try_push!(next_items, item);
}
OpDelimited::Op(Op::Punct(lhs)) => {
let mut fork = src.clone();
let error = if let Ok(rhs) = fork.expect_glued_punct() {
let first_is_single_quote = rhs[0].char == '\'';
let lhs = lhs.iter().map(|it| it.char);
let rhs = rhs.iter().map(|it| it.char);
if lhs.clone().eq(rhs) {
// HACK: here we use `meta_result` to pass `TtIter` back to caller because
// it might have been advanced multiple times. `ValueResult` is
// insignificant.
item.meta_result = Some((fork, ValueResult::ok(None)));
item.dot.next();
next_items.push(item);
continue;
}
if first_is_single_quote {
// If the first punct token is a single quote, that's a part of a lifetime
// ident, not a punct.
ExpandError::UnexpectedToken
} else {
let lhs: SmolStr = lhs.collect();
ExpandError::binding_error(format!("expected punct: `{lhs}`"))
}
} else {
ExpandError::UnexpectedToken
};
res.add_err(error);
item.is_error = true;
error_items.push(item);
}
OpDelimited::Op(Op::Ignore { .. } | Op::Index { .. }) => {}
OpDelimited::Open => {
if matches!(src.clone().next(), Some(tt::TokenTree::Subtree(..))) {
if matches!(src.peek_n(0), Some(tt::TokenTree::Subtree(..))) {
item.dot.next();
try_push!(next_items, item);
}
@ -541,7 +595,7 @@ fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree) -> Match {
up: None,
sep: None,
sep_kind: None,
sep_parsed: None,
sep_matched: false,
bindings: bindings_builder.alloc(),
is_error: false,
meta_result: None,
@ -616,21 +670,33 @@ fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree) -> Match {
}
// Dump all possible `next_items` into `cur_items` for the next iteration.
else if !next_items.is_empty() {
if let Some((iter, _)) = next_items[0].meta_result.take() {
// We've matched a possibly "glued" punct. The matched punct (hence
// `meta_result` also) must be the same for all items.
// FIXME: If there are multiple items, it's definitely redundant (and it's hacky!
// `meta_result` isn't supposed to be used this way).
// We already bumped, so no need to call `.next()` like in the other branch.
src = iter;
for item in next_items.iter_mut() {
item.meta_result = None;
}
} else {
match src.next() {
Some(tt::TokenTree::Subtree(subtree)) => {
stack.push(src.clone());
src = TtIter::new(subtree);
}
None => {
if let Some(iter) = stack.pop() {
src = iter;
}
}
_ => (),
}
}
// Now process the next token
cur_items.extend(next_items.drain(..));
match src.next() {
Some(tt::TokenTree::Subtree(subtree)) => {
stack.push(src.clone());
src = TtIter::new(subtree);
}
None => {
if let Some(iter) = stack.pop() {
src = iter;
}
}
_ => (),
}
}
// Finally, we have the case where we need to call the black-box parser to get some
// nonterminal.
@ -663,27 +729,6 @@ fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree) -> Match {
}
}
fn match_leaf(lhs: &tt::Leaf, src: &mut TtIter<'_>) -> Result<(), ExpandError> {
let rhs = src
.expect_leaf()
.map_err(|()| ExpandError::binding_error(format!("expected leaf: `{lhs}`")))?;
match (lhs, rhs) {
(
tt::Leaf::Punct(tt::Punct { char: lhs, .. }),
tt::Leaf::Punct(tt::Punct { char: rhs, .. }),
) if lhs == rhs => Ok(()),
(
tt::Leaf::Ident(tt::Ident { text: lhs, .. }),
tt::Leaf::Ident(tt::Ident { text: rhs, .. }),
) if lhs == rhs => Ok(()),
(
tt::Leaf::Literal(tt::Literal { text: lhs, .. }),
tt::Leaf::Literal(tt::Literal { text: rhs, .. }),
) if lhs == rhs => Ok(()),
_ => Err(ExpandError::UnexpectedToken),
}
}
fn match_meta_var(kind: MetaVarKind, input: &mut TtIter<'_>) -> ExpandResult<Option<Fragment>> {
let fragment = match kind {
MetaVarKind::Path => parser::PrefixEntryPoint::Path,
@ -756,10 +801,10 @@ fn collect_vars(collector_fun: &mut impl FnMut(SmolStr), pattern: &MetaTemplate)
for op in pattern.iter() {
match op {
Op::Var { name, .. } => collector_fun(name.clone()),
Op::Leaf(_) => (),
Op::Subtree { tokens, .. } => collect_vars(collector_fun, tokens),
Op::Repeat { tokens, .. } => collect_vars(collector_fun, tokens),
Op::Ignore { .. } | Op::Index { .. } => {}
Op::Ignore { .. } | Op::Index { .. } | Op::Literal(_) | Op::Ident(_) | Op::Punct(_) => {
}
}
}
}
@ -822,14 +867,14 @@ fn size_hint(&self) -> (usize, Option<usize>) {
}
impl<'a> TtIter<'a> {
fn expect_separator(&mut self, separator: &Separator, idx: usize) -> bool {
fn expect_separator(&mut self, separator: &Separator) -> bool {
let mut fork = self.clone();
let ok = match separator {
Separator::Ident(lhs) if idx == 0 => match fork.expect_ident_or_underscore() {
Separator::Ident(lhs) => match fork.expect_ident_or_underscore() {
Ok(rhs) => rhs.text == lhs.text,
Err(_) => false,
},
Separator::Literal(lhs) if idx == 0 => match fork.expect_literal() {
Separator::Literal(lhs) => match fork.expect_literal() {
Ok(rhs) => match rhs {
tt::Leaf::Literal(rhs) => rhs.text == lhs.text,
tt::Leaf::Ident(rhs) => rhs.text == lhs.text,
@ -837,11 +882,14 @@ fn expect_separator(&mut self, separator: &Separator, idx: usize) -> bool {
},
Err(_) => false,
},
Separator::Puncts(lhss) if idx < lhss.len() => match fork.expect_punct() {
Ok(rhs) => rhs.char == lhss[idx].char,
Separator::Puncts(lhs) => match fork.expect_glued_punct() {
Ok(rhs) => {
let lhs = lhs.iter().map(|it| it.char);
let rhs = rhs.iter().map(|it| it.char);
lhs.eq(rhs)
}
Err(_) => false,
},
_ => false,
};
if ok {
*self = fork;
@ -850,52 +898,21 @@ fn expect_separator(&mut self, separator: &Separator, idx: usize) -> bool {
}
fn expect_tt(&mut self) -> Result<tt::TokenTree, ()> {
match self.peek_n(0) {
Some(tt::TokenTree::Leaf(tt::Leaf::Punct(punct))) if punct.char == '\'' => {
return self.expect_lifetime();
if let Some(tt::TokenTree::Leaf(tt::Leaf::Punct(punct))) = self.peek_n(0) {
if punct.char == '\'' {
self.expect_lifetime()
} else {
let puncts = self.expect_glued_punct()?;
let token_trees = puncts.into_iter().map(|p| tt::Leaf::Punct(p).into()).collect();
Ok(tt::TokenTree::Subtree(tt::Subtree { delimiter: None, token_trees }))
}
_ => (),
}
let tt = self.next().ok_or(())?.clone();
let punct = match tt {
tt::TokenTree::Leaf(tt::Leaf::Punct(punct)) if punct.spacing == tt::Spacing::Joint => {
punct
}
_ => return Ok(tt),
};
let (second, third) = match (self.peek_n(0), self.peek_n(1)) {
(
Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p2))),
Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p3))),
) if p2.spacing == tt::Spacing::Joint => (p2.char, Some(p3.char)),
(Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p2))), _) => (p2.char, None),
_ => return Ok(tt),
};
match (punct.char, second, third) {
('.', '.', Some('.' | '=')) | ('<', '<', Some('=')) | ('>', '>', Some('=')) => {
let tt2 = self.next().unwrap().clone();
let tt3 = self.next().unwrap().clone();
Ok(tt::Subtree { delimiter: None, token_trees: vec![tt, tt2, tt3] }.into())
}
('-' | '!' | '*' | '/' | '&' | '%' | '^' | '+' | '<' | '=' | '>' | '|', '=', _)
| ('-' | '=' | '>', '>', _)
| (':', ':', _)
| ('.', '.', _)
| ('&', '&', _)
| ('<', '<', _)
| ('|', '|', _) => {
let tt2 = self.next().unwrap().clone();
Ok(tt::Subtree { delimiter: None, token_trees: vec![tt, tt2] }.into())
}
_ => Ok(tt),
} else {
self.next().ok_or(()).cloned()
}
}
fn expect_lifetime(&mut self) -> Result<tt::TokenTree, ()> {
let punct = self.expect_punct()?;
let punct = self.expect_single_punct()?;
if punct.char != '\'' {
return Err(());
}

View File

@ -134,7 +134,13 @@ fn expand_subtree(
let mut err = None;
for op in template.iter() {
match op {
Op::Leaf(tt) => arena.push(tt.clone().into()),
Op::Literal(it) => arena.push(tt::Leaf::from(it.clone()).into()),
Op::Ident(it) => arena.push(tt::Leaf::from(it.clone()).into()),
Op::Punct(puncts) => {
for punct in puncts {
arena.push(tt::Leaf::from(punct.clone()).into());
}
}
Op::Subtree { tokens, delimiter } => {
let ExpandResult { value: tt, err: e } =
expand_subtree(ctx, tokens, *delimiter, arena);

View File

@ -1,7 +1,7 @@
//! Parser recognizes special macro syntax, `$var` and `$(repeat)*`, in token
//! trees.
use smallvec::SmallVec;
use smallvec::{smallvec, SmallVec};
use syntax::SmolStr;
use crate::{tt_iter::TtIter, ParseError};
@ -39,7 +39,7 @@ fn parse(tt: &tt::Subtree, mode: Mode) -> Result<MetaTemplate, ParseError> {
let mut src = TtIter::new(tt);
let mut res = Vec::new();
while let Some(first) = src.next() {
while let Some(first) = src.peek_n(0) {
let op = next_op(first, &mut src, mode)?;
res.push(op);
}
@ -54,8 +54,10 @@ pub(crate) enum Op {
Ignore { name: SmolStr, id: tt::TokenId },
Index { depth: u32 },
Repeat { tokens: MetaTemplate, kind: RepeatKind, separator: Option<Separator> },
Leaf(tt::Leaf),
Subtree { tokens: MetaTemplate, delimiter: Option<tt::Delimiter> },
Literal(tt::Literal),
Punct(SmallVec<[tt::Punct; 3]>),
Ident(tt::Ident),
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
@ -108,28 +110,23 @@ fn eq(&self, other: &Separator) -> bool {
}
}
impl Separator {
pub(crate) fn tt_count(&self) -> usize {
match self {
Separator::Literal(_) => 1,
Separator::Ident(_) => 1,
Separator::Puncts(it) => it.len(),
}
}
}
/// Parsing mode passed to `next_op` and `MetaTemplate::parse`.
///
/// NOTE(review): presumably selects between the pattern side and the template side of a
/// macro rule; the only visible difference in this view is how `$$` is handled — confirm.
#[derive(Clone, Copy)]
enum Mode {
// `next_op` reports "`$$` is not allowed on the pattern side" — presumably for this variant.
Pattern,
// In this mode `next_op` turns the punct following `$` into a single-punct `Op::Punct`.
Template,
}
fn next_op<'a>(first: &tt::TokenTree, src: &mut TtIter<'a>, mode: Mode) -> Result<Op, ParseError> {
let res = match first {
tt::TokenTree::Leaf(leaf @ tt::Leaf::Punct(tt::Punct { char: '$', .. })) => {
fn next_op<'a>(
first_peeked: &tt::TokenTree,
src: &mut TtIter<'a>,
mode: Mode,
) -> Result<Op, ParseError> {
let res = match first_peeked {
tt::TokenTree::Leaf(tt::Leaf::Punct(p @ tt::Punct { char: '$', .. })) => {
src.next().expect("first token already peeked");
// Note that the '$' itself is a valid token inside macro_rules.
let second = match src.next() {
None => return Ok(Op::Leaf(leaf.clone())),
None => return Ok(Op::Punct(smallvec![p.clone()])),
Some(it) => it,
};
match second {
@ -160,7 +157,7 @@ fn next_op<'a>(first: &tt::TokenTree, src: &mut TtIter<'a>, mode: Mode) -> Resul
tt::TokenTree::Leaf(leaf) => match leaf {
tt::Leaf::Ident(ident) if ident.text == "crate" => {
// We simply produce identifier `$crate` here. And it will be resolved when lowering ast to Path.
Op::Leaf(tt::Leaf::from(tt::Ident { text: "$crate".into(), id: ident.id }))
Op::Ident(tt::Ident { text: "$crate".into(), id: ident.id })
}
tt::Leaf::Ident(ident) => {
let kind = eat_fragment_kind(src, mode)?;
@ -180,7 +177,7 @@ fn next_op<'a>(first: &tt::TokenTree, src: &mut TtIter<'a>, mode: Mode) -> Resul
"`$$` is not allowed on the pattern side",
))
}
Mode::Template => Op::Leaf(tt::Leaf::Punct(*punct)),
Mode::Template => Op::Punct(smallvec![*punct]),
},
tt::Leaf::Punct(_) | tt::Leaf::Literal(_) => {
return Err(ParseError::expected("expected ident"))
@ -188,8 +185,25 @@ fn next_op<'a>(first: &tt::TokenTree, src: &mut TtIter<'a>, mode: Mode) -> Resul
},
}
}
tt::TokenTree::Leaf(tt) => Op::Leaf(tt.clone()),
tt::TokenTree::Leaf(tt::Leaf::Literal(it)) => {
src.next().expect("first token already peeked");
Op::Literal(it.clone())
}
tt::TokenTree::Leaf(tt::Leaf::Ident(it)) => {
src.next().expect("first token already peeked");
Op::Ident(it.clone())
}
tt::TokenTree::Leaf(tt::Leaf::Punct(_)) => {
// There's at least one punct so this shouldn't fail.
let puncts = src.expect_glued_punct().unwrap();
Op::Punct(puncts)
}
tt::TokenTree::Subtree(subtree) => {
src.next().expect("first token already peeked");
let tokens = MetaTemplate::parse(subtree, mode)?;
Op::Subtree { tokens, delimiter: subtree.delimiter }
}

View File

@ -1,6 +1,7 @@
//! A "Parser" structure for token trees. We use this when parsing a declarative
//! macro definition into a list of patterns and templates.
use smallvec::{smallvec, SmallVec};
use syntax::SyntaxKind;
use tt::buffer::TokenBuffer;
@ -80,13 +81,56 @@ pub(crate) fn expect_u32_literal(&mut self) -> Result<u32, ()> {
}
}
pub(crate) fn expect_punct(&mut self) -> Result<&'a tt::Punct, ()> {
pub(crate) fn expect_single_punct(&mut self) -> Result<&'a tt::Punct, ()> {
match self.expect_leaf()? {
tt::Leaf::Punct(it) => Ok(it),
_ => Err(()),
}
}
/// Returns consecutive `Punct`s that can be glued together.
///
/// The first punct is always consumed. Up to two following puncts are consumed as well, but
/// only when the sequence forms one of the two- or three-character tokens listed in the
/// `match` below; otherwise they are left in the iterator.
///
/// Returns `Err(())` when the iterator is exhausted or the next token tree is not a punct.
/// Note that in the latter case the non-punct token has already been consumed by `next()`,
/// so callers that need to backtrack should call this on a clone.
///
/// This method currently may return a single quotation, which is part of lifetime ident and
/// conceptually not a punct in the context of mbe. Callers should handle this.
pub(crate) fn expect_glued_punct(&mut self) -> Result<SmallVec<[tt::Punct; 3]>, ()> {
let tt::TokenTree::Leaf(tt::Leaf::Punct(first)) = self.next().ok_or(())?.clone() else {
return Err(());
};
// A punct with `Alone` spacing is never glued to whatever follows it.
if first.spacing == tt::Spacing::Alone {
return Ok(smallvec![first]);
}
// Peek (without consuming) at the next one or two tokens. A third punct is only taken into
// account when the second one is `Joint`; otherwise only the second is considered.
let (second, third) = match (self.peek_n(0), self.peek_n(1)) {
(
Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p2))),
Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p3))),
) if p2.spacing == tt::Spacing::Joint => (p2, Some(p3)),
(Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p2))), _) => (p2, None),
_ => return Ok(smallvec![first]),
};
match (first.char, second.char, third.map(|it| it.char)) {
// Three-character tokens: `...`, `..=`, `<<=`, `>>=`. `third` is `Some` here because
// the pattern requires it, so the `unwrap` cannot fail.
('.', '.', Some('.' | '=')) | ('<', '<', Some('=')) | ('>', '>', Some('=')) => {
let _ = self.next().unwrap();
let _ = self.next().unwrap();
Ok(smallvec![first, second.clone(), third.unwrap().clone()])
}
// Two-character tokens: `X=` compound/comparison operators, `->`, `=>`, `>>`, `<-`,
// `::`, `..`, `&&`, `<<`, `||`. Only the second punct is consumed.
('-' | '!' | '*' | '/' | '&' | '%' | '^' | '+' | '<' | '=' | '>' | '|', '=', _)
| ('-' | '=' | '>', '>', _)
| ('<', '-', _)
| (':', ':', _)
| ('.', '.', _)
| ('&', '&', _)
| ('<', '<', _)
| ('|', '|', _) => {
let _ = self.next().unwrap();
Ok(smallvec![first, second.clone()])
}
// Not a known multi-character token: return `first` alone and leave the peeked puncts in
// the iterator.
_ => Ok(smallvec![first]),
}
}
pub(crate) fn expect_fragment(
&mut self,
entry_point: parser::PrefixEntryPoint,
@ -141,7 +185,7 @@ pub(crate) fn expect_fragment(
ExpandResult { value: res, err }
}
pub(crate) fn peek_n(&self, n: usize) -> Option<&tt::TokenTree> {
pub(crate) fn peek_n(&self, n: usize) -> Option<&'a tt::TokenTree> {
self.inner.as_slice().get(n)
}
}