diff --git a/crates/hir-def/src/macro_expansion_tests/mbe.rs b/crates/hir-def/src/macro_expansion_tests/mbe.rs index 457e43925c6..2d5f2a692e5 100644 --- a/crates/hir-def/src/macro_expansion_tests/mbe.rs +++ b/crates/hir-def/src/macro_expansion_tests/mbe.rs @@ -1630,3 +1630,48 @@ macro_rules! m { "#]], ); } + +#[test] +fn test_punct_without_space() { + // Puncts are "glued" greedily. + check( + r#" +macro_rules! foo { + (: : :) => { "1 1 1" }; + (: ::) => { "1 2" }; + (:: :) => { "2 1" }; + + (: : : :) => { "1 1 1 1" }; + (:: : :) => { "2 1 1" }; + (: :: :) => { "1 2 1" }; + (: : ::) => { "1 1 2" }; + (:: ::) => { "2 2" }; +} + +fn test() { + foo!(:::); + foo!(: :::); + foo!(::::); +} +"#, + expect![[r#" +macro_rules! foo { + (: : :) => { "1 1 1" }; + (: ::) => { "1 2" }; + (:: :) => { "2 1" }; + + (: : : :) => { "1 1 1 1" }; + (:: : :) => { "2 1 1" }; + (: :: :) => { "1 2 1" }; + (: : ::) => { "1 1 2" }; + (:: ::) => { "2 2" }; +} + +fn test() { + "2 1"; + "1 2 1"; + "2 2"; +} +"#]], + ); +} diff --git a/crates/mbe/src/benchmark.rs b/crates/mbe/src/benchmark.rs index 1915c0b6611..4b750025018 100644 --- a/crates/mbe/src/benchmark.rs +++ b/crates/mbe/src/benchmark.rs @@ -141,7 +141,13 @@ fn collect_from_op(op: &Op, parent: &mut tt::Subtree, seed: &mut usize) { None => (), Some(kind) => panic!("Unhandled kind {kind:?}"), }, - Op::Leaf(leaf) => parent.token_trees.push(leaf.clone().into()), + Op::Literal(it) => parent.token_trees.push(tt::Leaf::from(it.clone()).into()), + Op::Ident(it) => parent.token_trees.push(tt::Leaf::from(it.clone()).into()), + Op::Punct(puncts) => { + for punct in puncts { + parent.token_trees.push(tt::Leaf::from(punct.clone()).into()); + } + } Op::Repeat { tokens, kind, separator } => { let max = 10; let cnt = match kind { diff --git a/crates/mbe/src/expander/matcher.rs b/crates/mbe/src/expander/matcher.rs index b6c5c1026ee..88eae136f73 100644 --- a/crates/mbe/src/expander/matcher.rs +++ b/crates/mbe/src/expander/matcher.rs @@ -68,7 +68,7 @@ expander::{Binding, Bindings, ExpandResult, Fragment}, parser::{MetaVarKind, Op, RepeatKind, Separator}, tt_iter::TtIter, - ExpandError, MetaTemplate, + ExpandError, MetaTemplate, ValueResult, }; impl Bindings { @@ -321,8 +321,8 @@ struct MatchState<'t> { /// The KleeneOp of this sequence if we are in a repetition. sep_kind: Option, - /// Number of tokens of separator parsed - sep_parsed: Option, + /// Whether we already matched separator token. + sep_matched: bool, /// Matched meta variables bindings bindings: BindingsIdx, @@ -387,7 +387,7 @@ macro_rules! try_push { None => { // We are at or past the end of the matcher of `item`. if let Some(up) = &item.up { - if item.sep_parsed.is_none() { + if !item.sep_matched { // Get the `up` matcher let mut new_pos = (**up).clone(); new_pos.bindings = bindings_builder.copy(&new_pos.bindings); @@ -401,14 +401,17 @@ macro_rules! try_push { } // Check if we need a separator. - // We check the separator one by one - let sep_idx = item.sep_parsed.unwrap_or(0); - let sep_len = item.sep.as_ref().map_or(0, Separator::tt_count); - if item.sep.is_some() && sep_idx != sep_len { + if item.sep.is_some() && !item.sep_matched { let sep = item.sep.as_ref().unwrap(); - if src.clone().expect_separator(sep, sep_idx) { + let mut fork = src.clone(); + if fork.expect_separator(sep) { + // HACK: here we use `meta_result` to pass `TtIter` back to caller because + // it might have been advanced multiple times. `ValueResult` is + // insignificant. + item.meta_result = Some((fork, ValueResult::ok(None))); item.dot.next(); - item.sep_parsed = Some(sep_idx + 1); + // item.sep_parsed = Some(sep_len); + item.sep_matched = true; try_push!(next_items, item); } } @@ -416,7 +419,7 @@ macro_rules! try_push { // and try to match again UNLESS we are only allowed to have _one_ repetition. else if item.sep_kind != Some(RepeatKind::ZeroOrOne) { item.dot = item.dot.reset(); - item.sep_parsed = None; + item.sep_matched = false; bindings_builder.push_default(&mut item.bindings); cur_items.push(item); } @@ -451,7 +454,7 @@ macro_rules! try_push { up: Some(Box::new(item)), sep: separator.clone(), sep_kind: Some(*kind), - sep_parsed: None, + sep_matched: false, bindings: bindings_builder.alloc(), meta_result: None, is_error: false, @@ -500,18 +503,69 @@ macro_rules! try_push { } } } - OpDelimited::Op(Op::Leaf(leaf)) => { - if let Err(err) = match_leaf(leaf, &mut src.clone()) { - res.add_err(err); - item.is_error = true; + OpDelimited::Op(Op::Literal(lhs)) => { + if let Ok(rhs) = src.clone().expect_leaf() { + if matches!(rhs, tt::Leaf::Literal(it) if it.text == lhs.text) { + item.dot.next(); + } else { + res.add_err(ExpandError::UnexpectedToken); + item.is_error = true; + } } else { - item.dot.next(); + res.add_err(ExpandError::binding_error(format!("expected literal: `{lhs}`"))); + item.is_error = true; } try_push!(next_items, item); } + OpDelimited::Op(Op::Ident(lhs)) => { + if let Ok(rhs) = src.clone().expect_leaf() { + if matches!(rhs, tt::Leaf::Ident(it) if it.text == lhs.text) { + item.dot.next(); + } else { + res.add_err(ExpandError::UnexpectedToken); + item.is_error = true; + } + } else { + res.add_err(ExpandError::binding_error(format!("expected ident: `{lhs}`"))); + item.is_error = true; + } + try_push!(next_items, item); + } + OpDelimited::Op(Op::Punct(lhs)) => { + let mut fork = src.clone(); + let error = if let Ok(rhs) = fork.expect_glued_punct() { + let first_is_single_quote = rhs[0].char == '\''; + let lhs = lhs.iter().map(|it| it.char); + let rhs = rhs.iter().map(|it| it.char); + if lhs.clone().eq(rhs) { + // HACK: here we use `meta_result` to pass `TtIter` back to caller because + // it might have been advanced multiple times. `ValueResult` is + // insignificant. + item.meta_result = Some((fork, ValueResult::ok(None))); + item.dot.next(); + next_items.push(item); + continue; + } + + if first_is_single_quote { + // If the first punct token is a single quote, that's a part of a lifetime + // ident, not a punct. + ExpandError::UnexpectedToken + } else { + let lhs: SmolStr = lhs.collect(); + ExpandError::binding_error(format!("expected punct: `{lhs}`")) + } + } else { + ExpandError::UnexpectedToken + }; + + res.add_err(error); + item.is_error = true; + error_items.push(item); + } OpDelimited::Op(Op::Ignore { .. } | Op::Index { .. }) => {} OpDelimited::Open => { - if matches!(src.clone().next(), Some(tt::TokenTree::Subtree(..))) { + if matches!(src.peek_n(0), Some(tt::TokenTree::Subtree(..))) { item.dot.next(); try_push!(next_items, item); } @@ -541,7 +595,7 @@ fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree) -> Match { up: None, sep: None, sep_kind: None, - sep_parsed: None, + sep_matched: false, bindings: bindings_builder.alloc(), is_error: false, meta_result: None, @@ -616,21 +670,33 @@ fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree) -> Match { } // Dump all possible `next_items` into `cur_items` for the next iteration. else if !next_items.is_empty() { + if let Some((iter, _)) = next_items[0].meta_result.take() { + // We've matched a possibly "glued" punct. The matched punct (hence + // `meta_result` also) must be the same for all items. + // FIXME: If there are multiple items, it's definitely redundant (and it's hacky! + // `meta_result` isn't supposed to be used this way). + + // We already bumped, so no need to call `.next()` like in the other branch. + src = iter; + for item in next_items.iter_mut() { + item.meta_result = None; + } + } else { + match src.next() { + Some(tt::TokenTree::Subtree(subtree)) => { + stack.push(src.clone()); + src = TtIter::new(subtree); + } + None => { + if let Some(iter) = stack.pop() { + src = iter; + } + } + _ => (), + } + } // Now process the next token cur_items.extend(next_items.drain(..)); - - match src.next() { - Some(tt::TokenTree::Subtree(subtree)) => { - stack.push(src.clone()); - src = TtIter::new(subtree); - } - None => { - if let Some(iter) = stack.pop() { - src = iter; - } - } - _ => (), - } } // Finally, we have the case where we need to call the black-box parser to get some // nonterminal. @@ -663,27 +729,6 @@ fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree) -> Match { } } -fn match_leaf(lhs: &tt::Leaf, src: &mut TtIter<'_>) -> Result<(), ExpandError> { - let rhs = src - .expect_leaf() - .map_err(|()| ExpandError::binding_error(format!("expected leaf: `{lhs}`")))?; - match (lhs, rhs) { - ( - tt::Leaf::Punct(tt::Punct { char: lhs, .. }), - tt::Leaf::Punct(tt::Punct { char: rhs, .. }), - ) if lhs == rhs => Ok(()), - ( - tt::Leaf::Ident(tt::Ident { text: lhs, .. }), - tt::Leaf::Ident(tt::Ident { text: rhs, .. }), - ) if lhs == rhs => Ok(()), - ( - tt::Leaf::Literal(tt::Literal { text: lhs, .. }), - tt::Leaf::Literal(tt::Literal { text: rhs, .. }), - ) if lhs == rhs => Ok(()), - _ => Err(ExpandError::UnexpectedToken), - } -} - fn match_meta_var(kind: MetaVarKind, input: &mut TtIter<'_>) -> ExpandResult> { let fragment = match kind { MetaVarKind::Path => parser::PrefixEntryPoint::Path, @@ -756,10 +801,10 @@ fn collect_vars(collector_fun: &mut impl FnMut(SmolStr), pattern: &MetaTemplate) for op in pattern.iter() { match op { Op::Var { name, .. } => collector_fun(name.clone()), - Op::Leaf(_) => (), Op::Subtree { tokens, .. } => collect_vars(collector_fun, tokens), Op::Repeat { tokens, .. } => collect_vars(collector_fun, tokens), - Op::Ignore { .. } | Op::Index { .. } => {} + Op::Ignore { .. } | Op::Index { .. } | Op::Literal(_) | Op::Ident(_) | Op::Punct(_) => { + } } } } @@ -822,14 +867,14 @@ fn size_hint(&self) -> (usize, Option) { } impl<'a> TtIter<'a> { - fn expect_separator(&mut self, separator: &Separator, idx: usize) -> bool { + fn expect_separator(&mut self, separator: &Separator) -> bool { let mut fork = self.clone(); let ok = match separator { - Separator::Ident(lhs) if idx == 0 => match fork.expect_ident_or_underscore() { + Separator::Ident(lhs) => match fork.expect_ident_or_underscore() { Ok(rhs) => rhs.text == lhs.text, Err(_) => false, }, - Separator::Literal(lhs) if idx == 0 => match fork.expect_literal() { + Separator::Literal(lhs) => match fork.expect_literal() { Ok(rhs) => match rhs { tt::Leaf::Literal(rhs) => rhs.text == lhs.text, tt::Leaf::Ident(rhs) => rhs.text == lhs.text, @@ -837,11 +882,14 @@ fn expect_separator(&mut self, separator: &Separator, idx: usize) -> bool { }, Err(_) => false, }, - Separator::Puncts(lhss) if idx < lhss.len() => match fork.expect_punct() { - Ok(rhs) => rhs.char == lhss[idx].char, + Separator::Puncts(lhs) => match fork.expect_glued_punct() { + Ok(rhs) => { + let lhs = lhs.iter().map(|it| it.char); + let rhs = rhs.iter().map(|it| it.char); + lhs.eq(rhs) + } Err(_) => false, }, - _ => false, }; if ok { *self = fork; @@ -850,52 +898,21 @@ fn expect_separator(&mut self, separator: &Separator, idx: usize) -> bool { } fn expect_tt(&mut self) -> Result { - match self.peek_n(0) { - Some(tt::TokenTree::Leaf(tt::Leaf::Punct(punct))) if punct.char == '\'' => { - return self.expect_lifetime(); + if let Some(tt::TokenTree::Leaf(tt::Leaf::Punct(punct))) = self.peek_n(0) { + if punct.char == '\'' { + self.expect_lifetime() + } else { + let puncts = self.expect_glued_punct()?; + let token_trees = puncts.into_iter().map(|p| tt::Leaf::Punct(p).into()).collect(); + Ok(tt::TokenTree::Subtree(tt::Subtree { delimiter: None, token_trees })) } - _ => (), - } - - let tt = self.next().ok_or(())?.clone(); - let punct = match tt { - tt::TokenTree::Leaf(tt::Leaf::Punct(punct)) if punct.spacing == tt::Spacing::Joint => { - punct - } - _ => return Ok(tt), - }; - - let (second, third) = match (self.peek_n(0), self.peek_n(1)) { - ( - Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p2))), - Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p3))), - ) if p2.spacing == tt::Spacing::Joint => (p2.char, Some(p3.char)), - (Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p2))), _) => (p2.char, None), - _ => return Ok(tt), - }; - - match (punct.char, second, third) { - ('.', '.', Some('.' | '=')) | ('<', '<', Some('=')) | ('>', '>', Some('=')) => { - let tt2 = self.next().unwrap().clone(); - let tt3 = self.next().unwrap().clone(); - Ok(tt::Subtree { delimiter: None, token_trees: vec![tt, tt2, tt3] }.into()) - } - ('-' | '!' | '*' | '/' | '&' | '%' | '^' | '+' | '<' | '=' | '>' | '|', '=', _) - | ('-' | '=' | '>', '>', _) - | (':', ':', _) - | ('.', '.', _) - | ('&', '&', _) - | ('<', '<', _) - | ('|', '|', _) => { - let tt2 = self.next().unwrap().clone(); - Ok(tt::Subtree { delimiter: None, token_trees: vec![tt, tt2] }.into()) - } - _ => Ok(tt), + } else { + self.next().ok_or(()).cloned() } } fn expect_lifetime(&mut self) -> Result { - let punct = self.expect_punct()?; + let punct = self.expect_single_punct()?; if punct.char != '\'' { return Err(()); } diff --git a/crates/mbe/src/expander/transcriber.rs b/crates/mbe/src/expander/transcriber.rs index cbb59ab8e67..db0d327bf40 100644 --- a/crates/mbe/src/expander/transcriber.rs +++ b/crates/mbe/src/expander/transcriber.rs @@ -134,7 +134,13 @@ fn expand_subtree( let mut err = None; for op in template.iter() { match op { - Op::Leaf(tt) => arena.push(tt.clone().into()), + Op::Literal(it) => arena.push(tt::Leaf::from(it.clone()).into()), + Op::Ident(it) => arena.push(tt::Leaf::from(it.clone()).into()), + Op::Punct(puncts) => { + for punct in puncts { + arena.push(tt::Leaf::from(punct.clone()).into()); + } + } Op::Subtree { tokens, delimiter } => { let ExpandResult { value: tt, err: e } = expand_subtree(ctx, tokens, *delimiter, arena); diff --git a/crates/mbe/src/parser.rs b/crates/mbe/src/parser.rs index 351c359b73c..3d9a61dbc86 100644 --- a/crates/mbe/src/parser.rs +++ b/crates/mbe/src/parser.rs @@ -1,7 +1,7 @@ //! Parser recognizes special macro syntax, `$var` and `$(repeat)*`, in token //! trees. -use smallvec::SmallVec; +use smallvec::{smallvec, SmallVec}; use syntax::SmolStr; use crate::{tt_iter::TtIter, ParseError}; @@ -39,7 +39,7 @@ fn parse(tt: &tt::Subtree, mode: Mode) -> Result { let mut src = TtIter::new(tt); let mut res = Vec::new(); - while let Some(first) = src.next() { + while let Some(first) = src.peek_n(0) { let op = next_op(first, &mut src, mode)?; res.push(op); } @@ -54,8 +54,10 @@ pub(crate) enum Op { Ignore { name: SmolStr, id: tt::TokenId }, Index { depth: u32 }, Repeat { tokens: MetaTemplate, kind: RepeatKind, separator: Option }, - Leaf(tt::Leaf), Subtree { tokens: MetaTemplate, delimiter: Option }, + Literal(tt::Literal), + Punct(SmallVec<[tt::Punct; 3]>), + Ident(tt::Ident), } #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -108,28 +110,23 @@ fn eq(&self, other: &Separator) -> bool { } } -impl Separator { - pub(crate) fn tt_count(&self) -> usize { - match self { - Separator::Literal(_) => 1, - Separator::Ident(_) => 1, - Separator::Puncts(it) => it.len(), - } - } -} - #[derive(Clone, Copy)] enum Mode { Pattern, Template, } -fn next_op<'a>(first: &tt::TokenTree, src: &mut TtIter<'a>, mode: Mode) -> Result { - let res = match first { - tt::TokenTree::Leaf(leaf @ tt::Leaf::Punct(tt::Punct { char: '$', .. })) => { +fn next_op<'a>( + first_peeked: &tt::TokenTree, + src: &mut TtIter<'a>, + mode: Mode, +) -> Result { + let res = match first_peeked { + tt::TokenTree::Leaf(tt::Leaf::Punct(p @ tt::Punct { char: '$', .. })) => { + src.next().expect("first token already peeked"); // Note that the '$' itself is a valid token inside macro_rules. let second = match src.next() { - None => return Ok(Op::Leaf(leaf.clone())), + None => return Ok(Op::Punct(smallvec![p.clone()])), Some(it) => it, }; match second { @@ -160,7 +157,7 @@ fn next_op<'a>(first: &tt::TokenTree, src: &mut TtIter<'a>, mode: Mode) -> Resul tt::TokenTree::Leaf(leaf) => match leaf { tt::Leaf::Ident(ident) if ident.text == "crate" => { // We simply produce identifier `$crate` here. And it will be resolved when lowering ast to Path. - Op::Leaf(tt::Leaf::from(tt::Ident { text: "$crate".into(), id: ident.id })) + Op::Ident(tt::Ident { text: "$crate".into(), id: ident.id }) } tt::Leaf::Ident(ident) => { let kind = eat_fragment_kind(src, mode)?; @@ -180,7 +177,7 @@ fn next_op<'a>(first: &tt::TokenTree, src: &mut TtIter<'a>, mode: Mode) -> Resul "`$$` is not allowed on the pattern side", )) } - Mode::Template => Op::Leaf(tt::Leaf::Punct(*punct)), + Mode::Template => Op::Punct(smallvec![*punct]), }, tt::Leaf::Punct(_) | tt::Leaf::Literal(_) => { return Err(ParseError::expected("expected ident")) @@ -188,8 +185,25 @@ fn next_op<'a>(first: &tt::TokenTree, src: &mut TtIter<'a>, mode: Mode) -> Resul }, } } - tt::TokenTree::Leaf(tt) => Op::Leaf(tt.clone()), + + tt::TokenTree::Leaf(tt::Leaf::Literal(it)) => { + src.next().expect("first token already peeked"); + Op::Literal(it.clone()) + } + + tt::TokenTree::Leaf(tt::Leaf::Ident(it)) => { + src.next().expect("first token already peeked"); + Op::Ident(it.clone()) + } + + tt::TokenTree::Leaf(tt::Leaf::Punct(_)) => { + // There's at least one punct so this shouldn't fail. + let puncts = src.expect_glued_punct().unwrap(); + Op::Punct(puncts) + } + tt::TokenTree::Subtree(subtree) => { + src.next().expect("first token already peeked"); let tokens = MetaTemplate::parse(subtree, mode)?; Op::Subtree { tokens, delimiter: subtree.delimiter } } diff --git a/crates/mbe/src/tt_iter.rs b/crates/mbe/src/tt_iter.rs index 7aceb676c74..bee7b5de6ac 100644 --- a/crates/mbe/src/tt_iter.rs +++ b/crates/mbe/src/tt_iter.rs @@ -1,6 +1,7 @@ //! A "Parser" structure for token trees. We use this when parsing a declarative //! macro definition into a list of patterns and templates. +use smallvec::{smallvec, SmallVec}; use syntax::SyntaxKind; use tt::buffer::TokenBuffer; @@ -80,13 +81,56 @@ pub(crate) fn expect_u32_literal(&mut self) -> Result { } } - pub(crate) fn expect_punct(&mut self) -> Result<&'a tt::Punct, ()> { + pub(crate) fn expect_single_punct(&mut self) -> Result<&'a tt::Punct, ()> { match self.expect_leaf()? { tt::Leaf::Punct(it) => Ok(it), _ => Err(()), } } + /// Returns consecutive `Punct`s that can be glued together. + /// + /// This method currently may return a single quotation, which is part of lifetime ident and + /// conceptually not a punct in the context of mbe. Callers should handle this. + pub(crate) fn expect_glued_punct(&mut self) -> Result, ()> { + let tt::TokenTree::Leaf(tt::Leaf::Punct(first)) = self.next().ok_or(())?.clone() else { + return Err(()); + }; + + if first.spacing == tt::Spacing::Alone { + return Ok(smallvec![first]); + } + + let (second, third) = match (self.peek_n(0), self.peek_n(1)) { + ( + Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p2))), + Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p3))), + ) if p2.spacing == tt::Spacing::Joint => (p2, Some(p3)), + (Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p2))), _) => (p2, None), + _ => return Ok(smallvec![first]), + }; + + match (first.char, second.char, third.map(|it| it.char)) { + ('.', '.', Some('.' | '=')) | ('<', '<', Some('=')) | ('>', '>', Some('=')) => { + let _ = self.next().unwrap(); + let _ = self.next().unwrap(); + Ok(smallvec![first, second.clone(), third.unwrap().clone()]) + } + ('-' | '!' | '*' | '/' | '&' | '%' | '^' | '+' | '<' | '=' | '>' | '|', '=', _) + | ('-' | '=' | '>', '>', _) + | ('<', '-', _) + | (':', ':', _) + | ('.', '.', _) + | ('&', '&', _) + | ('<', '<', _) + | ('|', '|', _) => { + let _ = self.next().unwrap(); + Ok(smallvec![first, second.clone()]) + } + _ => Ok(smallvec![first]), + } + } + pub(crate) fn expect_fragment( &mut self, entry_point: parser::PrefixEntryPoint, @@ -141,7 +185,7 @@ pub(crate) fn expect_fragment( ExpandResult { value: res, err } } - pub(crate) fn peek_n(&self, n: usize) -> Option<&tt::TokenTree> { + pub(crate) fn peek_n(&self, n: usize) -> Option<&'a tt::TokenTree> { self.inner.as_slice().get(n) } }