internal: replace L_DOLLAR/R_DOLLAR with parenthesis hack
The general problem we are dealing with here is this: ``` macro_rules! thrice { ($e:expr) => { $e * 3} } fn main() { let x = thrice!(1 + 2); } ``` we really want this to print 9 rather than 7. The way rustc solves this is rather ad-hoc. In rustc, token trees are allowed to include whole AST fragments, so 1+2 is passed through macro expansion as a single unit. This is a significant violation of the token tree model. In rust-analyzer, we intended to handle this in a more elegant way, using token trees with "invisible" delimiters. The idea was that we introduce a new kind of parenthesis, "left $"/"right $", and let the parser intelligently handle this. The idea was inspired by the relevant comment in the proc_macro crate: https://doc.rust-lang.org/stable/proc_macro/enum.Delimiter.html#variant.None > An implicit delimiter, that may, for example, appear around tokens > coming from a “macro variable” $var. It is important to preserve > operator priorities in cases like $var * 3 where $var is 1 + 2. > Implicit delimiters might not survive roundtrip of a token stream > through a string. Now that we are older and wiser, we conclude that the idea doesn't work. _First_, the comment in the proc-macro crate is wishful thinking. Rustc currently completely ignores none delimiters. It solves the (1 + 2) * 3 problem by having magical token trees which can't be duplicated: * https://rust-lang.zulipchat.com/#narrow/stream/185405-t-compiler.2Frust-analyzer/topic/TIL.20that.20token.20streams.20are.20magic * https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler/topic/Handling.20of.20Delimiter.3A.3ANone.20by.20the.20parser _Second_, it's not like our implementation in rust-analyzer works. We special-case expressions (as opposed to treating all kinds of $var captures the same) and we don't know how parser error recovery should work with these dollar-parentheses. 
So, in this PR we simplify the whole thing away by not pretending that we are doing something proper and instead just explicitly special-casing expressions by wrapping them into real `()`. In the future, to maintain bug-parity with `rustc`, what we are probably going to do is add an explicit `CAPTURED_EXPR` *token* which we can explicitly account for in the parser. If/when rustc starts handling delimiter=none properly, we'll port that logic as well, in addition to special handling.
This commit is contained in:
parent
9d33d05d85
commit
5a83d1be66
@ -317,32 +317,35 @@ macro_rules! m {
|
||||
($ i:expr) => { fn bar() { $ i * 3; } }
|
||||
}
|
||||
fn bar() {
|
||||
1+2*3;
|
||||
(1+2)*3;
|
||||
}
|
||||
// MACRO_ITEMS@0..15
|
||||
// FN@0..15
|
||||
// MACRO_ITEMS@0..17
|
||||
// FN@0..17
|
||||
// FN_KW@0..2 "fn"
|
||||
// NAME@2..5
|
||||
// IDENT@2..5 "bar"
|
||||
// PARAM_LIST@5..7
|
||||
// L_PAREN@5..6 "("
|
||||
// R_PAREN@6..7 ")"
|
||||
// BLOCK_EXPR@7..15
|
||||
// STMT_LIST@7..15
|
||||
// BLOCK_EXPR@7..17
|
||||
// STMT_LIST@7..17
|
||||
// L_CURLY@7..8 "{"
|
||||
// EXPR_STMT@8..14
|
||||
// BIN_EXPR@8..13
|
||||
// BIN_EXPR@8..11
|
||||
// LITERAL@8..9
|
||||
// INT_NUMBER@8..9 "1"
|
||||
// PLUS@9..10 "+"
|
||||
// LITERAL@10..11
|
||||
// INT_NUMBER@10..11 "2"
|
||||
// STAR@11..12 "*"
|
||||
// LITERAL@12..13
|
||||
// INT_NUMBER@12..13 "3"
|
||||
// SEMICOLON@13..14 ";"
|
||||
// R_CURLY@14..15 "}"
|
||||
// EXPR_STMT@8..16
|
||||
// BIN_EXPR@8..15
|
||||
// PAREN_EXPR@8..13
|
||||
// L_PAREN@8..9 "("
|
||||
// BIN_EXPR@9..12
|
||||
// LITERAL@9..10
|
||||
// INT_NUMBER@9..10 "1"
|
||||
// PLUS@10..11 "+"
|
||||
// LITERAL@11..12
|
||||
// INT_NUMBER@11..12 "2"
|
||||
// R_PAREN@12..13 ")"
|
||||
// STAR@13..14 "*"
|
||||
// LITERAL@14..15
|
||||
// INT_NUMBER@14..15 "3"
|
||||
// SEMICOLON@15..16 ";"
|
||||
// R_CURLY@16..17 "}"
|
||||
|
||||
"#]],
|
||||
)
|
||||
@ -722,7 +725,7 @@ macro_rules! m {
|
||||
}
|
||||
|
||||
fn bar() {
|
||||
2+2*baz(3).quux();
|
||||
(2+2*baz(3).quux());
|
||||
}
|
||||
"#]],
|
||||
)
|
||||
@ -1370,42 +1373,48 @@ macro_rules! m {
|
||||
}
|
||||
/* parse error: expected identifier */
|
||||
/* parse error: expected SEMICOLON */
|
||||
/* parse error: expected SEMICOLON */
|
||||
/* parse error: expected expression */
|
||||
fn f() {
|
||||
K::C("0");
|
||||
K::(C("0"));
|
||||
}
|
||||
// MACRO_ITEMS@0..17
|
||||
// FN@0..17
|
||||
// MACRO_ITEMS@0..19
|
||||
// FN@0..19
|
||||
// FN_KW@0..2 "fn"
|
||||
// NAME@2..3
|
||||
// IDENT@2..3 "f"
|
||||
// PARAM_LIST@3..5
|
||||
// L_PAREN@3..4 "("
|
||||
// R_PAREN@4..5 ")"
|
||||
// BLOCK_EXPR@5..17
|
||||
// STMT_LIST@5..17
|
||||
// BLOCK_EXPR@5..19
|
||||
// STMT_LIST@5..19
|
||||
// L_CURLY@5..6 "{"
|
||||
// EXPR_STMT@6..9
|
||||
// PATH_EXPR@6..9
|
||||
// PATH@6..9
|
||||
// EXPR_STMT@6..10
|
||||
// PATH_EXPR@6..10
|
||||
// PATH@6..10
|
||||
// PATH@6..7
|
||||
// PATH_SEGMENT@6..7
|
||||
// NAME_REF@6..7
|
||||
// IDENT@6..7 "K"
|
||||
// COLON2@7..9 "::"
|
||||
// EXPR_STMT@9..16
|
||||
// CALL_EXPR@9..15
|
||||
// PATH_EXPR@9..10
|
||||
// PATH@9..10
|
||||
// PATH_SEGMENT@9..10
|
||||
// NAME_REF@9..10
|
||||
// IDENT@9..10 "C"
|
||||
// ARG_LIST@10..15
|
||||
// L_PAREN@10..11 "("
|
||||
// LITERAL@11..14
|
||||
// STRING@11..14 "\"0\""
|
||||
// R_PAREN@14..15 ")"
|
||||
// SEMICOLON@15..16 ";"
|
||||
// R_CURLY@16..17 "}"
|
||||
// ERROR@9..10
|
||||
// L_PAREN@9..10 "("
|
||||
// EXPR_STMT@10..16
|
||||
// CALL_EXPR@10..16
|
||||
// PATH_EXPR@10..11
|
||||
// PATH@10..11
|
||||
// PATH_SEGMENT@10..11
|
||||
// NAME_REF@10..11
|
||||
// IDENT@10..11 "C"
|
||||
// ARG_LIST@11..16
|
||||
// L_PAREN@11..12 "("
|
||||
// LITERAL@12..15
|
||||
// STRING@12..15 "\"0\""
|
||||
// R_PAREN@15..16 ")"
|
||||
// ERROR@16..17
|
||||
// R_PAREN@16..17 ")"
|
||||
// SEMICOLON@17..18 ";"
|
||||
// R_CURLY@18..19 "}"
|
||||
|
||||
"#]],
|
||||
);
|
||||
@ -1441,7 +1450,7 @@ fn f() {
|
||||
expect![[r#"
|
||||
macro_rules! m { ($expr:expr) => { map($expr) } }
|
||||
fn f() {
|
||||
let _ = map(x+foo);
|
||||
let _ = map((x+foo));
|
||||
}
|
||||
"#]],
|
||||
)
|
||||
|
@ -825,15 +825,18 @@ pub fn new() {
|
||||
};
|
||||
}
|
||||
/* parse error: expected type */
|
||||
/* parse error: expected R_PAREN */
|
||||
/* parse error: expected R_ANGLE */
|
||||
/* parse error: expected COMMA */
|
||||
/* parse error: expected R_ANGLE */
|
||||
/* parse error: expected SEMICOLON */
|
||||
/* parse error: expected SEMICOLON */
|
||||
/* parse error: expected expression */
|
||||
pub fn new() {
|
||||
let _ = 0as u32<<8+8;
|
||||
let _ = 0as u32<<(8+8);
|
||||
}
|
||||
// MACRO_ITEMS@0..29
|
||||
// FN@0..29
|
||||
// MACRO_ITEMS@0..31
|
||||
// FN@0..31
|
||||
// VISIBILITY@0..3
|
||||
// PUB_KW@0..3 "pub"
|
||||
// FN_KW@3..5 "fn"
|
||||
@ -842,39 +845,45 @@ pub fn new() {
|
||||
// PARAM_LIST@8..10
|
||||
// L_PAREN@8..9 "("
|
||||
// R_PAREN@9..10 ")"
|
||||
// BLOCK_EXPR@10..29
|
||||
// STMT_LIST@10..29
|
||||
// BLOCK_EXPR@10..31
|
||||
// STMT_LIST@10..31
|
||||
// L_CURLY@10..11 "{"
|
||||
// LET_STMT@11..24
|
||||
// LET_STMT@11..27
|
||||
// LET_KW@11..14 "let"
|
||||
// WILDCARD_PAT@14..15
|
||||
// UNDERSCORE@14..15 "_"
|
||||
// EQ@15..16 "="
|
||||
// CAST_EXPR@16..24
|
||||
// CAST_EXPR@16..27
|
||||
// LITERAL@16..17
|
||||
// INT_NUMBER@16..17 "0"
|
||||
// AS_KW@17..19 "as"
|
||||
// PATH_TYPE@19..24
|
||||
// PATH@19..24
|
||||
// PATH_SEGMENT@19..24
|
||||
// PATH_TYPE@19..27
|
||||
// PATH@19..27
|
||||
// PATH_SEGMENT@19..27
|
||||
// NAME_REF@19..22
|
||||
// IDENT@19..22 "u32"
|
||||
// GENERIC_ARG_LIST@22..24
|
||||
// GENERIC_ARG_LIST@22..27
|
||||
// L_ANGLE@22..23 "<"
|
||||
// TYPE_ARG@23..24
|
||||
// PATH_TYPE@23..24
|
||||
// PATH@23..24
|
||||
// PATH_SEGMENT@23..24
|
||||
// L_ANGLE@23..24 "<"
|
||||
// EXPR_STMT@24..28
|
||||
// BIN_EXPR@24..27
|
||||
// LITERAL@24..25
|
||||
// INT_NUMBER@24..25 "8"
|
||||
// PLUS@25..26 "+"
|
||||
// LITERAL@26..27
|
||||
// INT_NUMBER@26..27 "8"
|
||||
// SEMICOLON@27..28 ";"
|
||||
// R_CURLY@28..29 "}"
|
||||
// TYPE_ARG@23..27
|
||||
// DYN_TRAIT_TYPE@23..27
|
||||
// TYPE_BOUND_LIST@23..27
|
||||
// TYPE_BOUND@23..26
|
||||
// PATH_TYPE@23..26
|
||||
// PATH@23..26
|
||||
// PATH_SEGMENT@23..26
|
||||
// L_ANGLE@23..24 "<"
|
||||
// PAREN_TYPE@24..26
|
||||
// L_PAREN@24..25 "("
|
||||
// ERROR@25..26
|
||||
// INT_NUMBER@25..26 "8"
|
||||
// PLUS@26..27 "+"
|
||||
// EXPR_STMT@27..28
|
||||
// LITERAL@27..28
|
||||
// INT_NUMBER@27..28 "8"
|
||||
// ERROR@28..29
|
||||
// R_PAREN@28..29 ")"
|
||||
// SEMICOLON@29..30 ";"
|
||||
// R_CURLY@30..31 "}"
|
||||
|
||||
"#]],
|
||||
);
|
||||
|
@ -110,7 +110,12 @@ enum Binding {
|
||||
enum Fragment {
|
||||
/// token fragments are just copy-pasted into the output
|
||||
Tokens(tt::TokenTree),
|
||||
/// Ast fragments are inserted with fake delimiters, so as to make things
|
||||
/// like `$i * 2` where `$i = 1 + 1` work as expectd.
|
||||
Ast(tt::TokenTree),
|
||||
/// Expr ast fragments are surrounded with `()` on insertion to preserve
|
||||
/// precedence. Note that this impl is different from the one currently in
|
||||
/// `rustc` -- `rustc` doesn't translate fragments into token trees at all.
|
||||
///
|
||||
/// At one point in time, we tried to use "fake" delimiters here a-la
|
||||
/// proc-macro delimiter=none. As we later discovered, "none" delimiters are
|
||||
/// tricky to handle in the parser, and rustc doesn't handle those either.
|
||||
Expr(tt::TokenTree),
|
||||
}
|
||||
|
@ -736,7 +736,7 @@ fn match_meta_var(kind: &str, input: &mut TtIter) -> ExpandResult<Option<Fragmen
|
||||
}
|
||||
};
|
||||
let result = input.expect_fragment(fragment);
|
||||
result.map(|tt| if kind == "expr" { tt.map(Fragment::Ast) } else { tt.map(Fragment::Tokens) })
|
||||
result.map(|tt| if kind == "expr" { tt.map(Fragment::Expr) } else { tt.map(Fragment::Tokens) })
|
||||
}
|
||||
|
||||
fn collect_vars(buf: &mut Vec<SmolStr>, pattern: &MetaTemplate) {
|
||||
|
@ -238,7 +238,16 @@ fn expand_repeat(
|
||||
fn push_fragment(buf: &mut Vec<tt::TokenTree>, fragment: Fragment) {
|
||||
match fragment {
|
||||
Fragment::Tokens(tt::TokenTree::Subtree(tt)) => push_subtree(buf, tt),
|
||||
Fragment::Tokens(tt) | Fragment::Ast(tt) => buf.push(tt),
|
||||
Fragment::Expr(tt::TokenTree::Subtree(mut tt)) => {
|
||||
if tt.delimiter.is_none() {
|
||||
tt.delimiter = Some(tt::Delimiter {
|
||||
id: tt::TokenId::unspecified(),
|
||||
kind: tt::DelimiterKind::Parenthesis,
|
||||
})
|
||||
}
|
||||
buf.push(tt.into())
|
||||
}
|
||||
Fragment::Tokens(tt) | Fragment::Expr(tt) => buf.push(tt),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -52,17 +52,20 @@ pub(crate) fn new(buffer: &TokenBuffer) -> SubtreeTokenSource {
|
||||
cursor.bump()
|
||||
}
|
||||
Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
|
||||
cached.push(convert_delim(subtree.delimiter_kind(), false));
|
||||
if let Some(d) = subtree.delimiter_kind() {
|
||||
cached.push(convert_delim(d, false));
|
||||
}
|
||||
cursor.subtree().unwrap()
|
||||
}
|
||||
None => {
|
||||
if let Some(subtree) = cursor.end() {
|
||||
cached.push(convert_delim(subtree.delimiter_kind(), true));
|
||||
None => match cursor.end() {
|
||||
Some(subtree) => {
|
||||
if let Some(d) = subtree.delimiter_kind() {
|
||||
cached.push(convert_delim(d, true));
|
||||
}
|
||||
cursor.bump()
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
None => continue,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
@ -109,17 +112,16 @@ fn is_keyword(&self, kw: &str) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_delim(d: Option<tt::DelimiterKind>, closing: bool) -> TtToken {
|
||||
fn convert_delim(d: tt::DelimiterKind, closing: bool) -> TtToken {
|
||||
let (kinds, texts) = match d {
|
||||
Some(tt::DelimiterKind::Parenthesis) => ([T!['('], T![')']], "()"),
|
||||
Some(tt::DelimiterKind::Brace) => ([T!['{'], T!['}']], "{}"),
|
||||
Some(tt::DelimiterKind::Bracket) => ([T!['['], T![']']], "[]"),
|
||||
None => ([L_DOLLAR, R_DOLLAR], ""),
|
||||
tt::DelimiterKind::Parenthesis => ([T!['('], T![')']], "()"),
|
||||
tt::DelimiterKind::Brace => ([T!['{'], T!['}']], "{}"),
|
||||
tt::DelimiterKind::Bracket => ([T!['['], T![']']], "[]"),
|
||||
};
|
||||
|
||||
let idx = closing as usize;
|
||||
let kind = kinds[idx];
|
||||
let text = if !texts.is_empty() { &texts[idx..texts.len() - (1 - idx)] } else { "" };
|
||||
let text = &texts[idx..texts.len() - (1 - idx)];
|
||||
TtToken { tt: Token { kind, is_jointed_to_next: false }, text: SmolStr::new(text) }
|
||||
}
|
||||
|
||||
|
@ -632,12 +632,11 @@ fn finish(mut self) -> (Parse<SyntaxNode>, TokenMap) {
|
||||
}
|
||||
}
|
||||
|
||||
fn delim_to_str(d: Option<tt::DelimiterKind>, closing: bool) -> &'static str {
|
||||
fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> &'static str {
|
||||
let texts = match d {
|
||||
Some(tt::DelimiterKind::Parenthesis) => "()",
|
||||
Some(tt::DelimiterKind::Brace) => "{}",
|
||||
Some(tt::DelimiterKind::Bracket) => "[]",
|
||||
None => return "",
|
||||
tt::DelimiterKind::Parenthesis => "()",
|
||||
tt::DelimiterKind::Brace => "{}",
|
||||
tt::DelimiterKind::Bracket => "[]",
|
||||
};
|
||||
|
||||
let idx = closing as usize;
|
||||
@ -646,10 +645,6 @@ fn delim_to_str(d: Option<tt::DelimiterKind>, closing: bool) -> &'static str {
|
||||
|
||||
impl<'a> TreeSink for TtTreeSink<'a> {
|
||||
fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
|
||||
if kind == L_DOLLAR || kind == R_DOLLAR {
|
||||
self.cursor = self.cursor.bump_subtree();
|
||||
return;
|
||||
}
|
||||
if kind == LIFETIME_IDENT {
|
||||
n_tokens = 2;
|
||||
}
|
||||
@ -661,48 +656,54 @@ fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
|
||||
break;
|
||||
}
|
||||
last = self.cursor;
|
||||
let text: &str = match self.cursor.token_tree() {
|
||||
Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
|
||||
// Mark the range if needed
|
||||
let (text, id) = match leaf {
|
||||
tt::Leaf::Ident(ident) => (&ident.text, ident.id),
|
||||
tt::Leaf::Punct(punct) => {
|
||||
assert!(punct.char.is_ascii());
|
||||
let char = &(punct.char as u8);
|
||||
tmp_str = SmolStr::new_inline(
|
||||
std::str::from_utf8(std::slice::from_ref(char)).unwrap(),
|
||||
);
|
||||
(&tmp_str, punct.id)
|
||||
}
|
||||
tt::Leaf::Literal(lit) => (&lit.text, lit.id),
|
||||
};
|
||||
let range = TextRange::at(self.text_pos, TextSize::of(text.as_str()));
|
||||
self.token_map.insert(id, range);
|
||||
self.cursor = self.cursor.bump();
|
||||
text
|
||||
}
|
||||
Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
|
||||
self.cursor = self.cursor.subtree().unwrap();
|
||||
if let Some(id) = subtree.delimiter.map(|it| it.id) {
|
||||
self.open_delims.insert(id, self.text_pos);
|
||||
}
|
||||
delim_to_str(subtree.delimiter_kind(), false)
|
||||
}
|
||||
None => {
|
||||
if let Some(parent) = self.cursor.end() {
|
||||
self.cursor = self.cursor.bump();
|
||||
if let Some(id) = parent.delimiter.map(|it| it.id) {
|
||||
if let Some(open_delim) = self.open_delims.get(&id) {
|
||||
let open_range = TextRange::at(*open_delim, TextSize::of('('));
|
||||
let close_range = TextRange::at(self.text_pos, TextSize::of('('));
|
||||
self.token_map.insert_delim(id, open_range, close_range);
|
||||
let text: &str = loop {
|
||||
break match self.cursor.token_tree() {
|
||||
Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
|
||||
// Mark the range if needed
|
||||
let (text, id) = match leaf {
|
||||
tt::Leaf::Ident(ident) => (&ident.text, ident.id),
|
||||
tt::Leaf::Punct(punct) => {
|
||||
assert!(punct.char.is_ascii());
|
||||
let char = &(punct.char as u8);
|
||||
tmp_str = SmolStr::new_inline(
|
||||
std::str::from_utf8(std::slice::from_ref(char)).unwrap(),
|
||||
);
|
||||
(&tmp_str, punct.id)
|
||||
}
|
||||
}
|
||||
delim_to_str(parent.delimiter_kind(), true)
|
||||
} else {
|
||||
continue;
|
||||
tt::Leaf::Literal(lit) => (&lit.text, lit.id),
|
||||
};
|
||||
let range = TextRange::at(self.text_pos, TextSize::of(text.as_str()));
|
||||
self.token_map.insert(id, range);
|
||||
self.cursor = self.cursor.bump();
|
||||
text
|
||||
}
|
||||
}
|
||||
Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
|
||||
self.cursor = self.cursor.subtree().unwrap();
|
||||
match subtree.delimiter {
|
||||
Some(d) => {
|
||||
self.open_delims.insert(d.id, self.text_pos);
|
||||
delim_to_str(d.kind, false)
|
||||
}
|
||||
None => continue,
|
||||
}
|
||||
}
|
||||
None => {
|
||||
let parent = self.cursor.end().unwrap();
|
||||
self.cursor = self.cursor.bump();
|
||||
match parent.delimiter {
|
||||
Some(d) => {
|
||||
if let Some(open_delim) = self.open_delims.get(&d.id) {
|
||||
let open_range = TextRange::at(*open_delim, TextSize::of('('));
|
||||
let close_range =
|
||||
TextRange::at(self.text_pos, TextSize::of('('));
|
||||
self.token_map.insert_delim(d.id, open_range, close_range);
|
||||
}
|
||||
delim_to_str(d.kind, true)
|
||||
}
|
||||
None => continue,
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
self.buf += text;
|
||||
self.text_pos += TextSize::of(text);
|
||||
|
Loading…
Reference in New Issue
Block a user