diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index fcb73fbc7c5..e3cde9eedcb 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs @@ -3,12 +3,11 @@ use ra_parser::{FragmentKind, ParseError, TreeSink}; use ra_syntax::{ ast::{self, make::tokens::doc_comment}, - tokenize, AstToken, NodeOrToken, Parse, SmolStr, SyntaxKind, + tokenize, AstToken, Parse, SmolStr, SyntaxKind, SyntaxKind::*, - SyntaxNode, SyntaxTreeBuilder, TextRange, TextUnit, Token, T, + SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextUnit, Token as RawToken, T, }; use rustc_hash::FxHashMap; -use std::iter::successors; use tt::buffer::{Cursor, TokenBuffer}; use crate::subtree_source::SubtreeTokenSource; @@ -50,10 +49,8 @@ pub fn ast_to_token_tree(ast: &impl ast::AstNode) -> Option<(tt::Subtree, TokenM /// will consume). pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> Option<(tt::Subtree, TokenMap)> { let global_offset = node.text_range().start(); - let mut c = Convertor { - id_alloc: { TokenIdAlloc { map: TokenMap::default(), global_offset, next_id: 0 } }, - }; - let subtree = c.go(node)?; + let mut c = Convertor::new(node, global_offset); + let subtree = c.go()?; Some((subtree, c.id_alloc.map)) } @@ -152,6 +149,10 @@ impl TokenMap { } } } + + fn remove_delim(&mut self, token_id: tt::TokenId) { + self.entries.retain(|(tid, _)| *tid != token_id); + } } /// Returns the textual content of a doc comment block as a quoted string @@ -237,25 +238,26 @@ impl TokenIdAlloc { token_id } - fn delim(&mut self, open_abs_range: TextRange, close_abs_range: TextRange) -> tt::TokenId { - let open_relative_range = open_abs_range - self.global_offset; - let close_relative_range = close_abs_range - self.global_offset; - let token_id = tt::TokenId(self.next_id); - self.next_id += 1; - - self.map.insert_delim(token_id, open_relative_range, close_relative_range); - token_id - } - fn open_delim(&mut self, open_abs_range: TextRange) -> tt::TokenId { let token_id = tt::TokenId(self.next_id); self.next_id += 1; - self.map.insert_delim(token_id, open_abs_range, open_abs_range); + self.map.insert_delim( + token_id, + open_abs_range - self.global_offset, + open_abs_range - self.global_offset, + ); token_id } - fn close_delim(&mut self, id: tt::TokenId, close_abs_range: TextRange) { - self.map.update_close_delim(id, close_abs_range); + fn close_delim(&mut self, id: tt::TokenId, close_abs_range: Option) { + match close_abs_range { + None => { + self.map.remove_delim(id); + } + Some(close) => { + self.map.update_close_delim(id, close - self.global_offset); + } + } } } @@ -264,10 +266,20 @@ struct RawConvertor<'a> { text: &'a str, offset: TextUnit, id_alloc: TokenIdAlloc, - inner: std::slice::Iter<'a, Token>, + inner: std::slice::Iter<'a, RawToken>, } -impl RawConvertor<'_> { +trait SrcToken { + fn kind(&self) -> SyntaxKind; + + fn to_char(&self) -> Option; + + fn to_text(&self) -> SmolStr; +} + +trait TokenConvertor { + type Token: SrcToken; + fn go(&mut self) -> Option { let mut subtree = tt::Subtree::default(); subtree.delimiter = None; @@ -285,33 +297,22 @@ impl RawConvertor<'_> { Some(subtree) } - fn bump(&mut self) -> Option<(Token, TextRange)> { - let token = self.inner.next()?; - let range = TextRange::offset_len(self.offset, token.len); - self.offset += token.len; - Some((*token, range)) - } - - fn peek(&self) -> Option { - self.inner.as_slice().get(0).cloned() - } - fn collect_leaf(&mut self, result: &mut Vec) { let (token, range) = match self.bump() { None => return, Some(it) => it, }; - let k: SyntaxKind = token.kind; + let k: SyntaxKind = token.kind(); if k == COMMENT { - let node = doc_comment(&self.text[range]); - if let Some(tokens) = convert_doc_comment(&node) { + if let Some(tokens) = self.convert_doc_comment(&token) { result.extend(tokens); } return; } result.push(if k.is_punct() { + assert_eq!(range.len().to_usize(), 1); let delim = match k { T!['('] => Some((tt::DelimiterKind::Parenthesis, T![')'])), T!['{'] => Some((tt::DelimiterKind::Brace, T!['}'])), @@ -321,40 +322,51 @@ impl RawConvertor<'_> { if let Some((kind, closed)) = delim { let mut subtree = tt::Subtree::default(); - let id = self.id_alloc.open_delim(range); + let id = self.id_alloc().open_delim(range); subtree.delimiter = Some(tt::Delimiter { kind, id }); - while self.peek().map(|it| it.kind != closed).unwrap_or(false) { + while self.peek().map(|it| it.kind() != closed).unwrap_or(false) { self.collect_leaf(&mut subtree.token_trees); } let last_range = match self.bump() { - None => return, + None => { + // For error resilience, we insert an char punct for the opening delim here + self.id_alloc().close_delim(id, None); + let leaf: tt::Leaf = tt::Punct { + id: self.id_alloc().alloc(range), + char: token.to_char().unwrap(), + spacing: tt::Spacing::Alone, + } + .into(); + result.push(leaf.into()); + result.extend(subtree.token_trees); + return; + } Some(it) => it.1, }; - self.id_alloc.close_delim(id, last_range); + self.id_alloc().close_delim(id, Some(last_range)); subtree.into() } else { let spacing = match self.peek() { Some(next) - if next.kind.is_trivia() - || next.kind == T!['['] - || next.kind == T!['{'] - || next.kind == T!['('] => + if next.kind().is_trivia() + || next.kind() == T!['['] + || next.kind() == T!['{'] + || next.kind() == T!['('] => { tt::Spacing::Alone } - Some(next) if next.kind.is_punct() => tt::Spacing::Joint, + Some(next) if next.kind().is_punct() => tt::Spacing::Joint, _ => tt::Spacing::Alone, }; - let char = - self.text[range].chars().next().expect("Token from lexer must be single char"); + let char = token.to_char().expect("Token from lexer must be single char"); - tt::Leaf::from(tt::Punct { char, spacing, id: self.id_alloc.alloc(range) }).into() + tt::Leaf::from(tt::Punct { char, spacing, id: self.id_alloc().alloc(range) }).into() } } else { macro_rules! make_leaf { ($i:ident) => { - tt::$i { id: self.id_alloc.alloc(range), text: self.text[range].into() }.into() + tt::$i { id: self.id_alloc().alloc(range), text: token.to_text() }.into() }; } let leaf: tt::Leaf = match k { @@ -368,133 +380,168 @@ impl RawConvertor<'_> { leaf.into() }); } + + fn convert_doc_comment(&self, token: &Self::Token) -> Option>; + + fn bump(&mut self) -> Option<(Self::Token, TextRange)>; + + fn peek(&self) -> Option; + + fn id_alloc(&mut self) -> &mut TokenIdAlloc; +} + +impl<'a> SrcToken for (RawToken, &'a str) { + fn kind(&self) -> SyntaxKind { + self.0.kind + } + + fn to_char(&self) -> Option { + self.1.chars().next() + } + + fn to_text(&self) -> SmolStr { + self.1.into() + } +} + +impl RawConvertor<'_> {} + +impl<'a> TokenConvertor for RawConvertor<'a> { + type Token = (RawToken, &'a str); + + fn convert_doc_comment(&self, token: &Self::Token) -> Option> { + convert_doc_comment(&doc_comment(token.1)) + } + + fn bump(&mut self) -> Option<(Self::Token, TextRange)> { + let token = self.inner.next()?; + let range = TextRange::offset_len(self.offset, token.len); + self.offset += token.len; + + Some(((*token, &self.text[range]), range)) + } + + fn peek(&self) -> Option { + let token = self.inner.as_slice().get(0).cloned(); + + token.map(|it| { + let range = TextRange::offset_len(self.offset, it.len); + (it, &self.text[range]) + }) + } + + fn id_alloc(&mut self) -> &mut TokenIdAlloc { + &mut self.id_alloc + } } -// FIXME: There are some duplicate logic between RawConvertor and Convertor -// It would be nice to refactor to converting SyntaxNode to ra_parser::Token and thus -// use RawConvertor directly. But performance-wise it may not be a good idea ? struct Convertor { id_alloc: TokenIdAlloc, + current: Option, + range: TextRange, + punct_offset: Option<(SyntaxToken, TextUnit)>, } impl Convertor { - fn go(&mut self, tt: &SyntaxNode) -> Option { - // This tree is empty - if tt.first_child_or_token().is_none() { - return Some(tt::Subtree { token_trees: vec![], delimiter: None }); + fn new(node: &SyntaxNode, global_offset: TextUnit) -> Convertor { + Convertor { + id_alloc: { TokenIdAlloc { map: TokenMap::default(), global_offset, next_id: 0 } }, + current: node.first_token(), + range: node.text_range(), + punct_offset: None, + } + } +} + +enum SynToken { + Ordiniary(SyntaxToken), + Punch(SyntaxToken, TextUnit), +} + +impl SynToken { + fn token(&self) -> &SyntaxToken { + match self { + SynToken::Ordiniary(it) => it, + SynToken::Punch(it, _) => it, + } + } +} + +impl SrcToken for SynToken { + fn kind(&self) -> SyntaxKind { + self.token().kind() + } + fn to_char(&self) -> Option { + match self { + SynToken::Ordiniary(_) => None, + SynToken::Punch(it, i) => it.text().chars().nth(i.to_usize()), + } + } + fn to_text(&self) -> SmolStr { + self.token().text().clone() + } +} + +impl TokenConvertor for Convertor { + type Token = SynToken; + fn convert_doc_comment(&self, token: &Self::Token) -> Option> { + convert_doc_comment(token.token()) + } + + fn bump(&mut self) -> Option<(Self::Token, TextRange)> { + if let Some((punct, offset)) = self.punct_offset.clone() { + if offset.to_usize() + 1 < punct.text().len() { + let offset = offset + TextUnit::from_usize(1); + let range = punct.text_range(); + self.punct_offset = Some((punct.clone(), offset)); + let range = TextRange::offset_len(range.start() + offset, TextUnit::from_usize(1)); + return Some((SynToken::Punch(punct, offset), range)); + } } - let first_child = tt.first_child_or_token()?; - let last_child = tt.last_child_or_token()?; - - // ignore trivial first_child and last_child - let first_child = successors(Some(first_child), |it| { - if it.kind().is_trivia() { - it.next_sibling_or_token() - } else { - None - } - }) - .last() - .unwrap(); - if first_child.kind().is_trivia() { - return Some(tt::Subtree { token_trees: vec![], delimiter: None }); + let curr = self.current.clone()?; + if !curr.text_range().is_subrange(&self.range) { + return None; } + self.current = curr.next_token(); - let last_child = successors(Some(last_child), |it| { - if it.kind().is_trivia() { - it.prev_sibling_or_token() - } else { - None - } - }) - .last() - .unwrap(); - - let (delimiter_kind, skip_first) = match (first_child.kind(), last_child.kind()) { - (T!['('], T![')']) => (Some(tt::DelimiterKind::Parenthesis), true), - (T!['{'], T!['}']) => (Some(tt::DelimiterKind::Brace), true), - (T!['['], T![']']) => (Some(tt::DelimiterKind::Bracket), true), - _ => (None, false), + let token = if curr.kind().is_punct() { + let range = curr.text_range(); + let range = TextRange::offset_len(range.start(), TextUnit::from_usize(1)); + self.punct_offset = Some((curr.clone(), TextUnit::from_usize(0))); + (SynToken::Punch(curr, TextUnit::from_usize(0)), range) + } else { + self.punct_offset = None; + let range = curr.text_range(); + (SynToken::Ordiniary(curr), range) }; - let delimiter = delimiter_kind.map(|kind| tt::Delimiter { - kind, - id: self.id_alloc.delim(first_child.text_range(), last_child.text_range()), - }); - let mut token_trees = Vec::new(); - let mut child_iter = tt.children_with_tokens().skip(skip_first as usize).peekable(); + Some(token) + } - while let Some(child) = child_iter.next() { - if skip_first && (child == first_child || child == last_child) { - continue; + fn peek(&self) -> Option { + if let Some((punct, mut offset)) = self.punct_offset.clone() { + offset = offset + TextUnit::from_usize(1); + if offset.to_usize() < punct.text().len() { + return Some(SynToken::Punch(punct, offset)); } - - match child { - NodeOrToken::Token(token) => { - if let Some(doc_tokens) = convert_doc_comment(&token) { - token_trees.extend(doc_tokens); - } else if token.kind().is_trivia() { - continue; - } else if token.kind().is_punct() { - // we need to pull apart joined punctuation tokens - let last_spacing = match child_iter.peek() { - Some(NodeOrToken::Token(token)) => { - if token.kind().is_punct() { - tt::Spacing::Joint - } else { - tt::Spacing::Alone - } - } - _ => tt::Spacing::Alone, - }; - let spacing_iter = std::iter::repeat(tt::Spacing::Joint) - .take(token.text().len() - 1) - .chain(std::iter::once(last_spacing)); - for (char, spacing) in token.text().chars().zip(spacing_iter) { - token_trees.push( - tt::Leaf::from(tt::Punct { - char, - spacing, - id: self.id_alloc.alloc(token.text_range()), - }) - .into(), - ); - } - } else { - macro_rules! make_leaf { - ($i:ident) => { - tt::$i { - id: self.id_alloc.alloc(token.text_range()), - text: token.text().clone(), - } - .into() - }; - } - - let child: tt::Leaf = match token.kind() { - T![true] | T![false] => make_leaf!(Literal), - IDENT | LIFETIME => make_leaf!(Ident), - k if k.is_keyword() => make_leaf!(Ident), - k if k.is_literal() => make_leaf!(Literal), - _ => return None, - }; - token_trees.push(child.into()); - } - } - NodeOrToken::Node(node) => { - let child_subtree = self.go(&node)?; - if child_subtree.delimiter.is_none() && node.kind() != SyntaxKind::TOKEN_TREE { - token_trees.extend(child_subtree.token_trees); - } else { - token_trees.push(child_subtree.into()); - } - } - }; } - let res = tt::Subtree { delimiter, token_trees }; - Some(res) + let curr = self.current.clone()?; + if !curr.text_range().is_subrange(&self.range) { + return None; + } + + let token = if curr.kind().is_punct() { + SynToken::Punch(curr, TextUnit::from_usize(0)) + } else { + SynToken::Ordiniary(curr) + }; + Some(token) + } + + fn id_alloc(&mut self) -> &mut TokenIdAlloc { + &mut self.id_alloc } } diff --git a/crates/ra_mbe/src/tests.rs b/crates/ra_mbe/src/tests.rs index 44f3819388b..a7fcea0acee 100644 --- a/crates/ra_mbe/src/tests.rs +++ b/crates/ra_mbe/src/tests.rs @@ -427,22 +427,28 @@ MACRO_ITEMS@[0; 40) ); } +fn to_subtree(tt: &tt::TokenTree) -> &tt::Subtree { + if let tt::TokenTree::Subtree(subtree) = tt { + return &subtree; + } + unreachable!("It is not a subtree"); +} +fn to_literal(tt: &tt::TokenTree) -> &tt::Literal { + if let tt::TokenTree::Leaf(tt::Leaf::Literal(lit)) = tt { + return lit; + } + unreachable!("It is not a literal"); +} + +fn to_punct(tt: &tt::TokenTree) -> &tt::Punct { + if let tt::TokenTree::Leaf(tt::Leaf::Punct(lit)) = tt { + return lit; + } + unreachable!("It is not a Punct"); +} + #[test] fn test_expand_literals_to_token_tree() { - fn to_subtree(tt: &tt::TokenTree) -> &tt::Subtree { - if let tt::TokenTree::Subtree(subtree) = tt { - return &subtree; - } - unreachable!("It is not a subtree"); - } - - fn to_literal(tt: &tt::TokenTree) -> &tt::Literal { - if let tt::TokenTree::Leaf(tt::Leaf::Literal(lit)) = tt { - return lit; - } - unreachable!("It is not a literal"); - } - let expansion = parse_macro( r#" macro_rules! literals { @@ -470,6 +476,22 @@ fn test_expand_literals_to_token_tree() { assert_eq!(to_literal(&stm_tokens[15 + 3]).text, "\"rust1\""); } +#[test] +fn test_attr_to_token_tree() { + let expansion = parse_to_token_tree_by_syntax( + r#" + #[derive(Copy)] + struct Foo; + "#, + ); + + assert_eq!(to_punct(&expansion.token_trees[0]).char, '#'); + assert_eq!( + to_subtree(&expansion.token_trees[1]).delimiter_kind(), + Some(tt::DelimiterKind::Bracket) + ); +} + #[test] fn test_two_idents() { parse_macro( @@ -1427,8 +1449,8 @@ impl MacroFixture { let macro_invocation = source_file.syntax().descendants().find_map(ast::MacroCall::cast).unwrap(); - let (invocation_tt, _) = - ast_to_token_tree(¯o_invocation.token_tree().unwrap()).unwrap(); + let (invocation_tt, _) = ast_to_token_tree(¯o_invocation.token_tree().unwrap()) + .ok_or_else(|| ExpandError::ConversionError)?; self.rules.expand(&invocation_tt).result() } @@ -1517,6 +1539,16 @@ pub(crate) fn parse_macro(ra_fixture: &str) -> MacroFixture { MacroFixture { rules } } +pub(crate) fn parse_to_token_tree_by_syntax(ra_fixture: &str) -> tt::Subtree { + let source_file = ast::SourceFile::parse(ra_fixture).ok().unwrap(); + let tt = syntax_node_to_token_tree(source_file.syntax()).unwrap().0; + + let parsed = parse_to_token_tree(ra_fixture).unwrap().0; + assert_eq!(tt, parsed); + + parsed +} + fn debug_dump_ignore_spaces(node: &ra_syntax::SyntaxNode) -> String { let mut level = 0; let mut buf = String::new(); @@ -1662,5 +1694,5 @@ fn test_expand_bad_literal() { macro_rules! foo { ($i:literal) => {}; } "#, ) - .assert_expand_err(r#"foo!(&k");"#, &ExpandError::BindingError("".to_string())); + .assert_expand_err(r#"foo!(&k");"#, &ExpandError::BindingError("".into())); }