//! This file contains code for parsing SSR rules, which look something like `foo($a) ==>> bar($b)`. //! We first split everything before and after the separator `==>>`. Next, both the search pattern //! and the replacement template get tokenized by the Rust tokenizer. Tokens are then searched for //! placeholders, which start with `$`. For replacement templates, this is the final form. For //! search patterns, we go further and parse the pattern as each kind of thing that we can match. //! e.g. expressions, type references etc. use crate::errors::bail; use crate::{SsrError, SsrPattern, SsrRule}; use ra_syntax::{ast, AstNode, SmolStr, SyntaxKind, SyntaxNode, SyntaxToken, T}; use rustc_hash::{FxHashMap, FxHashSet}; use std::str::FromStr; #[derive(Debug)] pub(crate) struct ParsedRule { pub(crate) placeholders_by_stand_in: FxHashMap, pub(crate) pattern: SyntaxNode, pub(crate) template: Option, } #[derive(Clone, Debug)] pub(crate) struct SsrTemplate { pub(crate) tokens: Vec, } #[derive(Debug)] pub(crate) struct RawPattern { tokens: Vec, } // Part of a search or replace pattern. #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) enum PatternElement { Token(Token), Placeholder(Placeholder), } #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct Placeholder { /// The name of this placeholder. e.g. for "$a", this would be "a" pub(crate) ident: SmolStr, /// A unique name used in place of this placeholder when we parse the pattern as Rust code. stand_in_name: String, pub(crate) constraints: Vec, } #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) enum Constraint { Kind(NodeKind), Not(Box), } #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) enum NodeKind { Literal, } #[derive(Debug, Clone, PartialEq, Eq)] pub(crate) struct Token { kind: SyntaxKind, pub(crate) text: SmolStr, } impl ParsedRule { fn new( pattern: &RawPattern, template: Option<&SsrTemplate>, ) -> Result, SsrError> { let raw_pattern = pattern.as_rust_code(); let mut builder = RuleBuilder { placeholders_by_stand_in: pattern.placeholders_by_stand_in(), rules: Vec::new(), }; builder.try_add(ast::Expr::parse(&raw_pattern), template); builder.try_add(ast::TypeRef::parse(&raw_pattern), template); builder.try_add(ast::ModuleItem::parse(&raw_pattern), template); builder.try_add(ast::Path::parse(&raw_pattern), template); builder.try_add(ast::Pat::parse(&raw_pattern), template); builder.build() } } struct RuleBuilder { placeholders_by_stand_in: FxHashMap, rules: Vec, } impl RuleBuilder { fn try_add(&mut self, pattern: Result, template: Option<&SsrTemplate>) { match pattern { Ok(pattern) => self.rules.push(ParsedRule { placeholders_by_stand_in: self.placeholders_by_stand_in.clone(), pattern: pattern.syntax().clone(), template: template.cloned(), }), _ => {} } } fn build(self) -> Result, SsrError> { if self.rules.is_empty() { bail!("Pattern is not a valid Rust expression, type, item, path or pattern"); } Ok(self.rules) } } impl FromStr for SsrRule { type Err = SsrError; fn from_str(query: &str) -> Result { let mut it = query.split("==>>"); let pattern = it.next().expect("at least empty string").trim(); let template = it .next() .ok_or_else(|| SsrError("Cannot find delimiter `==>>`".into()))? .trim() .to_string(); if it.next().is_some() { return Err(SsrError("More than one delimiter found".into())); } let raw_pattern = pattern.parse()?; let raw_template = template.parse()?; let parsed_rules = ParsedRule::new(&raw_pattern, Some(&raw_template))?; let rule = SsrRule { pattern: raw_pattern, template: raw_template, parsed_rules }; validate_rule(&rule)?; Ok(rule) } } impl FromStr for RawPattern { type Err = SsrError; fn from_str(pattern_str: &str) -> Result { Ok(RawPattern { tokens: parse_pattern(pattern_str)? }) } } impl RawPattern { /// Returns this search pattern as Rust source code that we can feed to the Rust parser. fn as_rust_code(&self) -> String { let mut res = String::new(); for t in &self.tokens { res.push_str(match t { PatternElement::Token(token) => token.text.as_str(), PatternElement::Placeholder(placeholder) => placeholder.stand_in_name.as_str(), }); } res } pub(crate) fn placeholders_by_stand_in(&self) -> FxHashMap { let mut res = FxHashMap::default(); for t in &self.tokens { if let PatternElement::Placeholder(placeholder) = t { res.insert(SmolStr::new(placeholder.stand_in_name.clone()), placeholder.clone()); } } res } } impl ParsedRule { pub(crate) fn get_placeholder(&self, token: &SyntaxToken) -> Option<&Placeholder> { if token.kind() != SyntaxKind::IDENT { return None; } self.placeholders_by_stand_in.get(token.text()) } } impl FromStr for SsrPattern { type Err = SsrError; fn from_str(pattern_str: &str) -> Result { let raw_pattern = pattern_str.parse()?; let parsed_rules = ParsedRule::new(&raw_pattern, None)?; Ok(SsrPattern { raw: raw_pattern, parsed_rules }) } } impl FromStr for SsrTemplate { type Err = SsrError; fn from_str(pattern_str: &str) -> Result { let tokens = parse_pattern(pattern_str)?; // Validate that the template is a valid fragment of Rust code. We reuse the validation // logic for search patterns since the only thing that differs is the error message. if SsrPattern::from_str(pattern_str).is_err() { bail!("Replacement is not a valid Rust expression, type, item, path or pattern"); } // Our actual template needs to preserve whitespace, so we can't reuse `tokens`. Ok(SsrTemplate { tokens }) } } /// Returns `pattern_str`, parsed as a search or replace pattern. If `remove_whitespace` is true, /// then any whitespace tokens will be removed, which we do for the search pattern, but not for the /// replace pattern. fn parse_pattern(pattern_str: &str) -> Result, SsrError> { let mut res = Vec::new(); let mut placeholder_names = FxHashSet::default(); let mut tokens = tokenize(pattern_str)?.into_iter(); while let Some(token) = tokens.next() { if token.kind == T![$] { let placeholder = parse_placeholder(&mut tokens)?; if !placeholder_names.insert(placeholder.ident.clone()) { bail!("Name `{}` repeats more than once", placeholder.ident); } res.push(PatternElement::Placeholder(placeholder)); } else { res.push(PatternElement::Token(token)); } } Ok(res) } /// Checks for errors in a rule. e.g. the replace pattern referencing placeholders that the search /// pattern didn't define. fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> { let mut defined_placeholders = FxHashSet::default(); for p in &rule.pattern.tokens { if let PatternElement::Placeholder(placeholder) = p { defined_placeholders.insert(&placeholder.ident); } } let mut undefined = Vec::new(); for p in &rule.template.tokens { if let PatternElement::Placeholder(placeholder) = p { if !defined_placeholders.contains(&placeholder.ident) { undefined.push(format!("${}", placeholder.ident)); } if !placeholder.constraints.is_empty() { bail!("Replacement placeholders cannot have constraints"); } } } if !undefined.is_empty() { bail!("Replacement contains undefined placeholders: {}", undefined.join(", ")); } Ok(()) } fn tokenize(source: &str) -> Result, SsrError> { let mut start = 0; let (raw_tokens, errors) = ra_syntax::tokenize(source); if let Some(first_error) = errors.first() { bail!("Failed to parse pattern: {}", first_error); } let mut tokens: Vec = Vec::new(); for raw_token in raw_tokens { let token_len = usize::from(raw_token.len); tokens.push(Token { kind: raw_token.kind, text: SmolStr::new(&source[start..start + token_len]), }); start += token_len; } Ok(tokens) } fn parse_placeholder(tokens: &mut std::vec::IntoIter) -> Result { let mut name = None; let mut constraints = Vec::new(); if let Some(token) = tokens.next() { match token.kind { SyntaxKind::IDENT => { name = Some(token.text); } T!['{'] => { let token = tokens.next().ok_or_else(|| SsrError::new("Unexpected end of placeholder"))?; if token.kind == SyntaxKind::IDENT { name = Some(token.text); } loop { let token = tokens .next() .ok_or_else(|| SsrError::new("Placeholder is missing closing brace '}'"))?; match token.kind { T![:] => { constraints.push(parse_constraint(tokens)?); } T!['}'] => break, _ => bail!("Unexpected token while parsing placeholder: '{}'", token.text), } } } _ => { bail!("Placeholders should either be $name or ${{name:constraints}}"); } } } let name = name.ok_or_else(|| SsrError::new("Placeholder ($) with no name"))?; Ok(Placeholder::new(name, constraints)) } fn parse_constraint(tokens: &mut std::vec::IntoIter) -> Result { let constraint_type = tokens .next() .ok_or_else(|| SsrError::new("Found end of placeholder while looking for a constraint"))? .text .to_string(); match constraint_type.as_str() { "kind" => { expect_token(tokens, "(")?; let t = tokens.next().ok_or_else(|| { SsrError::new("Unexpected end of constraint while looking for kind") })?; if t.kind != SyntaxKind::IDENT { bail!("Expected ident, found {:?} while parsing kind constraint", t.kind); } expect_token(tokens, ")")?; Ok(Constraint::Kind(NodeKind::from(&t.text)?)) } "not" => { expect_token(tokens, "(")?; let sub = parse_constraint(tokens)?; expect_token(tokens, ")")?; Ok(Constraint::Not(Box::new(sub))) } x => bail!("Unsupported constraint type '{}'", x), } } fn expect_token(tokens: &mut std::vec::IntoIter, expected: &str) -> Result<(), SsrError> { if let Some(t) = tokens.next() { if t.text == expected { return Ok(()); } bail!("Expected {} found {}", expected, t.text); } bail!("Expected {} found end of stream", expected); } impl NodeKind { fn from(name: &SmolStr) -> Result { Ok(match name.as_str() { "literal" => NodeKind::Literal, _ => bail!("Unknown node kind '{}'", name), }) } } impl Placeholder { fn new(name: SmolStr, constraints: Vec) -> Self { Self { stand_in_name: format!("__placeholder_{}", name), constraints, ident: name } } } #[cfg(test)] mod tests { use super::*; #[test] fn parser_happy_case() { fn token(kind: SyntaxKind, text: &str) -> PatternElement { PatternElement::Token(Token { kind, text: SmolStr::new(text) }) } fn placeholder(name: &str) -> PatternElement { PatternElement::Placeholder(Placeholder::new(SmolStr::new(name), Vec::new())) } let result: SsrRule = "foo($a, $b) ==>> bar($b, $a)".parse().unwrap(); assert_eq!( result.pattern.tokens, vec![ token(SyntaxKind::IDENT, "foo"), token(T!['('], "("), placeholder("a"), token(T![,], ","), token(SyntaxKind::WHITESPACE, " "), placeholder("b"), token(T![')'], ")"), ] ); assert_eq!( result.template.tokens, vec![ token(SyntaxKind::IDENT, "bar"), token(T!['('], "("), placeholder("b"), token(T![,], ","), token(SyntaxKind::WHITESPACE, " "), placeholder("a"), token(T![')'], ")"), ] ); } }