rust/crates/ra_ssr/src/search.rs

//! Searching for matches.

use crate::{
    matching,
    resolving::{ResolvedPath, ResolvedPattern, ResolvedRule},
    Match, MatchFinder,
};
use ra_db::FileRange;
use ra_ide_db::{
    defs::Definition,
    search::{Reference, SearchScope},
};
use ra_syntax::{ast, AstNode, SyntaxKind, SyntaxNode};
use test_utils::mark;

/// A cache for the results of find_usages. This is for when we have multiple patterns that have the
/// same path. e.g. if the pattern was `foo::Bar` that can parse as a path, an expression, a type
/// and as a pattern. In each, the usages of `foo::Bar` are the same and we'd like to avoid finding
/// them more than once.
///
/// Entries are kept in insertion order and looked up with a linear scan (see `UsageCache::find`).
#[derive(Default)]
pub(crate) struct UsageCache {
    /// Each entry pairs a definition with the references that were found for it.
    usages: Vec<(Definition, Vec<Reference>)>,
}

impl<'db> MatchFinder<'db> {
    /// Appends every match of `rule` to `matches_out`. The collected matches may overlap in ways
    /// that rule out direct replacement; nesting them and discarding the overlapping ones is left
    /// to the `nesting` module.
    pub(crate) fn find_matches_for_rule(
        &self,
        rule: &ResolvedRule,
        usage_cache: &mut UsageCache,
        matches_out: &mut Vec<Match>,
    ) {
        // A pattern with a usable path can be located through find-usages; any other pattern
        // needs a scan of all files in the local source roots.
        match pick_path_for_usages(&rule.pattern) {
            Some(_) => {
                self.find_matches_for_pattern_tree(rule, &rule.pattern, usage_cache, matches_out)
            }
            None => self.slow_scan(rule, matches_out),
        }
    }

    /// Finds matches of `rule` by starting from the usages of one path in `pattern`.
    ///
    /// The path is chosen by `pick_path_for_usages`; if there is none, nothing is added. For each
    /// reference to the definition that the path resolves to, we locate the `ast::Path` at the
    /// start of the reference, take the node `first_path.depth` steps along its ancestor chain
    /// (expanding through macros) and try to match `rule` against that node. Successful matches
    /// are appended to `matches_out`.
    fn find_matches_for_pattern_tree(
        &self,
        rule: &ResolvedRule,
        pattern: &ResolvedPattern,
        usage_cache: &mut UsageCache,
        matches_out: &mut Vec<Match>,
    ) {
        let first_path = match pick_path_for_usages(pattern) {
            Some(path) => path,
            None => return,
        };
        let definition: Definition = first_path.resolution.clone().into();
        for reference in self.find_usages(usage_cache, definition) {
            let file = self.sema.parse(reference.file_range.file_id);
            let path = match self.sema.find_node_at_offset_with_descend::<ast::Path>(
                file.syntax(),
                reference.file_range.range.start(),
            ) {
                Some(path) => path,
                None => continue,
            };
            // `nth` replaces the former `skip(..).next()` pair; both yield the same element.
            let node_to_match = match self
                .sema
                .ancestors_with_macros(path.syntax().clone())
                .nth(first_path.depth as usize)
            {
                Some(node) => node,
                None => continue,
            };
            // Skip candidates that sit inside a node kind we don't support matching in (currently
            // use declarations, see `is_search_permitted`).
            if !is_search_permitted_ancestors(&node_to_match) {
                mark::hit!(use_declaration_with_braces);
                continue;
            }
            if let Ok(m) = matching::get_match(false, rule, &node_to_match, &None, &self.sema) {
                matches_out.push(m);
            }
        }
    }

    /// Returns the references to `definition`. On the first request for a definition they are
    /// computed with `Definition::find_usages`, limited to `search_scope`, and stored in
    /// `usage_cache`; later requests are served from the cache.
    fn find_usages<'a>(
        &self,
        usage_cache: &'a mut UsageCache,
        definition: Definition,
    ) -> &'a [Reference] {
        // Ideally one lookup would either hand back the hit or fall through to the insertion.
        // However, returning the result of that lookup would keep the cache borrowed on the miss
        // path too, which blocks the insertion. This is an NLL limitation that Polonius fixes.
        // Until then a cache hit costs two lookups: one to test and one to return.
        if usage_cache.find(&definition).is_some() {
            return usage_cache.find(&definition).unwrap();
        }
        let found = definition.find_usages(&self.sema, Some(self.search_scope()));
        usage_cache.usages.push((definition, found));
        &usage_cache.usages.last().unwrap().1
    }

    /// Builds the scope that usage searches are limited to: every file of every local source
    /// root. An unrestricted scope is not wanted, because it would also find references in
    /// external dependencies.
    fn search_scope(&self) -> SearchScope {
        // FIXME: We should ideally have a test that checks that we edit local roots and not library
        // roots. This probably would require some changes to fixtures, since currently everything
        // seems to get put into a single source root.
        use ra_db::SourceDatabaseExt;
        use ra_ide_db::symbol_index::SymbolsDatabase;
        let db = self.sema.db;
        let mut local_files = Vec::new();
        for &root_id in db.local_roots().iter() {
            local_files.extend(db.source_root(root_id).iter());
        }
        SearchScope::files(&local_files)
    }

    /// Tries to match `rule` against every node of every file in the local source roots,
    /// appending the matches to `matches_out`. This is the fallback used when the pattern has no
    /// path that could be looked up via find-usages.
    fn slow_scan(&self, rule: &ResolvedRule, matches_out: &mut Vec<Match>) {
        use ra_db::SourceDatabaseExt;
        use ra_ide_db::symbol_index::SymbolsDatabase;
        let db = self.sema.db;
        for &root_id in db.local_roots().iter() {
            for file_id in db.source_root(root_id).iter() {
                let parsed = self.sema.parse(file_id);
                // No range restriction applies at the top level of a file.
                self.slow_scan_node(parsed.syntax(), rule, &None, matches_out);
            }
        }
    }

    /// Recursively tries to match `rule` against `code` and all of its descendants, pushing each
    /// match onto `matches_out`. Subtrees rooted at a node kind rejected by `is_search_permitted`
    /// are skipped entirely. `restrict_range` is forwarded to `matching::get_match`; it is set
    /// when scanning a macro expansion.
    fn slow_scan_node(
        &self,
        code: &SyntaxNode,
        rule: &ResolvedRule,
        restrict_range: &Option<FileRange>,
        matches_out: &mut Vec<Match>,
    ) {
        if !is_search_permitted(code) {
            return;
        }
        if let Ok(found) = matching::get_match(false, rule, code, restrict_range, &self.sema) {
            matches_out.push(found);
        }
        // For a macro call, the attempt above was against the unexpanded call, which is the only
        // way to match the macro as a whole. Now also look inside its expansion, provided the call
        // expands and has a token tree.
        let expansion = ast::MacroCall::cast(code.clone()).and_then(|call| {
            let expanded = self.sema.expand(&call)?;
            let token_tree = call.token_tree()?;
            Some((expanded, token_tree))
        });
        if let Some((expanded, token_tree)) = expansion {
            // Within the expansion, only nodes that came entirely from the call's token tree may
            // match; anything contributed by the macro definition itself must not.
            let call_site_range = Some(self.sema.original_range(token_tree.syntax()));
            self.slow_scan_node(&expanded, rule, &call_site_range, matches_out);
        }
        for child in code.children() {
            self.slow_scan_node(&child, rule, restrict_range, matches_out);
        }
    }
}

/// Returns whether we support matching within `node` and all of its ancestors.
fn is_search_permitted_ancestors(node: &SyntaxNode) -> bool {
    if let Some(parent) = node.parent() {
        if !is_search_permitted_ancestors(&parent) {
            return false;
        }
    }
    is_search_permitted(node)
}

/// Returns whether matching is supported inside a node of this kind. Every kind is permitted
/// except `USE_ITEM`.
fn is_search_permitted(node: &SyntaxNode) -> bool {
    // FIXME: Properly handle use declarations. At the moment, if our search pattern is `foo::bar`
    // and the code is `use foo::{baz, bar}`, we'll match `bar`, since it resolves to `foo::bar`.
    // However we'll then replace just the part we matched `bar`. We probably need to instead remove
    // `bar` and insert a new use declaration.
    !matches!(node.kind(), SyntaxKind::USE_ITEM)
}

impl UsageCache {
    /// Returns the cached references for `definition`, or `None` if no entry has been stored for
    /// it. If several entries share the same definition, the earliest one is returned.
    ///
    /// This is a read-only lookup, so it only needs a shared borrow of the cache.
    fn find(&self, definition: &Definition) -> Option<&[Reference]> {
        // We expect a very small number of cache entries (generally 1), so a linear scan should be
        // fast enough and avoids the need to implement Hash for Definition.
        self.usages
            .iter()
            .find(|(cached, _)| cached == definition)
            .map(|(_, refs)| refs.as_slice())
    }
}

/// Chooses the resolved path of `pattern` whose usages will be searched for. Paths resolving to
/// builtin types are skipped, since there is nothing to find references to. Among the remaining
/// paths, the one with the longest source text is picked, a somewhat arbitrary choice made in the
/// hope that a longer path is less common and therefore faster to find. Returns `None` if no
/// path is left after filtering.
fn pick_path_for_usages(pattern: &ResolvedPattern) -> Option<&ResolvedPath> {
    // FIXME: Take the scope of the resolved path into account. e.g. if there are any paths that are
    // private to the current module, then we definitely would want to pick them over say a path
    // from std. Possibly we should go further than this and intersect the search scopes for all
    // resolved paths then search only in that scope.
    pattern
        .resolved_paths
        .iter()
        .filter(|(_, path)| {
            !matches!(path.resolution, hir::PathResolution::Def(hir::ModuleDef::BuiltinType(_)))
        })
        .max_by_key(|(node, _)| node.text().len())
        .map(|(_, path)| path)
}
SSR: Move search code into a submodule Also renamed find_matches to slow_scan_node to reflect that it's a slow way to do things. Actually the name came from a later commit and probably makes more sense once there's an alternative. 2020-07-22 01:31:32 -05:00			`//! Searching for matches.`

SSR: Use Definition::find_usages to speed up matching. When the search pattern contains a path, this substantially speeds up finding matches, especially if the path references a private item. 2020-07-21 23:01:21 -05:00			`use crate::{`
			`matching,`
			`resolving::{ResolvedPath, ResolvedPattern, ResolvedRule},`
			`Match, MatchFinder,`
			`};`
SSR: Move search code into a submodule Also renamed find_matches to slow_scan_node to reflect that it's a slow way to do things. Actually the name came from a later commit and probably makes more sense once there's an alternative. 2020-07-22 01:31:32 -05:00			`use ra_db::FileRange;`
SSR: Use Definition::find_usages to speed up matching. When the search pattern contains a path, this substantially speeds up finding matches, especially if the path references a private item. 2020-07-21 23:01:21 -05:00			`use ra_ide_db::{`
			`defs::Definition,`
			`search::{Reference, SearchScope},`
			`};`
SSR: Disable matching within use declarations It currently does the wrong thing when the use declaration contains braces. 2020-07-23 06:28:31 -05:00			`use ra_syntax::{ast, AstNode, SyntaxKind, SyntaxNode};`
			`use test_utils::mark;`
SSR: Move search code into a submodule Also renamed find_matches to slow_scan_node to reflect that it's a slow way to do things. Actually the name came from a later commit and probably makes more sense once there's an alternative. 2020-07-22 01:31:32 -05:00
SSR: Use Definition::find_usages to speed up matching. When the search pattern contains a path, this substantially speeds up finding matches, especially if the path references a private item. 2020-07-21 23:01:21 -05:00			`/// A cache for the results of find_usages. This is for when we have multiple patterns that have the`
			/// same path. e.g. if the pattern was `foo::Bar` that can parse as a path, an expression, a type
			/// and as a pattern. In each, the usages of `foo::Bar` are the same and we'd like to avoid finding
			`/// them more than once.`
			`#[derive(Default)]`
			`pub(crate) struct UsageCache {`
			`usages: Vec<(Definition, Vec<Reference>)>,`
			`}`

SSR: Move search code into a submodule Also renamed find_matches to slow_scan_node to reflect that it's a slow way to do things. Actually the name came from a later commit and probably makes more sense once there's an alternative. 2020-07-22 01:31:32 -05:00			`impl<'db> MatchFinder<'db> {`
SSR: Refactor to not rely on recursive search for nesting of matches Previously, submatches were handled simply by searching in placeholders for more matches. That only works if we search all nodes in the tree recursively. In a subsequent commit, I intend to make search not always be recursive recursive. This commit prepares for that by finding all matches, even if they overlap, then nesting them and removing overlapping matches. 2020-07-22 01:48:12 -05:00			/// Adds all matches for `rule` to `matches_out`. Matches may overlap in ways that make
			`/// replacement impossible, so further processing is required in order to properly nest matches`
			/// and remove overlapping matches. This is done in the `nesting` module.
SSR: Use Definition::find_usages to speed up matching. When the search pattern contains a path, this substantially speeds up finding matches, especially if the path references a private item. 2020-07-21 23:01:21 -05:00			`pub(crate) fn find_matches_for_rule(`
			`&self,`
			`rule: &ResolvedRule,`
			`usage_cache: &mut UsageCache,`
			`matches_out: &mut Vec<Match>,`
			`) {`
			`if pick_path_for_usages(&rule.pattern).is_none() {`
			`self.slow_scan(rule, matches_out);`
			`return;`
			`}`
			`self.find_matches_for_pattern_tree(rule, &rule.pattern, usage_cache, matches_out);`
			`}`

			`fn find_matches_for_pattern_tree(`
			`&self,`
			`rule: &ResolvedRule,`
			`pattern: &ResolvedPattern,`
			`usage_cache: &mut UsageCache,`
			`matches_out: &mut Vec<Match>,`
			`) {`
			`if let Some(first_path) = pick_path_for_usages(pattern) {`
			`let definition: Definition = first_path.resolution.clone().into();`
			`for reference in self.find_usages(usage_cache, definition) {`
			`let file = self.sema.parse(reference.file_range.file_id);`
			`if let Some(path) = self.sema.find_node_at_offset_with_descend::<ast::Path>(`
			`file.syntax(),`
			`reference.file_range.range.start(),`
			`) {`
			`if let Some(node_to_match) = self`
			`.sema`
			`.ancestors_with_macros(path.syntax().clone())`
			`.skip(first_path.depth as usize)`
			`.next()`
			`{`
SSR: Disable matching within use declarations It currently does the wrong thing when the use declaration contains braces. 2020-07-23 06:28:31 -05:00			`if !is_search_permitted_ancestors(&node_to_match) {`
			`mark::hit!(use_declaration_with_braces);`
			`continue;`
			`}`
SSR: Use Definition::find_usages to speed up matching. When the search pattern contains a path, this substantially speeds up finding matches, especially if the path references a private item. 2020-07-21 23:01:21 -05:00			`if let Ok(m) =`
			`matching::get_match(false, rule, &node_to_match, &None, &self.sema)`
			`{`
			`matches_out.push(m);`
			`}`
			`}`
			`}`
			`}`
			`}`
			`}`

			`fn find_usages<'a>(`
			`&self,`
			`usage_cache: &'a mut UsageCache,`
			`definition: Definition,`
			`) -> &'a [Reference] {`
			`// Logically if a lookup succeeds we should just return it. Unfortunately returning it would`
			`// extend the lifetime of the borrow, then we wouldn't be able to do the insertion on a`
			`// cache miss. This is a limitation of NLL and is fixed with Polonius. For now we do two`
			`// lookups in the case of a cache hit.`
			`if usage_cache.find(&definition).is_none() {`
			`let usages = definition.find_usages(&self.sema, Some(self.search_scope()));`
			`usage_cache.usages.push((definition, usages));`
			`return &usage_cache.usages.last().unwrap().1;`
			`}`
			`usage_cache.find(&definition).unwrap()`
			`}`

			`/// Returns the scope within which we want to search. We don't want un unrestricted search`
			`/// scope, since we don't want to find references in external dependencies.`
			`fn search_scope(&self) -> SearchScope {`
			`// FIXME: We should ideally have a test that checks that we edit local roots and not library`
			`// roots. This probably would require some changes to fixtures, since currently everything`
			`// seems to get put into a single source root.`
			`use ra_db::SourceDatabaseExt;`
			`use ra_ide_db::symbol_index::SymbolsDatabase;`
			`let mut files = Vec::new();`
			`for &root in self.sema.db.local_roots().iter() {`
			`let sr = self.sema.db.source_root(root);`
			`files.extend(sr.iter());`
			`}`
			`SearchScope::files(&files)`
Move iteration over all files into the SSR crate The methods `edits_for_file` and `find_matches_in_file` are replaced with just `edits` and `matches`. This simplifies the API a bit, but more importantly it makes it possible in a subsequent commit for SSR to decide to not search all files. 2020-07-22 01:23:43 -05:00			`}`

SSR: Match paths based on what they resolve to Also render template paths appropriately for their context. 2020-07-22 01:46:29 -05:00			`fn slow_scan(&self, rule: &ResolvedRule, matches_out: &mut Vec<Match>) {`
Move iteration over all files into the SSR crate The methods `edits_for_file` and `find_matches_in_file` are replaced with just `edits` and `matches`. This simplifies the API a bit, but more importantly it makes it possible in a subsequent commit for SSR to decide to not search all files. 2020-07-22 01:23:43 -05:00			`use ra_db::SourceDatabaseExt;`
			`use ra_ide_db::symbol_index::SymbolsDatabase;`
			`for &root in self.sema.db.local_roots().iter() {`
			`let sr = self.sema.db.source_root(root);`
			`for file_id in sr.iter() {`
			`let file = self.sema.parse(file_id);`
			`let code = file.syntax();`
SSR: Refactor to not rely on recursive search for nesting of matches Previously, submatches were handled simply by searching in placeholders for more matches. That only works if we search all nodes in the tree recursively. In a subsequent commit, I intend to make search not always be recursive recursive. This commit prepares for that by finding all matches, even if they overlap, then nesting them and removing overlapping matches. 2020-07-22 01:48:12 -05:00			`self.slow_scan_node(code, rule, &None, matches_out);`
Move iteration over all files into the SSR crate The methods `edits_for_file` and `find_matches_in_file` are replaced with just `edits` and `matches`. This simplifies the API a bit, but more importantly it makes it possible in a subsequent commit for SSR to decide to not search all files. 2020-07-22 01:23:43 -05:00			`}`
			`}`
			`}`

			`fn slow_scan_node(`
SSR: Move search code into a submodule Also renamed find_matches to slow_scan_node to reflect that it's a slow way to do things. Actually the name came from a later commit and probably makes more sense once there's an alternative. 2020-07-22 01:31:32 -05:00			`&self,`
			`code: &SyntaxNode,`
SSR: Match paths based on what they resolve to Also render template paths appropriately for their context. 2020-07-22 01:46:29 -05:00			`rule: &ResolvedRule,`
SSR: Move search code into a submodule Also renamed find_matches to slow_scan_node to reflect that it's a slow way to do things. Actually the name came from a later commit and probably makes more sense once there's an alternative. 2020-07-22 01:31:32 -05:00			`restrict_range: &Option<FileRange>,`
			`matches_out: &mut Vec<Match>,`
			`) {`
SSR: Disable matching within use declarations It currently does the wrong thing when the use declaration contains braces. 2020-07-23 06:28:31 -05:00			`if !is_search_permitted(code) {`
			`return;`
			`}`
SSR: Refactor to not rely on recursive search for nesting of matches Previously, submatches were handled simply by searching in placeholders for more matches. That only works if we search all nodes in the tree recursively. In a subsequent commit, I intend to make search not always be recursive recursive. This commit prepares for that by finding all matches, even if they overlap, then nesting them and removing overlapping matches. 2020-07-22 01:48:12 -05:00			`if let Ok(m) = matching::get_match(false, rule, &code, restrict_range, &self.sema) {`
			`matches_out.push(m);`
SSR: Move search code into a submodule Also renamed find_matches to slow_scan_node to reflect that it's a slow way to do things. Actually the name came from a later commit and probably makes more sense once there's an alternative. 2020-07-22 01:31:32 -05:00			`}`
			`// If we've got a macro call, we already tried matching it pre-expansion, which is the only`
			`// way to match the whole macro, now try expanding it and matching the expansion.`
			`if let Some(macro_call) = ast::MacroCall::cast(code.clone()) {`
			`if let Some(expanded) = self.sema.expand(&macro_call) {`
			`if let Some(tt) = macro_call.token_tree() {`
			`// When matching within a macro expansion, we only want to allow matches of`
			`// nodes that originated entirely from within the token tree of the macro call.`
			`// i.e. we don't want to match something that came from the macro itself.`
			`self.slow_scan_node(`
			`&expanded,`
SSR: Refactor to not rely on recursive search for nesting of matches Previously, submatches were handled simply by searching in placeholders for more matches. That only works if we search all nodes in the tree recursively. In a subsequent commit, I intend to make search not always be recursive recursive. This commit prepares for that by finding all matches, even if they overlap, then nesting them and removing overlapping matches. 2020-07-22 01:48:12 -05:00			`rule,`
SSR: Move search code into a submodule Also renamed find_matches to slow_scan_node to reflect that it's a slow way to do things. Actually the name came from a later commit and probably makes more sense once there's an alternative. 2020-07-22 01:31:32 -05:00			`&Some(self.sema.original_range(tt.syntax())),`
			`matches_out,`
			`);`
			`}`
			`}`
			`}`
			`for child in code.children() {`
SSR: Refactor to not rely on recursive search for nesting of matches Previously, submatches were handled simply by searching in placeholders for more matches. That only works if we search all nodes in the tree recursively. In a subsequent commit, I intend to make search not always be recursive recursive. This commit prepares for that by finding all matches, even if they overlap, then nesting them and removing overlapping matches. 2020-07-22 01:48:12 -05:00			`self.slow_scan_node(&child, rule, restrict_range, matches_out);`
SSR: Move search code into a submodule Also renamed find_matches to slow_scan_node to reflect that it's a slow way to do things. Actually the name came from a later commit and probably makes more sense once there's an alternative. 2020-07-22 01:31:32 -05:00			`}`
			`}`
			`}`
SSR: Use Definition::find_usages to speed up matching. When the search pattern contains a path, this substantially speeds up finding matches, especially if the path references a private item. 2020-07-21 23:01:21 -05:00
SSR: Disable matching within use declarations It currently does the wrong thing when the use declaration contains braces. 2020-07-23 06:28:31 -05:00			/// Returns whether we support matching within `node` and all of its ancestors.
			`fn is_search_permitted_ancestors(node: &SyntaxNode) -> bool {`
			`if let Some(parent) = node.parent() {`
			`if !is_search_permitted_ancestors(&parent) {`
			`return false;`
			`}`
			`}`
			`is_search_permitted(node)`
			`}`

			`/// Returns whether we support matching within this kind of node.`
			`fn is_search_permitted(node: &SyntaxNode) -> bool {`
			// FIXME: Properly handle use declarations. At the moment, if our search pattern is `foo::bar`
			// and the code is `use foo::{baz, bar}`, we'll match `bar`, since it resolves to `foo::bar`.
			// However we'll then replace just the part we matched `bar`. We probably need to instead remove
			// `bar` and insert a new use declaration.
			`node.kind() != SyntaxKind::USE_ITEM`
			`}`

SSR: Use Definition::find_usages to speed up matching. When the search pattern contains a path, this substantially speeds up finding matches, especially if the path references a private item. 2020-07-21 23:01:21 -05:00			`impl UsageCache {`
			`fn find(&mut self, definition: &Definition) -> Option<&[Reference]> {`
			`// We expect a very small number of cache entries (generally 1), so a linear scan should be`
			`// fast enough and avoids the need to implement Hash for Definition.`
			`for (d, refs) in &self.usages {`
			`if d == definition {`
			`return Some(refs);`
			`}`
			`}`
			`None`
			`}`
			`}`

			`/// Returns a path that's suitable for path resolution. We exclude builtin types, since they aren't`
			`/// something that we can find references to. We then somewhat arbitrarily pick the path that is the`
			`/// longest as this is hopefully more likely to be less common, making it faster to find.`
			`fn pick_path_for_usages(pattern: &ResolvedPattern) -> Option<&ResolvedPath> {`
			`// FIXME: Take the scope of the resolved path into account. e.g. if there are any paths that are`
			`// private to the current module, then we definitely would want to pick them over say a path`
			`// from std. Possibly we should go further than this and intersect the search scopes for all`
			`// resolved paths then search only in that scope.`
			`pattern`
			`.resolved_paths`
			`.iter()`
			`.filter(\|(_, p)\| {`
			`!matches!(p.resolution, hir::PathResolution::Def(hir::ModuleDef::BuiltinType(_)))`
			`})`
			`.map(\|(node, resolved)\| (node.text().len(), resolved))`
			`.max_by(\|(a, _), (b, _)\| a.cmp(b))`
			`.map(\|(_, resolved)\| resolved)`
			`}`