rust/crates/hir-expand/src/ast_id_map.rs

//! `AstIdMap` allows creating stable IDs for "large" syntax nodes like items
//! and macro calls.
//!
//! Specifically, it enumerates all items in a file and uses the position of an
//! item as its ID. That way, ids don't change unless the set of items itself
//! changes.

use std::{
    any::type_name,
    fmt,
    hash::{BuildHasher, BuildHasherDefault, Hash, Hasher},
    marker::PhantomData,
};

use la_arena::{Arena, Idx};
use profile::Count;
use rustc_hash::FxHasher;
use syntax::{ast, match_ast, AstNode, AstPtr, SyntaxNode, SyntaxNodePtr};

/// `AstId` points to an AST node in a specific file.
///
/// This is a typed wrapper around an `ErasedFileAstId`: the type parameter `N`
/// only records which kind of AST node the id refers to.
pub struct FileAstId<N: AstNode> {
    /// Untyped arena index; `AstIdMap::get` uses it to look up the node pointer.
    raw: ErasedFileAstId,
    /// Zero-sized marker that ties the id to `N` without storing an `N`.
    _ty: PhantomData<fn() -> N>,
}

// `Copy` and `Clone` are written by hand because `#[derive]` would add
// `N: Copy` / `N: Clone` bounds, even though no `N` is ever stored.
impl<N: AstNode> Copy for FileAstId<N> {}

impl<N: AstNode> Clone for FileAstId<N> {
    /// Returns a bitwise copy of the id.
    fn clone(&self) -> Self {
        *self
    }
}

// Equality and hashing look only at the raw index; the marker field carries
// no data.
impl<N: AstNode> PartialEq for FileAstId<N> {
    /// Two ids are equal when their raw indices are equal.
    fn eq(&self, other: &Self) -> bool {
        PartialEq::eq(&self.raw, &other.raw)
    }
}

impl<N: AstNode> Eq for FileAstId<N> {}

impl<N: AstNode> Hash for FileAstId<N> {
    /// Feeds the raw index into `hasher`.
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        Hash::hash(&self.raw, hasher);
    }
}

impl<N: AstNode> fmt::Debug for FileAstId<N> {
    /// Formats the id as `FileAstId::<NodeType>(raw_index)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let node_ty = type_name::<N>();
        let index = self.raw.into_raw();
        write!(f, "FileAstId::<{}>({})", node_ty, index)
    }
}

impl<N: AstNode> FileAstId<N> {
    /// Re-types this id as a `FileAstId<M>` for any node type `M` that `N`
    /// converts into. The underlying raw index is carried over unchanged.
    ///
    /// This is a method rather than a `From` implementation because of
    /// coherence.
    pub fn upcast<M: AstNode>(self) -> FileAstId<M>
    where
        N: Into<M>,
    {
        let FileAstId { raw, _ty: _ } = self;
        FileAstId { raw, _ty: PhantomData }
    }
}

/// Type-erased form of `FileAstId`: an arena index of a `SyntaxNodePtr`,
/// without the `AstNode` type parameter.
type ErasedFileAstId = Idx<SyntaxNodePtr>;

/// Maps items' `SyntaxNode`s to `ErasedFileAstId`s and back.
#[derive(Default)]
pub struct AstIdMap {
    /// Maps stable id to unstable ptr.
    arena: Arena<SyntaxNodePtr>,
    /// Reverse: map ptr to id.
    ///
    /// Only the ids are stored (unit values, unit hasher). Hashes are computed
    /// externally by `hash_ptr` from the pointer that the id refers to in
    /// `arena`, and all access goes through hashbrown's raw-entry API.
    map: hashbrown::HashMap<Idx<SyntaxNodePtr>, (), ()>,
    /// `profile::Count` marker; presumably tracks live `AstIdMap` instances for
    /// profiling — confirm against the `profile` crate.
    _c: Count<Self>,
}

impl fmt::Debug for AstIdMap {
    /// Prints only the `arena` field; the reverse map and the counter are
    /// left out of the output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut out = f.debug_struct("AstIdMap");
        out.field("arena", &self.arena);
        out.finish()
    }
}

impl PartialEq for AstIdMap {
    /// Two maps are equal when their arenas are equal; the reverse map is not
    /// compared.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (&self.arena, &other.arena);
        lhs == rhs
    }
}

impl Eq for AstIdMap {}

impl AstIdMap {
    /// Builds the id map for the syntax tree rooted at `node`.
    ///
    /// Every `ast::Item` and `ast::BlockExpr` reached by the `bdfs` walk is
    /// given an arena slot; afterwards the reverse `ptr -> id` table is
    /// populated from the arena.
    ///
    /// # Panics
    ///
    /// Panics if `node` has a parent, i.e. it is not the root of its tree.
    pub(crate) fn from_source(node: &SyntaxNode) -> AstIdMap {
        use hashbrown::hash_map::RawEntryMut;

        assert!(node.parent().is_none());
        let mut res = AstIdMap::default();

        // Walking the tree in breadth-first order guarantees that parents get
        // lower ids than their children, so adding a new child never changes
        // the parent's id. For example, adding a function to a trait leaves
        // the ids of top-level items untouched, which helps caching.
        bdfs(node, |candidate| {
            match_ast! {
                match candidate {
                    ast::Item(item) => {
                        res.alloc(item.syntax());
                        true
                    },
                    ast::BlockExpr(block_expr) => {
                        res.alloc(block_expr.syntax());
                        true
                    },
                    _ => false,
                }
            }
        });

        // Fill the reverse table. Keys are ids, but they are hashed through
        // the pointer they refer to, hence the raw-entry API.
        let mut reverse: hashbrown::HashMap<ErasedFileAstId, (), ()> =
            hashbrown::HashMap::with_capacity_and_hasher(res.arena.len(), ());
        for (id, ptr) in res.arena.iter() {
            let hash = hash_ptr(ptr);
            let slot = reverse.raw_entry_mut().from_hash(hash, |known| *known == id);
            match slot {
                RawEntryMut::Vacant(vacant) => {
                    vacant.insert_with_hasher(hash, id, (), |&key| hash_ptr(&res.arena[key]));
                }
                // Each arena index is visited exactly once by the loop.
                RawEntryMut::Occupied(_) => unreachable!(),
            }
        }
        res.map = reverse;
        res
    }

    /// Returns the typed id of `item`.
    ///
    /// # Panics
    ///
    /// Panics if the syntax node of `item` is not present in this map.
    pub fn ast_id<N: AstNode>(&self, item: &N) -> FileAstId<N> {
        FileAstId { raw: self.erased_ast_id(item.syntax()), _ty: PhantomData }
    }

    /// Looks up the untyped id whose stored pointer equals the pointer of
    /// `item`; panics, listing every known pointer, when there is none.
    fn erased_ast_id(&self, item: &SyntaxNode) -> ErasedFileAstId {
        let ptr = SyntaxNodePtr::new(item);
        let hash = hash_ptr(&ptr);
        let found = self.map.raw_entry().from_hash(hash, |&id| self.arena[id] == ptr);
        if let Some((&id, &())) = found {
            return id;
        }
        let known = self.arena.iter().map(|(_id, known_ptr)| known_ptr).collect::<Vec<_>>();
        panic!("Can't find {:?} in AstIdMap:\n{:?}", item, known)
    }

    /// Returns the typed pointer stored for `id`.
    ///
    /// # Panics
    ///
    /// Panics if the stored pointer cannot be converted into an `AstPtr<N>`.
    pub fn get<N: AstNode>(&self, id: FileAstId<N>) -> AstPtr<N> {
        let raw_ptr = self.arena[id.raw].clone();
        AstPtr::try_from_raw(raw_ptr).unwrap()
    }

    /// Appends a pointer to `item` to the arena and returns its new id.
    fn alloc(&mut self, item: &SyntaxNode) -> ErasedFileAstId {
        let ptr = SyntaxNodePtr::new(item);
        self.arena.alloc(ptr)
    }
}

/// Hashes `ptr` with a freshly built `FxHasher` and returns the 64-bit result.
///
/// `AstIdMap` uses this both when inserting into and when querying its
/// reverse map.
fn hash_ptr(ptr: &SyntaxNodePtr) -> u64 {
    let builder: BuildHasherDefault<FxHasher> = Default::default();
    let mut state = builder.build_hasher();
    Hash::hash(ptr, &mut state);
    state.finish()
}

/// Walks the subtree rooted at `node` in "bdfs" order, calling `f` on every
/// node that is entered.
///
/// bdfs is a mix of breadth-first and depth-first traversal: when `f` returns
/// `true` for a node, its subtree is skipped for now and its children are
/// queued for the next layer (breadth-first); when `f` returns `false`, the
/// walk simply continues into the node in preorder (depth-first).
///
/// In other words, the size of the bfs queue is bounded by the number of
/// "true" nodes.
fn bdfs(node: &SyntaxNode, mut f: impl FnMut(SyntaxNode) -> bool) {
    let mut frontier = vec![node.clone()];
    let mut pending: Vec<SyntaxNode> = Vec::new();
    while !frontier.is_empty() {
        for root in frontier.drain(..) {
            let mut walk = root.preorder();
            // A plain `for` loop would hold the iterator, and we need to call
            // `skip_subtree` on it while iterating.
            while let Some(event) = walk.next() {
                let entered = match event {
                    syntax::WalkEvent::Enter(it) => it,
                    syntax::WalkEvent::Leave(_) => continue,
                };
                if f(entered.clone()) {
                    pending.extend(entered.children());
                    walk.skip_subtree();
                }
            }
        }
        // `frontier` is now empty; what was queued becomes the next layer.
        std::mem::swap(&mut frontier, &mut pending);
    }
}
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			//! `AstIdMap` allows to create stable IDs for "large" syntax nodes like items
			`//! and macro calls.`
			`//!`
			`//! Specifically, it enumerates all items in a file and uses position of a an`
			`//! item as an ID. That way, id's don't change unless the set of items itself`
			`//! changes.`

			`use std::{`
draw the rest of the owl 2020-06-22 08:07:06 -05:00			`any::type_name,`
			`fmt,`
more frugal map 2021-12-05 08:19:48 -06:00			`hash::{BuildHasher, BuildHasherDefault, Hash, Hasher},`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`marker::PhantomData,`
			`};`

prepare to publish el libro de arena 2021-01-14 09:47:42 -06:00			`use la_arena::{Arena, Idx};`
add more counts 2021-01-27 03:16:24 -06:00			`use profile::Count;`
more frugal map 2021-12-05 08:19:48 -06:00			`use rustc_hash::FxHasher;`
Record `FileAstId`s for block expressiosn Every block expression may contain inner items, so we need to be able to refer to any block expression and use it as a salsa key. 2021-01-19 12:49:19 -06:00			`use syntax::{ast, match_ast, AstNode, AstPtr, SyntaxNode, SyntaxNodePtr};`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00
			/// `AstId` points to an AST node in a specific file.
			`pub struct FileAstId<N: AstNode> {`
			`raw: ErasedFileAstId,`
			`_ty: PhantomData<fn() -> N>,`
			`}`

			`impl<N: AstNode> Clone for FileAstId<N> {`
			`fn clone(&self) -> FileAstId<N> {`
			`*self`
			`}`
			`}`
			`impl<N: AstNode> Copy for FileAstId<N> {}`

			`impl<N: AstNode> PartialEq for FileAstId<N> {`
			`fn eq(&self, other: &Self) -> bool {`
			`self.raw == other.raw`
			`}`
			`}`
			`impl<N: AstNode> Eq for FileAstId<N> {}`
			`impl<N: AstNode> Hash for FileAstId<N> {`
			`fn hash<H: Hasher>(&self, hasher: &mut H) {`
			`self.raw.hash(hasher);`
			`}`
			`}`

draw the rest of the owl 2020-06-22 08:07:06 -05:00			`impl<N: AstNode> fmt::Debug for FileAstId<N> {`
			`fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {`
			`write!(f, "FileAstId::<{}>({})", type_name::<N>(), self.raw.into_raw())`
			`}`
			`}`

Add expansion infrastructure for derive macros 2019-12-05 08:10:33 -06:00			`impl<N: AstNode> FileAstId<N> {`
			`// Can't make this a From implementation because of coherence`
			`pub fn upcast<M: AstNode>(self) -> FileAstId<M>`
			`where`
draw the rest of the owl 2020-06-22 08:07:06 -05:00			`N: Into<M>,`
Add expansion infrastructure for derive macros 2019-12-05 08:10:33 -06:00			`{`
			`FileAstId { raw: self.raw, _ty: PhantomData }`
			`}`
			`}`

Simplify Arena to use a generic index 2020-03-19 10:00:11 -05:00			`type ErasedFileAstId = Idx<SyntaxNodePtr>;`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00
			/// Maps items' `SyntaxNode`s to `ErasedFileAstId`s and back.
try to optimize things unsuccessfully Baseline ``` Database loaded: 598.40ms, 304minstr, 118mb (metadata 390.57ms, 21minstr, 841kb; build 111.31ms, 8764kinstr, -214kb) crates: 39, mods: 824, decls: 18647, fns: 13910 Item Collection: 9.70s, 75ginstr, 377mb exprs: 382426, ??ty: 387 (0%), ?ty: 285 (0%), !ty: 145 Inference: 43.16s, 342ginstr, 641mb Total: 52.86s, 417ginstr, 1018mb ``` Eager ``` Database loaded: 625.86ms, 304minstr, 118mb (metadata 414.52ms, 21minstr, 841kb; build 113.81ms, 8764kinstr, -230kb) crates: 39, mods: 824, decls: 18647, fns: 13910 Item Collection: 10.09s, 75ginstr, 389mb exprs: 382426, ??ty: 387 (0%), ?ty: 285 (0%), !ty: 145 Inference: 43.27s, 341ginstr, 644mb Total: 53.37s, 417ginstr, 1034mb ``` Lazy ``` Database loaded: 626.34ms, 304minstr, 118mb (metadata 416.26ms, 21minstr, 841kb; build 113.67ms, 8750kinstr, -209kb) crates: 39, mods: 824, decls: 18647, fns: 13910 Item Collection: 10.16s, 75ginstr, 389mb exprs: 382426, ??ty: 387 (0%), ?ty: 285 (0%), !ty: 145 Inference: 44.51s, 342ginstr, 644mb Total: 54.67s, 417ginstr, 1034mb ``` 2021-11-27 12:13:07 -06:00			`#[derive(Default)]`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`pub struct AstIdMap {`
more frugal map 2021-12-05 08:19:48 -06:00			`/// Maps stable id to unstable ptr.`
Simplify Arena to use a generic index 2020-03-19 10:00:11 -05:00			`arena: Arena<SyntaxNodePtr>,`
more frugal map 2021-12-05 08:19:48 -06:00			`/// Reverse: map ptr to id.`
			`map: hashbrown::HashMap<Idx<SyntaxNodePtr>, (), ()>,`
add more counts 2021-01-27 03:16:24 -06:00			`_c: Count<Self>,`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`}`

try to optimize things unsuccessfully Baseline ``` Database loaded: 598.40ms, 304minstr, 118mb (metadata 390.57ms, 21minstr, 841kb; build 111.31ms, 8764kinstr, -214kb) crates: 39, mods: 824, decls: 18647, fns: 13910 Item Collection: 9.70s, 75ginstr, 377mb exprs: 382426, ??ty: 387 (0%), ?ty: 285 (0%), !ty: 145 Inference: 43.16s, 342ginstr, 641mb Total: 52.86s, 417ginstr, 1018mb ``` Eager ``` Database loaded: 625.86ms, 304minstr, 118mb (metadata 414.52ms, 21minstr, 841kb; build 113.81ms, 8764kinstr, -230kb) crates: 39, mods: 824, decls: 18647, fns: 13910 Item Collection: 10.09s, 75ginstr, 389mb exprs: 382426, ??ty: 387 (0%), ?ty: 285 (0%), !ty: 145 Inference: 43.27s, 341ginstr, 644mb Total: 53.37s, 417ginstr, 1034mb ``` Lazy ``` Database loaded: 626.34ms, 304minstr, 118mb (metadata 416.26ms, 21minstr, 841kb; build 113.67ms, 8750kinstr, -209kb) crates: 39, mods: 824, decls: 18647, fns: 13910 Item Collection: 10.16s, 75ginstr, 389mb exprs: 382426, ??ty: 387 (0%), ?ty: 285 (0%), !ty: 145 Inference: 44.51s, 342ginstr, 644mb Total: 54.67s, 417ginstr, 1034mb ``` 2021-11-27 12:13:07 -06:00			`impl fmt::Debug for AstIdMap {`
			`fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {`
			`f.debug_struct("AstIdMap").field("arena", &self.arena).finish()`
			`}`
			`}`

			`impl PartialEq for AstIdMap {`
			`fn eq(&self, other: &Self) -> bool {`
			`self.arena == other.arena`
			`}`
			`}`
			`impl Eq for AstIdMap {}`

start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`impl AstIdMap {`
reduce visibility 2019-10-29 08:08:06 -05:00			`pub(crate) fn from_source(node: &SyntaxNode) -> AstIdMap {`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`assert!(node.parent().is_none());`
add more counts 2021-01-27 03:16:24 -06:00			`let mut res = AstIdMap::default();`
Add expansion infrastructure for derive macros 2019-12-05 08:10:33 -06:00			`// By walking the tree in breadth-first order we make sure that parents`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`// get lower ids then children. That is, adding a new child does not`
			`// change parent's id. This means that, say, adding a new function to a`
			`// trait does not change ids of top-level items, which helps caching.`
Record `FileAstId`s for block expressiosn Every block expression may contain inner items, so we need to be able to refer to any block expression and use it as a salsa key. 2021-01-19 12:49:19 -06:00			`bdfs(node, \|it\| {`
			`match_ast! {`
			`match it {`
			`ast::Item(module_item) => {`
			`res.alloc(module_item.syntax());`
			`true`
			`},`
			`ast::BlockExpr(block) => {`
			`res.alloc(block.syntax());`
			`true`
			`},`
			`_ => false,`
			`}`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`}`
			`});`
more frugal map 2021-12-05 08:19:48 -06:00			`res.map = hashbrown::HashMap::with_capacity_and_hasher(res.arena.len(), ());`
			`for (idx, ptr) in res.arena.iter() {`
			`let hash = hash_ptr(ptr);`
			`match res.map.raw_entry_mut().from_hash(hash, \|idx2\| *idx2 == idx) {`
			`hashbrown::hash_map::RawEntryMut::Occupied(_) => unreachable!(),`
			`hashbrown::hash_map::RawEntryMut::Vacant(entry) => {`
			`entry.insert_with_hasher(hash, idx, (), \|&idx\| hash_ptr(&res.arena[idx]));`
			`}`
			`}`
			`}`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`res`
			`}`

			`pub fn ast_id<N: AstNode>(&self, item: &N) -> FileAstId<N> {`
less generics 2019-10-29 07:20:08 -05:00			`let raw = self.erased_ast_id(item.syntax());`
			`FileAstId { raw, _ty: PhantomData }`
			`}`
			`fn erased_ast_id(&self, item: &SyntaxNode) -> ErasedFileAstId {`
			`let ptr = SyntaxNodePtr::new(item);`
more frugal map 2021-12-05 08:19:48 -06:00			`let hash = hash_ptr(&ptr);`
			`match self.map.raw_entry().from_hash(hash, \|&idx\| self.arena[idx] == ptr) {`
			`Some((&idx, &())) => idx,`
			`None => panic!(`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`"Can't find {:?} in AstIdMap:\n{:?}",`
less generics 2019-10-29 07:20:08 -05:00			`item,`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`self.arena.iter().map(\|(_id, i)\| i).collect::<Vec<_>>(),`
more frugal map 2021-12-05 08:19:48 -06:00			`),`
			`}`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`}`

draw the rest of the owl 2020-06-22 08:07:06 -05:00			`pub fn get<N: AstNode>(&self, id: FileAstId<N>) -> AstPtr<N> {`
Migrate to SyntaxNodePtr in new rowan 2021-12-30 01:48:53 -06:00			`AstPtr::try_from_raw(self.arena[id.raw].clone()).unwrap()`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`}`

reduce visibility 2019-10-29 07:25:46 -05:00			`fn alloc(&mut self, item: &SyntaxNode) -> ErasedFileAstId {`
			`self.arena.alloc(SyntaxNodePtr::new(item))`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`}`
			`}`

more frugal map 2021-12-05 08:19:48 -06:00			`fn hash_ptr(ptr: &SyntaxNodePtr) -> u64 {`
			`let mut hasher = BuildHasherDefault::<FxHasher>::default().build_hasher();`
			`ptr.hash(&mut hasher);`
			`hasher.finish()`
			`}`

When building an item-tree, keep fewer nodes in memory 2021-01-16 13:38:22 -06:00			/// Walks the subtree in bdfs order, calling `f` for each node. What is bdfs
			`/// order? It is a mix of breadth-first and depth first orders. Nodes for which`
			/// `f` returns true are visited breadth-first, all the other nodes are explored
			`/// depth-first.`
			`///`
			`/// In other words, the size of the bfs queue is bound by the number of "true"`
			`/// nodes.`
			`fn bdfs(node: &SyntaxNode, mut f: impl FnMut(SyntaxNode) -> bool) {`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`let mut curr_layer = vec![node.clone()];`
			`let mut next_layer = vec![];`
			`while !curr_layer.is_empty() {`
			`curr_layer.drain(..).for_each(\|node\| {`
When building an item-tree, keep fewer nodes in memory 2021-01-16 13:38:22 -06:00			`let mut preorder = node.preorder();`
			`while let Some(event) = preorder.next() {`
			`match event {`
			`syntax::WalkEvent::Enter(node) => {`
			`if f(node.clone()) {`
			`next_layer.extend(node.children());`
			`preorder.skip_subtree();`
			`}`
			`}`
			`syntax::WalkEvent::Leave(_) => {}`
			`}`
			`}`
start ra_hir_def crate 2019-10-29 03:15:51 -05:00			`});`
			`std::mem::swap(&mut curr_layer, &mut next_layer);`
			`}`
			`}`