rust/crates/hir_expand/src/db.rs

//! Defines database & queries for macro expansion.

use std::sync::Arc;

use base_db::{salsa, SourceDatabase};
use either::Either;
use limit::Limit;
use mbe::{syntax_node_to_token_tree, ExpandError, ExpandResult};
use rustc_hash::FxHashSet;
use syntax::{
    algo::diff,
    ast::{self, HasAttrs, HasDocComments},
    AstNode, GreenNode, Parse, SyntaxNode, SyntaxToken, T,
};

use crate::{
    ast_id_map::AstIdMap, hygiene::HygieneFrame, BuiltinAttrExpander, BuiltinDeriveExpander,
    BuiltinFnLikeExpander, ExpandTo, HirFileId, HirFileIdRepr, MacroCallId, MacroCallKind,
    MacroCallLoc, MacroDefId, MacroDefKind, MacroFile, ProcMacroExpander,
};

/// Total limit on the number of tokens produced by any macro invocation.
///
/// If an invocation produces more tokens than this limit, it will not be stored in the database and
/// an error will be emitted.
///
/// The limit is enforced in `macro_expand`: when `TOKEN_LIMIT.check` rejects the token count of
/// an expansion, the expansion is replaced by an error-only result.
///
/// Actual max for `analysis-stats .` at some point: 30672.
// 524_288 == 2^19
static TOKEN_LIMIT: Limit = Limit::new(524_288);

/// The expander backing a macro definition, as produced by the `macro_def` query.
///
/// Every variant can turn the input token tree of a macro call into its expansion via
/// `TokenExpander::expand`. Note that `macro_def` has no variant to return for
/// `MacroDefKind::BuiltInEager` and reports an error for it instead.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TokenExpander {
    /// Old-style `macro_rules` or the new macros 2.0
    ///
    /// `def_site_token_map` is the token map obtained while lowering the macro definition's
    /// body to a token tree.
    DeclarativeMacro { mac: mbe::DeclarativeMacro, def_site_token_map: mbe::TokenMap },
    /// Stuff like `line!` and `file!`.
    Builtin(BuiltinFnLikeExpander),
    /// `global_allocator` and such.
    BuiltinAttr(BuiltinAttrExpander),
    /// `derive(Copy)` and such.
    BuiltinDerive(BuiltinDeriveExpander),
    /// The thing we love the most here in rust-analyzer -- procedural macros.
    ProcMacro(ProcMacroExpander),
}

impl TokenExpander {
    /// Expands the input token tree `tt` of the macro call `id` with this expander.
    ///
    /// Declarative macros and builtins are expanded directly from `tt`. Procedural macros are
    /// routed through the `expand_proc_macro` query instead, which does not use `tt`.
    fn expand(
        &self,
        db: &dyn AstDatabase,
        id: MacroCallId,
        tt: &tt::Subtree,
    ) -> mbe::ExpandResult<tt::Subtree> {
        match self {
            // Going through the query stores the result in the salsa db, which prevents
            // non-deterministic behavior in some proc-macro implementations.
            // See #4315 for details.
            Self::ProcMacro(_) => db.expand_proc_macro(id),
            Self::DeclarativeMacro { mac, .. } => mac.expand(tt),
            Self::Builtin(expander) => expander.expand(db, id, tt),
            Self::BuiltinAttr(expander) => expander.expand(db, id, tt),
            Self::BuiltinDerive(expander) => expander.expand(db, id, tt),
        }
    }

    /// Maps a token id of the macro input to the corresponding id in the expansion.
    ///
    /// Only declarative macros remap ids; for every other expander the id is returned as is.
    pub(crate) fn map_id_down(&self, id: tt::TokenId) -> tt::TokenId {
        match self {
            Self::Builtin(..)
            | Self::BuiltinAttr(..)
            | Self::BuiltinDerive(..)
            | Self::ProcMacro(..) => id,
            Self::DeclarativeMacro { mac, .. } => mac.map_id_down(id),
        }
    }

    /// Maps a token id of the expansion back up, together with the origin of the token.
    ///
    /// Only declarative macros remap ids; every other expander returns the id unchanged with
    /// `mbe::Origin::Call` as its origin.
    pub(crate) fn map_id_up(&self, id: tt::TokenId) -> (tt::TokenId, mbe::Origin) {
        match self {
            Self::Builtin(..)
            | Self::BuiltinAttr(..)
            | Self::BuiltinDerive(..)
            | Self::ProcMacro(..) => (id, mbe::Origin::Call),
            Self::DeclarativeMacro { mac, .. } => mac.map_id_up(id),
        }
    }
}

// FIXME: rename to ExpandDatabase
/// Salsa query group with the queries needed for macro expansion.
#[salsa::query_group(AstDatabaseStorage)]
pub trait AstDatabase: SourceDatabase {
    /// Returns the `AstIdMap` built from the syntax tree of `file_id`, or a default map if
    /// `parse_or_expand` yields no tree for that file.
    fn ast_id_map(&self, file_id: HirFileId) -> Arc<AstIdMap>;

    /// Main public API -- parses a hir file, not caring whether it's a real
    /// file or a macro expansion.
    #[salsa::transparent]
    fn parse_or_expand(&self, file_id: HirFileId) -> Option<SyntaxNode>;
    /// Implementation for the macro case.
    ///
    /// Besides the parsed expansion, this returns the token map produced while converting
    /// the expanded token tree back into a syntax tree.
    fn parse_macro_expansion(
        &self,
        macro_file: MacroFile,
    ) -> ExpandResult<Option<(Parse<SyntaxNode>, Arc<mbe::TokenMap>)>>;

    /// Macro ids. That's probably the tricksiest bit in rust-analyzer, and the
    /// reason why we use salsa at all.
    ///
    /// We encode macro definitions into ids of macro calls, this is what allows us
    /// to be incremental.
    #[salsa::interned]
    fn intern_macro_call(&self, macro_call: MacroCallLoc) -> MacroCallId;

    /// Lowers syntactic macro call to a token tree representation.
    ///
    /// For procedural macros the outer delimiter of the resulting subtree is removed.
    #[salsa::transparent]
    fn macro_arg(&self, id: MacroCallId) -> Option<Arc<(tt::Subtree, mbe::TokenMap)>>;
    /// Extracts syntax node, corresponding to a macro call. That's a firewall
    /// query, only typing in the macro call itself changes the returned
    /// subtree.
    fn macro_arg_text(&self, id: MacroCallId) -> Option<GreenNode>;
    /// Gets the expander for this macro. This compiles declarative macros, and
    /// just fetches procedural ones.
    fn macro_def(&self, id: MacroDefId) -> Result<Arc<TokenExpander>, mbe::ParseError>;

    /// Expand macro call to a token tree. This query is LRUed (we keep 128 or so results in memory)
    fn macro_expand(&self, macro_call: MacroCallId) -> ExpandResult<Option<Arc<tt::Subtree>>>;
    /// Special case of the previous query for procedural macros. We can't LRU
    /// proc macros, since they are not deterministic in general, and
    /// non-determinism breaks salsa in a very, very, very bad way. @edwin0cheng
    /// heroically debugged this once!
    fn expand_proc_macro(&self, call: MacroCallId) -> ExpandResult<tt::Subtree>;
    /// Firewall query that returns the error from the `macro_expand` query.
    fn macro_expand_error(&self, macro_call: MacroCallId) -> Option<ExpandError>;

    /// Returns the `HygieneFrame` constructed for `file_id`.
    fn hygiene_frame(&self, file_id: HirFileId) -> Arc<HygieneFrame>;
}

/// This expands the given macro call, but with different arguments. This is
/// used for completion, where we want to see what 'would happen' if we insert a
/// token. The `token_to_map` is mapped down into the expansion, with the mapped
/// token returned.
///
/// Returns the syntax node of the speculative expansion together with the token
/// inside it that corresponds to `token_to_map`. Returns `None` if the macro
/// definition cannot be obtained or if the token cannot be mapped into the
/// expansion.
pub fn expand_speculative(
    db: &dyn AstDatabase,
    actual_macro_call: MacroCallId,
    speculative_args: &SyntaxNode,
    token_to_map: SyntaxToken,
) -> Option<(SyntaxNode, SyntaxToken)> {
    let loc = db.lookup_intern_macro_call(actual_macro_call);
    let macro_def = db.macro_def(loc.def).ok()?;
    let token_range = token_to_map.text_range();

    // Build the subtree and token mapping for the speculative args
    let censor = censor_for_macro_input(&loc, speculative_args);
    let (mut tt, spec_args_tmap) =
        mbe::syntax_node_to_token_tree_censored(speculative_args, &censor);

    let (attr_arg, token_id) = match loc.kind {
        MacroCallKind::Attr { invoc_attr_index, .. } => {
            // Attributes may have an input token tree, build the subtree and map for this as well
            // then try finding a token id for our token if it is inside this input subtree.
            let item = ast::Item::cast(speculative_args.clone())?;
            let attr = item
                .doc_comments_and_attrs()
                .nth(invoc_attr_index as usize)
                .and_then(Either::left)?;
            match attr.token_tree() {
                Some(token_tree) => {
                    // Reuse the token tree bound by the match instead of looking it up again.
                    let (mut tree, map) = syntax_node_to_token_tree(token_tree.syntax());
                    tree.delimiter = None;

                    // Shift the attribute input's token ids relative to the item's token tree;
                    // the same shift is applied to the id of the token we are looking for.
                    let shift = mbe::Shift::new(&tt);
                    shift.shift_all(&mut tree);

                    let token_id = if token_tree.syntax().text_range().contains_range(token_range) {
                        // `map` was built from the token tree alone, so make the range relative
                        // to the start of the attribute input before the lookup.
                        let attr_input_start =
                            token_tree.left_delimiter_token()?.text_range().start();
                        let range = token_range.checked_sub(attr_input_start)?;
                        let token_id = shift.shift(map.token_by_range(range)?);
                        Some(token_id)
                    } else {
                        None
                    };
                    (Some(tree), token_id)
                }
                _ => (None, None),
            }
        }
        _ => (None, None),
    };
    let token_id = match token_id {
        Some(token_id) => token_id,
        // token wasn't inside an attribute input so it has to be in the general macro input
        None => {
            let range = token_range.checked_sub(speculative_args.text_range().start())?;
            let token_id = spec_args_tmap.token_by_range(range)?;
            macro_def.map_id_down(token_id)
        }
    };

    // Do the actual expansion, we need to directly expand the proc macro due to the attribute args
    // Otherwise the expand query will fetch the non speculative attribute args and pass those instead.
    let speculative_expansion = if let MacroDefKind::ProcMacro(expander, ..) = loc.def.kind {
        tt.delimiter = None;
        expander.expand(db, loc.krate, &tt, attr_arg.as_ref())
    } else {
        macro_def.expand(db, actual_macro_call, &tt)
    };

    let expand_to = macro_expand_to(db, actual_macro_call);
    let (node, rev_tmap) = token_tree_to_syntax_node(&speculative_expansion.value, expand_to);

    // Find the mapped token in the expansion via the reverse token map.
    let range = rev_tmap.first_range_by_token(token_id, token_to_map.kind())?;
    let token = node.syntax_node().covering_element(range).into_token()?;
    Some((node.syntax_node(), token))
}

/// Builds the `AstIdMap` for `file_id` from its syntax tree, falling back to a default map when
/// the file has no syntax tree.
fn ast_id_map(db: &dyn AstDatabase, file_id: HirFileId) -> Arc<AstIdMap> {
    let id_map = match db.parse_or_expand(file_id) {
        Some(root) => AstIdMap::from_source(&root),
        None => AstIdMap::default(),
    };
    Arc::new(id_map)
}

/// Returns the root syntax node of `file_id`, parsing a real file or expanding a macro file.
///
/// Yields `None` only for macro files whose expansion produced no value.
fn parse_or_expand(db: &dyn AstDatabase, file_id: HirFileId) -> Option<SyntaxNode> {
    match file_id.0 {
        HirFileIdRepr::MacroFile(macro_file) => {
            // FIXME: Note how we convert from `Parse` to `SyntaxNode` here,
            // forgetting about parse errors.
            let (parse, _token_map) = db.parse_macro_expansion(macro_file).value?;
            Some(parse.syntax_node())
        }
        HirFileIdRepr::FileId(file_id) => {
            let source_file = db.parse(file_id).tree();
            Some(source_file.syntax().clone())
        }
    }
}

/// Expands the macro call behind `macro_file` and parses the resulting token tree.
///
/// On success the value holds the parse together with the token map produced while converting
/// the token tree into a syntax tree. An expansion error is logged as a warning and passed on in
/// the result. If an error occurred, the expansion is dropped from the result when its call node
/// is not available or when `is_self_replicating` reports that it reproduces its own call.
fn parse_macro_expansion(
    db: &dyn AstDatabase,
    macro_file: MacroFile,
) -> ExpandResult<Option<(Parse<SyntaxNode>, Arc<mbe::TokenMap>)>> {
    let _p = profile::span("parse_macro_expansion");
    let ExpandResult { value: expansion, err } = db.macro_expand(macro_file.macro_call_id);

    if let Some(err) = &err {
        // Note:
        // The final goal is for every macro to expand successfully, so that
        // this warning is never emitted.
        let loc: MacroCallLoc = db.lookup_intern_macro_call(macro_file.macro_call_id);
        let node = loc.kind.to_node(db);

        // Collect the chain of enclosing macro calls for the warning log.
        let mut parent_calls = Vec::new();
        let mut current = loc.kind.file_id().call_node(db);
        while let Some(call) = current {
            current = call.file_id.call_node(db);
            parent_calls.push(format!("{:#}", call.value));
        }
        let parents = parent_calls.join("\n");

        tracing::warn!(
            "fail on macro_parse: (reason: {:?} macro_call: {:#}) parents: {}",
            err,
            node.value,
            parents
        );
    }

    let tt = match expansion {
        None => return ExpandResult { value: None, err },
        Some(tt) => tt,
    };

    let expand_to = macro_expand_to(db, macro_file.macro_call_id);

    tracing::debug!("expanded = {}", tt.as_debug_string());
    tracing::debug!("kind = {:?}", expand_to);

    let (parse, rev_token_map) = token_tree_to_syntax_node(&tt, expand_to);

    let err = match err {
        Some(err) => err,
        None => {
            tracing::debug!("parse = {:?}", parse.syntax_node().kind());
            return ExpandResult { value: Some((parse, Arc::new(rev_token_map))), err: None };
        }
    };

    // Safety check for recursive identity macro: an erroneous expansion is only kept if its
    // call node is known and the expansion does not replicate that call.
    let file: HirFileId = macro_file.into();
    let discard_expansion = match file.call_node(db) {
        Some(call_node) => is_self_replicating(&parse.syntax_node(), &call_node.value),
        None => true,
    };
    if discard_expansion {
        ExpandResult::only_err(err)
    } else {
        ExpandResult { value: Some((parse, Arc::new(rev_token_map))), err: Some(err) }
    }
}

/// Lowers the argument of the macro call `id` into a token tree and its token map.
///
/// Returns `None` when `macro_arg_text` yields no argument for the call.
fn macro_arg(db: &dyn AstDatabase, id: MacroCallId) -> Option<Arc<(tt::Subtree, mbe::TokenMap)>> {
    let green = db.macro_arg_text(id)?;
    let loc = db.lookup_intern_macro_call(id);

    let root = SyntaxNode::new_root(green);
    let censored = censor_for_macro_input(&loc, &root);
    let (mut subtree, token_map) = mbe::syntax_node_to_token_tree_censored(&root, &censored);

    // MBEs expect their input with the parentheses included, proc macros expect it without them
    if loc.def.is_proc_macro() {
        subtree.delimiter = None;
    }

    Some(Arc::new((subtree, token_map)))
}

fn censor_for_macro_input(loc: &MacroCallLoc, node: &SyntaxNode) -> FxHashSet<SyntaxNode> {
    (|| {
        let censor = match loc.kind {
            MacroCallKind::FnLike { .. } => return None,
            MacroCallKind::Derive { derive_attr_index, .. } => {
                cov_mark::hit!(derive_censoring);
                ast::Item::cast(node.clone())?
                    .attrs()
                    .take(derive_attr_index as usize + 1)
                    .filter(|attr| attr.simple_name().as_deref() == Some("derive"))
                    .map(|it| it.syntax().clone())
                    .collect()
            }
            MacroCallKind::Attr { invoc_attr_index, .. } => {
                cov_mark::hit!(attribute_macro_attr_censoring);
                ast::Item::cast(node.clone())?
                    .doc_comments_and_attrs()
                    .nth(invoc_attr_index as usize)
                    .and_then(Either::left)
                    .map(|attr| attr.syntax().clone())
                    .into_iter()
                    .collect()
            }
        };
        Some(censor)
    })()
    .unwrap_or_default()
}

/// Returns the green node of the argument of the macro call `id`.
///
/// Yields `None` if the call has no argument, or if it is a fn-like call whose argument is not
/// enclosed in a matching pair of delimiters.
fn macro_arg_text(db: &dyn AstDatabase, id: MacroCallId) -> Option<GreenNode> {
    let loc = db.lookup_intern_macro_call(id);
    let arg = loc.kind.arg(db)?;
    if let MacroCallKind::FnLike { .. } = loc.kind {
        // `.` stands in for a missing first/last element; it never matches a delimiter.
        let opening = arg.first_child_or_token().map(|it| it.kind()).unwrap_or(T![.]);
        let closing = arg.last_child_or_token().map(|it| it.kind()).unwrap_or(T![.]);
        match (opening, closing) {
            (T!['('], T![')']) | (T!['['], T![']']) | (T!['{'], T!['}']) => (),
            _ => {
                // Refuse to expand malformed (unbalanced) macro invocations.
                // This is less than ideal, but expanding unbalanced macro
                // calls sometimes yields pathological, deeply nested code
                // which breaks all kinds of things.
                //
                // Once all recursive things have explicit recursion counters,
                // this check might be removed.
                cov_mark::hit!(issue9358_bad_macro_stack_overflow);
                return None;
            }
        }
    }
    Some(arg.green().into())
}

fn macro_def(db: &dyn AstDatabase, id: MacroDefId) -> Result<Arc<TokenExpander>, mbe::ParseError> {
    match id.kind {
        MacroDefKind::Declarative(ast_id) => {
            let (mac, def_site_token_map) = match ast_id.to_node(db) {
                ast::Macro::MacroRules(macro_rules) => {
                    let arg = macro_rules
                        .token_tree()
                        .ok_or_else(|| mbe::ParseError::Expected("expected a token tree".into()))?;
                    let (tt, def_site_token_map) = mbe::syntax_node_to_token_tree(arg.syntax());
                    let mac = mbe::DeclarativeMacro::parse_macro_rules(&tt)?;
                    (mac, def_site_token_map)
                }
                ast::Macro::MacroDef(macro_def) => {
                    let arg = macro_def
                        .body()
                        .ok_or_else(|| mbe::ParseError::Expected("expected a token tree".into()))?;
                    let (tt, def_site_token_map) = mbe::syntax_node_to_token_tree(arg.syntax());
                    let mac = mbe::DeclarativeMacro::parse_macro2(&tt)?;
                    (mac, def_site_token_map)
                }
            };
            Ok(Arc::new(TokenExpander::DeclarativeMacro { mac, def_site_token_map }))
        }
        MacroDefKind::BuiltIn(expander, _) => Ok(Arc::new(TokenExpander::Builtin(expander))),
        MacroDefKind::BuiltInAttr(expander, _) => {
            Ok(Arc::new(TokenExpander::BuiltinAttr(expander)))
        }
        MacroDefKind::BuiltInDerive(expander, _) => {
            Ok(Arc::new(TokenExpander::BuiltinDerive(expander)))
        }
        MacroDefKind::BuiltInEager(..) => {
            // FIXME: Return a random error here just to make the types align.
            // This obviously should do something real instead.
            Err(mbe::ParseError::UnexpectedToken("unexpected eager macro".into()))
        }
        MacroDefKind::ProcMacro(expander, ..) => Ok(Arc::new(TokenExpander::ProcMacro(expander))),
    }
}

/// Expands the macro call `id` to a token tree.
///
/// Eager calls return their stored `arg_or_expansion` directly. Otherwise the call's argument is
/// lowered, the definition's expander is fetched and run, and the result is checked against
/// `TOKEN_LIMIT`. Failures in any of these steps are reported as an error-only result.
fn macro_expand(db: &dyn AstDatabase, id: MacroCallId) -> ExpandResult<Option<Arc<tt::Subtree>>> {
    let _p = profile::span("macro_expand");
    let loc: MacroCallLoc = db.lookup_intern_macro_call(id);
    if let Some(eager) = &loc.eager {
        // FIXME: There could be errors here!
        let value = Some(eager.arg_or_expansion.clone());
        return ExpandResult { value, err: None };
    }

    let macro_arg = match db.macro_arg(id) {
        None => return ExpandResult::str_err("Failed to lower macro args to token tree".into()),
        Some(arg) => arg,
    };

    let expander = match db.macro_def(loc.def) {
        // FIXME: This is weird -- we effectively report macro *definition*
        // errors lazily, when we try to expand the macro. Instead, they should
        // be reported at the definition site (when we construct a def map).
        Err(err) => return ExpandResult::str_err(format!("invalid macro definition: {}", err)),
        Ok(expander) => expander,
    };

    let expansion = expander.expand(db, id, &macro_arg.0);
    // Set a hard limit for the expanded tt
    let token_count = expansion.value.count();
    // XXX: Make ExpandResult a real error and use .map_err instead?
    match TOKEN_LIMIT.check(token_count) {
        Ok(_) => ExpandResult { value: Some(Arc::new(expansion.value)), err: expansion.err },
        Err(_) => ExpandResult::str_err(format!(
            "macro invocation exceeds token limit: produced {} tokens, limit is {}",
            token_count,
            TOKEN_LIMIT.inner(),
        )),
    }
}

fn macro_expand_error(db: &dyn AstDatabase, macro_call: MacroCallId) -> Option<ExpandError> {
    db.macro_expand(macro_call).err
}

/// Runs the procedural macro behind the macro call `id` on the call's lowered argument.
///
/// For attribute macros the attribute's own arguments are passed along as well. Panics if the
/// call's definition is not a procedural macro.
fn expand_proc_macro(db: &dyn AstDatabase, id: MacroCallId) -> ExpandResult<tt::Subtree> {
    let loc: MacroCallLoc = db.lookup_intern_macro_call(id);
    let macro_arg = match db.macro_arg(id) {
        None => return ExpandResult::str_err("No arguments for proc-macro".to_string()),
        Some(arg) => arg,
    };
    let (input, _token_map) = &*macro_arg;

    let expander = match loc.def.kind {
        MacroDefKind::ProcMacro(expander, ..) => expander,
        _ => unreachable!(),
    };

    let attr_arg = if let MacroCallKind::Attr { attr_args, .. } = &loc.kind {
        // Shift the token ids of the attribute arguments relative to the macro input
        // (presumably so that the two id ranges do not collide -- confirm in `mbe::Shift`).
        let mut shifted = attr_args.0.clone();
        mbe::Shift::new(input).shift_all(&mut shifted);
        Some(shifted)
    } else {
        None
    };

    expander.expand(db, loc.krate, input, attr_arg.as_ref())
}

/// Checks whether the expansion `from` reproduces the node `to`.
///
/// This holds if the diff between `to` and the whole of `from` is empty. If `from` is an
/// `ast::MacroStmts`, it also holds when any of its statements or its trailing expression has
/// an empty diff against `to`.
fn is_self_replicating(from: &SyntaxNode, to: &SyntaxNode) -> bool {
    let reproduces_target = |candidate: &SyntaxNode| diff(candidate, to).is_empty();

    if reproduces_target(from) {
        return true;
    }
    match ast::MacroStmts::cast(from.clone()) {
        Some(stmts) => {
            stmts.statements().any(|stmt| reproduces_target(stmt.syntax()))
                || stmts.expr().map_or(false, |expr| reproduces_target(expr.syntax()))
        }
        None => false,
    }
}

/// Constructs the `HygieneFrame` for `file_id` and wraps it in an `Arc`.
fn hygiene_frame(db: &dyn AstDatabase, file_id: HirFileId) -> Arc<HygieneFrame> {
    let frame = HygieneFrame::new(db, file_id);
    Arc::new(frame)
}

/// Looks up the macro call `id` and returns what kind of syntax its expansion is parsed as.
fn macro_expand_to(db: &dyn AstDatabase, id: MacroCallId) -> ExpandTo {
    db.lookup_intern_macro_call(id).kind.expand_to()
}

fn token_tree_to_syntax_node(
    tt: &tt::Subtree,
    expand_to: ExpandTo,
) -> (Parse<SyntaxNode>, mbe::TokenMap) {
    let entry_point = match expand_to {
        ExpandTo::Statements => mbe::TopEntryPoint::MacroStmts,
        ExpandTo::Items => mbe::TopEntryPoint::MacroItems,
        ExpandTo::Pattern => mbe::TopEntryPoint::Pattern,
        ExpandTo::Type => mbe::TopEntryPoint::Type,
        ExpandTo::Expr => mbe::TopEntryPoint::Expr,
    };
    mbe::token_tree_to_syntax_node(tt, entry_point)
}