Stronger typing for macro_arg query

This commit is contained in:
Lukas Wirth 2024-03-13 17:42:01 +01:00
parent e8182a5bb3
commit 87e0bbc534
2 changed files with 158 additions and 171 deletions

View File

@ -6,19 +6,16 @@
use mbe::{syntax_node_to_token_tree, ValueResult};
use rustc_hash::FxHashSet;
use span::{AstIdMap, SyntaxContextData, SyntaxContextId};
use syntax::{
ast::{self, HasAttrs},
AstNode, Parse, SyntaxElement, SyntaxError, SyntaxNode, SyntaxToken, T,
};
use syntax::{ast, AstNode, Parse, SyntaxElement, SyntaxError, SyntaxNode, SyntaxToken, T};
use triomphe::Arc;
use crate::{
attrs::collect_attrs,
attrs::{collect_attrs, AttrId},
builtin_attr_macro::pseudo_derive_attr_expansion,
builtin_fn_macro::EagerExpander,
cfg_process,
declarative::DeclarativeMacroExpander,
fixup::{self, reverse_fixups, SyntaxFixupUndoInfo},
fixup::{self, SyntaxFixupUndoInfo},
hygiene::{span_with_call_site_ctxt, span_with_def_site_ctxt, span_with_mixed_site_ctxt},
proc_macro::ProcMacros,
span_map::{RealSpanMap, SpanMap, SpanMapRef},
@ -149,8 +146,14 @@ pub fn expand_speculative(
mbe::syntax_node_to_token_tree(speculative_args, span_map, loc.call_site),
SyntaxFixupUndoInfo::NONE,
),
MacroCallKind::Derive { .. } | MacroCallKind::Attr { .. } => {
let censor = censor_for_macro_input(&loc, speculative_args);
MacroCallKind::Derive { derive_attr_index: index, .. }
| MacroCallKind::Attr { invoc_attr_index: index, .. } => {
let censor = if let MacroCallKind::Derive { .. } = loc.kind {
censor_derive_input(index, &ast::Adt::cast(speculative_args.clone())?)
} else {
censor_attr_input(index, &ast::Item::cast(speculative_args.clone())?)
};
let censor_cfg =
cfg_process::process_cfg_attrs(speculative_args, &loc, db).unwrap_or_default();
let mut fixups = fixup::fixup_syntax(span_map, speculative_args, loc.call_site);
@ -324,7 +327,8 @@ pub(crate) fn parse_with_map(
}
}
// FIXME: for derive attributes, this will return separate copies of the same structures!
// FIXME: for derive attributes, this will return separate copies of the same structures! Though
// they may differ in spans due to differing call sites...
fn macro_arg(
db: &dyn ExpandDatabase,
id: MacroCallId,
@ -336,173 +340,155 @@ fn macro_arg(
.then(|| loc.eager.as_deref())
.flatten()
{
ValueResult::ok((arg.clone(), SyntaxFixupUndoInfo::NONE))
} else {
let (parse, map) = parse_with_map(db, loc.kind.file_id());
let root = parse.syntax_node();
let syntax = match loc.kind {
MacroCallKind::FnLike { ast_id, .. } => {
let dummy_tt = |kind| {
(
Arc::new(tt::Subtree {
delimiter: tt::Delimiter {
open: loc.call_site,
close: loc.call_site,
kind,
},
token_trees: Box::default(),
}),
SyntaxFixupUndoInfo::default(),
)
};
let node = &ast_id.to_ptr(db).to_node(&root);
let offset = node.syntax().text_range().start();
let Some(tt) = node.token_tree() else {
return ValueResult::new(
dummy_tt(tt::DelimiterKind::Invisible),
Arc::new(Box::new([SyntaxError::new_at_offset(
"missing token tree".to_owned(),
offset,
)])),
);
};
let first = tt.left_delimiter_token().map(|it| it.kind()).unwrap_or(T!['(']);
let last = tt.right_delimiter_token().map(|it| it.kind()).unwrap_or(T![.]);
let mismatched_delimiters = !matches!(
(first, last),
(T!['('], T![')']) | (T!['['], T![']']) | (T!['{'], T!['}'])
);
if mismatched_delimiters {
// Don't expand malformed (unbalanced) macro invocations. This is
// less than ideal, but trying to expand unbalanced macro calls
// sometimes produces pathological, deeply nested code which breaks
// all kinds of things.
//
// So instead, we'll return an empty subtree here
cov_mark::hit!(issue9358_bad_macro_stack_overflow);
let kind = match first {
_ if loc.def.is_proc_macro() => tt::DelimiterKind::Invisible,
T!['('] => tt::DelimiterKind::Parenthesis,
T!['['] => tt::DelimiterKind::Bracket,
T!['{'] => tt::DelimiterKind::Brace,
_ => tt::DelimiterKind::Invisible,
};
return ValueResult::new(
dummy_tt(kind),
Arc::new(Box::new([SyntaxError::new_at_offset(
"mismatched delimiters".to_owned(),
offset,
)])),
);
}
tt.syntax().clone()
}
MacroCallKind::Derive { ast_id, .. } => {
ast_id.to_ptr(db).to_node(&root).syntax().clone()
}
MacroCallKind::Attr { ast_id, .. } => ast_id.to_ptr(db).to_node(&root).syntax().clone(),
};
let (mut tt, undo_info) = match loc.kind {
MacroCallKind::FnLike { .. } => (
mbe::syntax_node_to_token_tree(&syntax, map.as_ref(), loc.call_site),
SyntaxFixupUndoInfo::NONE,
),
MacroCallKind::Derive { .. } | MacroCallKind::Attr { .. } => {
let censor = censor_for_macro_input(&loc, &syntax);
let censor_cfg =
cfg_process::process_cfg_attrs(&syntax, &loc, db).unwrap_or_default();
let mut fixups = fixup::fixup_syntax(map.as_ref(), &syntax, loc.call_site);
fixups.append.retain(|it, _| match it {
syntax::NodeOrToken::Token(_) => true,
it => !censor.contains(it) && !censor_cfg.contains(it),
});
fixups.remove.extend(censor);
fixups.remove.extend(censor_cfg);
{
let mut tt = mbe::syntax_node_to_token_tree_modified(
&syntax,
map.as_ref(),
fixups.append.clone(),
fixups.remove.clone(),
loc.call_site,
);
reverse_fixups(&mut tt, &fixups.undo_info);
}
(
mbe::syntax_node_to_token_tree_modified(
&syntax,
map,
fixups.append,
fixups.remove,
loc.call_site,
),
fixups.undo_info,
)
}
};
if loc.def.is_proc_macro() {
// proc macros expect their inputs without parentheses, MBEs expect it with them included
tt.delimiter.kind = tt::DelimiterKind::Invisible;
}
if matches!(loc.def.kind, MacroDefKind::BuiltInEager(..)) {
match parse.errors() {
errors if errors.is_empty() => ValueResult::ok((Arc::new(tt), undo_info)),
errors => ValueResult::new(
(Arc::new(tt), undo_info),
// Box::<[_]>::from(res.errors()), not stable yet
Arc::new(errors.to_vec().into_boxed_slice()),
),
}
} else {
ValueResult::ok((Arc::new(tt), undo_info))
}
return ValueResult::ok((arg.clone(), SyntaxFixupUndoInfo::NONE));
}
let (parse, map) = parse_with_map(db, loc.kind.file_id());
let root = parse.syntax_node();
let (censor, item_node) = match loc.kind {
MacroCallKind::FnLike { ast_id, .. } => {
let dummy_tt = |kind| {
(
Arc::new(tt::Subtree {
delimiter: tt::Delimiter {
open: loc.call_site,
close: loc.call_site,
kind,
},
token_trees: Box::default(),
}),
SyntaxFixupUndoInfo::default(),
)
};
let node = &ast_id.to_ptr(db).to_node(&root);
let offset = node.syntax().text_range().start();
let Some(tt) = node.token_tree() else {
return ValueResult::new(
dummy_tt(tt::DelimiterKind::Invisible),
Arc::new(Box::new([SyntaxError::new_at_offset(
"missing token tree".to_owned(),
offset,
)])),
);
};
let first = tt.left_delimiter_token().map(|it| it.kind()).unwrap_or(T!['(']);
let last = tt.right_delimiter_token().map(|it| it.kind()).unwrap_or(T![.]);
let mismatched_delimiters = !matches!(
(first, last),
(T!['('], T![')']) | (T!['['], T![']']) | (T!['{'], T!['}'])
);
if mismatched_delimiters {
// Don't expand malformed (unbalanced) macro invocations. This is
// less than ideal, but trying to expand unbalanced macro calls
// sometimes produces pathological, deeply nested code which breaks
// all kinds of things.
//
// So instead, we'll return an empty subtree here
cov_mark::hit!(issue9358_bad_macro_stack_overflow);
let kind = match first {
_ if loc.def.is_proc_macro() => tt::DelimiterKind::Invisible,
T!['('] => tt::DelimiterKind::Parenthesis,
T!['['] => tt::DelimiterKind::Bracket,
T!['{'] => tt::DelimiterKind::Brace,
_ => tt::DelimiterKind::Invisible,
};
return ValueResult::new(
dummy_tt(kind),
Arc::new(Box::new([SyntaxError::new_at_offset(
"mismatched delimiters".to_owned(),
offset,
)])),
);
}
let tt = mbe::syntax_node_to_token_tree(tt.syntax(), map.as_ref(), loc.call_site);
let val = (Arc::new(tt), SyntaxFixupUndoInfo::NONE);
return if matches!(loc.def.kind, MacroDefKind::BuiltInEager(..)) {
match parse.errors() {
errors if errors.is_empty() => ValueResult::ok(val),
errors => ValueResult::new(
val,
// Box::<[_]>::from(res.errors()), not stable yet
Arc::new(errors.to_vec().into_boxed_slice()),
),
}
} else {
ValueResult::ok(val)
};
}
MacroCallKind::Derive { ast_id, derive_attr_index, .. } => {
let node = ast_id.to_ptr(db).to_node(&root);
(censor_derive_input(derive_attr_index, &node), node.into())
}
MacroCallKind::Attr { ast_id, invoc_attr_index, .. } => {
let node = ast_id.to_ptr(db).to_node(&root);
(censor_attr_input(invoc_attr_index, &node), node)
}
};
let (mut tt, undo_info) = {
let syntax = item_node.syntax();
let censor_cfg = cfg_process::process_cfg_attrs(syntax, &loc, db).unwrap_or_default();
let mut fixups = fixup::fixup_syntax(map.as_ref(), syntax, loc.call_site);
fixups.append.retain(|it, _| match it {
syntax::NodeOrToken::Token(_) => true,
it => !censor.contains(it) && !censor_cfg.contains(it),
});
fixups.remove.extend(censor);
fixups.remove.extend(censor_cfg);
(
mbe::syntax_node_to_token_tree_modified(
syntax,
map,
fixups.append,
fixups.remove,
loc.call_site,
),
fixups.undo_info,
)
};
if loc.def.is_proc_macro() {
// proc macros expect their inputs without parentheses, MBEs expect it with them included
tt.delimiter.kind = tt::DelimiterKind::Invisible;
}
ValueResult::ok((Arc::new(tt), undo_info))
}
// FIXME: Censoring info should be calculated by the caller! Namely by name resolution
/// Certain macro calls expect some nodes in the input to be preprocessed away, namely:
/// - derives expect all `#[derive(..)]` invocations up to the currently invoked one to be stripped
/// - attributes expect the invoking attribute to be stripped
fn censor_for_macro_input(loc: &MacroCallLoc, node: &SyntaxNode) -> FxHashSet<SyntaxElement> {
/// Derives expect all `#[derive(..)]` invocations up to the currently invoked one to be stripped
fn censor_derive_input(derive_attr_index: AttrId, node: &ast::Adt) -> FxHashSet<SyntaxElement> {
// FIXME: handle `cfg_attr`
(|| {
let censor = match loc.kind {
MacroCallKind::FnLike { .. } => return None,
MacroCallKind::Derive { derive_attr_index, .. } => {
cov_mark::hit!(derive_censoring);
ast::Item::cast(node.clone())?
.attrs()
.take(derive_attr_index.ast_index() + 1)
// FIXME, this resolution should not be done syntactically
// derive is a proper macro now, no longer builtin
// But we do not have resolution at this stage, this means
// we need to know about all macro calls for the given ast item here
// so we require some kind of mapping...
.filter(|attr| attr.simple_name().as_deref() == Some("derive"))
.map(|it| it.syntax().clone().into())
.collect()
}
MacroCallKind::Attr { .. } if loc.def.is_attribute_derive() => return None,
MacroCallKind::Attr { invoc_attr_index, .. } => {
cov_mark::hit!(attribute_macro_attr_censoring);
collect_attrs(&ast::Item::cast(node.clone())?)
.nth(invoc_attr_index.ast_index())
.and_then(|x| Either::left(x.1))
.map(|attr| attr.syntax().clone().into())
.into_iter()
.collect()
}
};
Some(censor)
})()
.unwrap_or_default()
cov_mark::hit!(derive_censoring);
collect_attrs(node)
.take(derive_attr_index.ast_index() + 1)
.filter_map(|(_, attr)| Either::left(attr))
// FIXME, this resolution should not be done syntactically
// derive is a proper macro now, no longer builtin
// But we do not have resolution at this stage, this means
// we need to know about all macro calls for the given ast item here
// so we require some kind of mapping...
.filter(|attr| attr.simple_name().as_deref() == Some("derive"))
.map(|it| it.syntax().clone().into())
.collect()
}
/// Attributes expect the invoking attribute to be stripped\
fn censor_attr_input(invoc_attr_index: AttrId, node: &ast::Item) -> FxHashSet<SyntaxElement> {
// FIXME: handle `cfg_attr`
cov_mark::hit!(attribute_macro_attr_censoring);
collect_attrs(node)
.nth(invoc_attr_index.ast_index())
.and_then(|(_, attr)| Either::left(attr))
.map(|attr| attr.syntax().clone().into())
.into_iter()
.collect()
}
impl TokenExpander {

View File

@ -176,6 +176,7 @@ pub struct MacroCallLoc {
// leakage problems here
eager: Option<Arc<EagerCallInfo>>,
pub kind: MacroCallKind,
// FIXME: Spans while relative to an anchor, are still rather unstable
pub call_site: Span,
}
impl_intern_value_trivial!(MacroCallLoc);