From 0bae33fcd503473aec70aef28b0e08abce965557 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 20 Aug 2024 12:28:39 +1000 Subject: [PATCH] Avoid nested replacement ranges. In a case like this: ``` mod a { mod b { #[cfg_attr(unix, inline)] fn f() { #[cfg_attr(linux, inline)] fn g1() {} #[cfg_attr(linux, inline)] fn g2() {} } } } ``` We currently end up with the following replacement ranges. - The lazy tokens for `f` have replacement ranges for `g1` and `g2`. - The lazy tokens for `a` have replacement ranges for `f`, `g1`, and `g2`. I.e. the replacement ranges for `g1` and `g2` are duplicated. In general, replacement ranges for inner AST nodes are duplicated up the chain for each nested `collect_tokens` call. And the code that processes the replacements is careful about the ordering in which the replacements are applied, to ensure that inner replacements are applied before outer replacements. But all of this is unnecessary. If you apply an inner replacement and then an outer replacement, the outer replacement completely overwrites the inner replacement. This commit avoids the duplication by removing replacements from `self.capture_state.parser_replacements` when they are used. (The effect on the example above is that the lazy tokens for `a` no longer include replacement ranges for `g1` and `g2`.) This eliminates the possibility of nested replacements on individual AST nodes, which avoids the need for careful ordering of replacements. 
--- .../rustc_parse/src/parser/attr_wrapper.rs | 28 +++++-------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 81b683705f3..a74c87ca2a7 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -134,9 +134,8 @@ fn to_attr_token_stream(&self) -> AttrTokenStream { node_replacements.array_windows() { assert!( - node_range.0.end <= next_node_range.0.start - || node_range.0.end >= next_node_range.0.end, - "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})", + node_range.0.end <= next_node_range.0.start, + "Node ranges should be disjoint: ({:?}, {:?}) ({:?}, {:?})", node_range, tokens, next_node_range, @@ -144,20 +143,8 @@ fn to_attr_token_stream(&self) -> AttrTokenStream { ); } - // Process the replace ranges, starting from the highest start - // position and working our way back. If have tokens like: - // - // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` - // - // Then we will generate replace ranges for both - // the `#[cfg(FALSE)] field: bool` and the entire - // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` - // - // By starting processing from the replace range with the greatest - // start position, we ensure that any (outer) replace range which - // encloses another (inner) replace range will fully overwrite the - // inner range's replacement. - for (node_range, target) in node_replacements.into_iter().rev() { + // Process the replace ranges. + for (node_range, target) in node_replacements.into_iter() { assert!( !node_range.0.is_empty(), "Cannot replace an empty node range: {:?}", @@ -364,10 +351,9 @@ pub(super) fn collect_tokens( // from `ParserRange` form to `NodeRange` form. We will perform the actual // replacement only when we convert the `LazyAttrTokenStream` to an // `AttrTokenStream`. 
- self.capture_state.parser_replacements - [parser_replacements_start..parser_replacements_end] - .iter() - .cloned() + self.capture_state + .parser_replacements + .drain(parser_replacements_start..parser_replacements_end) .chain(inner_attr_parser_replacements.into_iter()) .map(|(parser_range, data)| { (NodeRange::new(parser_range, collect_pos.start_pos), data)