From 0bae33fcd503473aec70aef28b0e08abce965557 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Tue, 20 Aug 2024 12:28:39 +1000
Subject: [PATCH] Avoid nested replacement ranges.

In a case like this:
```
mod a {
    mod b {
        #[cfg_attr(unix, inline)]
        fn f() {
            #[cfg_attr(linux, inline)]
            fn g1() {}
            #[cfg_attr(linux, inline)]
            fn g2() {}
        }
    }
}
```
We currently end up with the following replacement ranges.
- The lazy tokens for `f` has replacement ranges for `g1` and `g2`.
- The lazy tokens for `a` has replacement ranges for `f`, `g1`, and
  `g2`.

I.e. the replacement ranges for `g1` and `g2` are duplicated. In
general, replacement ranges for inner AST nodes are duplicated up the
chain for each nested `collect_tokens` call. And the code that processes
the replacements is careful about the ordering in which the replacements
are applied, to ensure that inner replacements are applied before outer
replacements.

But all of this is unnecessary. If you apply an inner replacement and
then an outer replacement, the outer replacement completely overwrites
the inner replacement.

This commit avoids the duplication by removing replacements from
`self.capture_state.parser_replacements` when they are used. (The effect
on the example above is that the lazy tokesn for `a` no longer include
replacement ranges for `g1` and `g2`.) This eliminates the possibility
of nested replacements on individual AST nodes, which avoids the need
for careful ordering of replacements.
---
 .../rustc_parse/src/parser/attr_wrapper.rs    | 28 +++++--------------
 1 file changed, 7 insertions(+), 21 deletions(-)

diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs
index 81b683705f3..a74c87ca2a7 100644
--- a/compiler/rustc_parse/src/parser/attr_wrapper.rs
+++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs
@@ -134,9 +134,8 @@ fn to_attr_token_stream(&self) -> AttrTokenStream {
                 node_replacements.array_windows()
             {
                 assert!(
-                    node_range.0.end <= next_node_range.0.start
-                        || node_range.0.end >= next_node_range.0.end,
-                    "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
+                    node_range.0.end <= next_node_range.0.start,
+                    "Node ranges should be disjoint: ({:?}, {:?}) ({:?}, {:?})",
                     node_range,
                     tokens,
                     next_node_range,
@@ -144,20 +143,8 @@ fn to_attr_token_stream(&self) -> AttrTokenStream {
                 );
             }
 
-            // Process the replace ranges, starting from the highest start
-            // position and working our way back. If have tokens like:
-            //
-            // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
-            //
-            // Then we will generate replace ranges for both
-            // the `#[cfg(FALSE)] field: bool` and the entire
-            // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
-            //
-            // By starting processing from the replace range with the greatest
-            // start position, we ensure that any (outer) replace range which
-            // encloses another (inner) replace range will fully overwrite the
-            // inner range's replacement.
-            for (node_range, target) in node_replacements.into_iter().rev() {
+            // Process the replace ranges.
+            for (node_range, target) in node_replacements.into_iter() {
                 assert!(
                     !node_range.0.is_empty(),
                     "Cannot replace an empty node range: {:?}",
@@ -364,10 +351,9 @@ pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
             // from `ParserRange` form to `NodeRange` form. We will perform the actual
             // replacement only when we convert the `LazyAttrTokenStream` to an
             // `AttrTokenStream`.
-            self.capture_state.parser_replacements
-                [parser_replacements_start..parser_replacements_end]
-                .iter()
-                .cloned()
+            self.capture_state
+                .parser_replacements
+                .drain(parser_replacements_start..parser_replacements_end)
                 .chain(inner_attr_parser_replacements.into_iter())
                 .map(|(parser_range, data)| {
                     (NodeRange::new(parser_range, collect_pos.start_pos), data)