Return earlier in some cases in collect_token.

This example triggers an assertion failure: ``` fn f() -> u32 { #[cfg_eval] #[cfg(not(FALSE))] 0 } ``` The sequence of events: - `configure_annotatable` calls `parse_expr_force_collect`, which calls `collect_tokens`. - Within that, we end up in `parse_expr_dot_or_call`, which again calls `collect_tokens`. - The return value of the `f` call is the expression `0`. - This inner call collects tokens for `0` (parser range 10..11) and creates a replacement covering `#[cfg(not(FALSE))] 0` (parser range 0..11). - We return to the outer `collect_tokens` call. The return value of the `f` call is *again* the expression `0`, again with the range 10..11, but the replacement from earlier covers the range 0..11. The code mistakenly assumes that any attributes from an inner `collect_tokens` call fit entirely within the body of the result of an outer `collect_tokens` call. So it adjusts the replacement parser range 0..11 to a node range by subtracting 10, resulting in -10..1. This is an invalid range and triggers an assertion failure. It's tricky to follow, but basically things get complicated when an AST node is returned from an inner `collect_tokens` call and then returned again from an outer `collect_token` node without being wrapped in any kind of additional layer. This commit changes `collect_tokens` to return early in some extra cases, avoiding the construction of lazy tokens. In the example above, the outer `collect_tokens` returns earlier because the `0` token already has tokens and `self.capture_state.capturing` is `Capturing::No`. This early return avoids the creation of the invalid range and the assertion failure. Fixes #129166. Note: these invalid ranges have been happening for a long time. #128725 looks like it's at fault only because it introduced the assertion that catches the invalid ranges.
2024-08-20 17:47:53 +10:00 · 2024-08-20 17:47:53 +10:00 · 1ae521e9d5
commit 1ae521e9d5
parent 312ecdb2ed
4 changed files with 41 additions and 24 deletions
--- a/compiler/rustc_parse/src/parser/attr_wrapper.rs
+++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs
@ -234,6 +234,8 @@ pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
        force_collect: ForceCollect,
        f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>,
    ) -> PResult<'a, R> {
+        let possible_capture_mode = self.capture_cfg;
+
        // We must collect if anything could observe the collected tokens, i.e.
        // if any of the following conditions hold.
        // - We are force collecting tokens (because force collection requires
@ -244,9 +246,9 @@ pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
            // - Our target supports custom inner attributes (custom
            //   inner attribute invocation might require token capturing).
            || R::SUPPORTS_CUSTOM_INNER_ATTRS
-            // - We are in `capture_cfg` mode (which requires tokens if
+            // - We are in "possible capture mode" (which requires tokens if
            //   the parsed node has `#[cfg]` or `#[cfg_attr]` attributes).
-            || self.capture_cfg;
+            || possible_capture_mode;
        if !needs_collection {
            return Ok(f(self, attrs.attrs)?.0);
        }
@ -267,7 +269,7 @@ pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
            res?
        };

-        // When we're not in `capture_cfg` mode, then skip collecting and
+        // When we're not in "definite capture mode", then skip collecting and
        // return early if either of the following conditions hold.
        // - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`).
        // - `Some(Some(_))`: Our target already has tokens set (e.g. we've
@ -278,7 +280,10 @@ pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
        // Note that this check is independent of `force_collect`. There's no
        // need to collect tokens when we don't support tokens or already have
        // tokens.
-        if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {
+        let definite_capture_mode = self.capture_cfg
+            && matches!(self.capture_state.capturing, Capturing::Yes)
+            && has_cfg_or_cfg_attr(ret.attrs());
+        if !definite_capture_mode && matches!(ret.tokens_mut(), None | Some(Some(_))) {
            return Ok(ret);
        }

@ -298,11 +303,11 @@ pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
            //   the earlier `needs_tokens` check, and we don't need to
            //   check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.)
            || needs_tokens(ret.attrs())
-            // - We are in `capture_cfg` mode and there are `#[cfg]` or
-            //   `#[cfg_attr]` attributes. (During normal non-`capture_cfg`
-            //   parsing, we don't need any special capturing for those
-            //   attributes, because they're builtin.)
-            || (self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()));
+            // - We are in "definite capture mode", which requires that there
+            //   are `#[cfg]` or `#[cfg_attr]` attributes. (During normal
+            //   non-`capture_cfg` parsing, we don't need any special capturing
+            //   for those attributes, because they're builtin.)
+            || definite_capture_mode;
        if !needs_collection {
            return Ok(ret);
        }
@ -399,20 +404,18 @@ pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
            break_last_token: self.break_last_token,
            node_replacements,
        });
+        let mut tokens_used = false;

        // If we support tokens and don't already have them, store the newly captured tokens.
        if let Some(target_tokens @ None) = ret.tokens_mut() {
+            tokens_used = true;
            *target_tokens = Some(tokens.clone());
        }

-        // If `capture_cfg` is set and we're inside a recursive call to
-        // `collect_tokens`, then we need to register a replace range if we
-        // have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager
-        // cfg-expansion on the captured token stream.
-        if self.capture_cfg
-            && matches!(self.capture_state.capturing, Capturing::Yes)
-            && has_cfg_or_cfg_attr(ret.attrs())
-        {
+        // If in "definite capture mode" we need to register a replace range
+        // for the `#[cfg]` and/or `#[cfg_attr]` attrs. This allows us to run
+        // eager cfg-expansion on the captured token stream.
+        if definite_capture_mode {
            assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");

            // What is the status here when parsing the example code at the top of this method?
@ -430,6 +433,7 @@ pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
            let start_pos =
                if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
            let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
+            tokens_used = true;
            self.capture_state
                .parser_replacements
                .push((ParserRange(start_pos..end_pos), Some(target)));
@ -439,6 +443,7 @@ pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
            self.capture_state.parser_replacements.clear();
            self.capture_state.inner_attr_parser_ranges.clear();
        }
+        assert!(tokens_used); // check we didn't create `tokens` unnecessarily
        Ok(ret)
    }
 }
--- a/tests/crashes/129166.rs
+++ b/tests/crashes/129166.rs
@ -1,7 +0,0 @@
-//@ known-bug: rust-lang/rust#129166
-
-fn main() {
-    #[cfg_eval]
-    #[cfg]
-    0
-}
--- a/tests/ui/conditional-compilation/invalid-node-range-issue-129166.rs
+++ b/tests/ui/conditional-compilation/invalid-node-range-issue-129166.rs
@ -0,0 +1,11 @@
+// This was triggering an assertion failure in `NodeRange::new`.
+
+#![feature(cfg_eval)]
+#![feature(stmt_expr_attributes)]
+
+fn f() -> u32 {
+    #[cfg_eval] #[cfg(not(FALSE))] 0
+    //~^ ERROR removing an expression is not supported in this position
+}
+
+fn main() {}
--- a/tests/ui/conditional-compilation/invalid-node-range-issue-129166.stderr
+++ b/tests/ui/conditional-compilation/invalid-node-range-issue-129166.stderr
@ -0,0 +1,8 @@
+error: removing an expression is not supported in this position
+  --> $DIR/invalid-node-range-issue-129166.rs:7:17
+   |
+LL |     #[cfg_eval] #[cfg(not(FALSE))] 0
+   |                 ^^^^^^^^^^^^^^^^^^
+
+error: aborting due to 1 previous error
+