Move TokenCursor::break_last_token into Parser.

Similar to the last commit, it's more of a `Parser`-level concern than a `TokenCursor`-level concern, and the resulting struct size reductions are a bonus. After this change, `TokenCursor` is as minimal as possible (two fields and two methods).
2023-07-31 16:36:27 +10:00 · 2023-07-31 16:36:27 +10:00 · 6fc2c481e5
commit 6fc2c481e5
parent 54eb6bc34c
3 changed files with 25 additions and 39 deletions
--- a/compiler/rustc_parse/src/parser/attr_wrapper.rs
+++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs
@ -301,7 +301,7 @@ impl<'a> Parser<'a> {
        // then extend the range of captured tokens to include it, since the parser
        // was not actually bumped past it. When the `LazyAttrTokenStream` gets converted
        // into an `AttrTokenStream`, we will create the proper token.
-        if self.token_cursor.break_last_token {
+        if self.break_last_token {
            assert!(!captured_trailing, "Cannot set break_last_token and have trailing token");
            end_pos += 1;
        }
@ -331,7 +331,7 @@ impl<'a> Parser<'a> {
            start_token,
            num_calls,
            cursor_snapshot,
-            break_last_token: self.token_cursor.break_last_token,
+            break_last_token: self.break_last_token,
            replace_ranges,
        });

@ -362,10 +362,7 @@ impl<'a> Parser<'a> {
            let start_pos = if has_outer_attrs { attrs.start_pos } else { start_pos };
            let new_tokens = vec![(FlatToken::AttrTarget(attr_data), Spacing::Alone)];

-            assert!(
-                !self.token_cursor.break_last_token,
-                "Should not have unglued last token with cfg attr"
-            );
+            assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");
            let range: Range<u32> = (start_pos.try_into().unwrap())..(end_pos.try_into().unwrap());
            self.capture_state.replace_ranges.push((range, new_tokens));
            self.capture_state.replace_ranges.extend(inner_attr_replace_ranges);
@ -463,6 +460,6 @@ mod size_asserts {
    use rustc_data_structures::static_assert_size;
    // tidy-alphabetical-start
    static_assert_size!(AttrWrapper, 16);
-    static_assert_size!(LazyAttrTokenStreamImpl, 112);
+    static_assert_size!(LazyAttrTokenStreamImpl, 104);
    // tidy-alphabetical-end
 }
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@ -1167,7 +1167,7 @@ impl<'a> Parser<'a> {
            DestructuredFloat::TrailingDot(sym, sym_span, dot_span) => {
                assert!(suffix.is_none());
                // Analogous to `Self::break_and_eat`
-                self.token_cursor.break_last_token = true;
+                self.break_last_token = true;
                // This might work, in cases like `1. 2`, and might not,
                // in cases like `offset_of!(Ty, 1.)`. It depends on what comes
                // after the float-like token, and therefore we have to make
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@ -138,6 +138,21 @@ pub struct Parser<'a> {
    token_cursor: TokenCursor,
    // The number of calls to `bump`, i.e. the position in the token stream.
    num_bump_calls: usize,
+    // During parsing we may sometimes need to 'unglue' a glued token into two
+    // component tokens (e.g. '>>' into '>' and '>'), so the parser can consume
+    // them one at a time. This process bypasses the normal capturing mechanism
+    // (e.g. `num_bump_calls` will not be incremented), since the 'unglued'
+    // tokens do not exist in the original `TokenStream`.
+    //
+    // If we end up consuming both unglued tokens, this is not an issue. We'll
+    // end up capturing the single 'glued' token.
+    //
+    // However, sometimes we may want to capture just the first 'unglued'
+    // token. For example, capturing the `Vec<u8>` in `Option<Vec<u8>>`
+    // requires us to unglue the trailing `>>` token. The `break_last_token`
+    // field is used to track this token. It gets appended to the captured
+    // stream when we evaluate a `LazyAttrTokenStream`.
+    break_last_token: bool,
    /// This field is used to keep track of how many left angle brackets we have seen. This is
    /// required in order to detect extra leading left angle brackets (`<` characters) and error
    /// appropriately.
@ -161,7 +176,7 @@ pub struct Parser<'a> {
 // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
 // it doesn't unintentionally get bigger.
 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-rustc_data_structures::static_assert_size!(Parser<'_>, 272);
+rustc_data_structures::static_assert_size!(Parser<'_>, 264);

 /// Stores span information about a closure.
 #[derive(Clone)]
@ -223,29 +238,6 @@ struct TokenCursor {
    // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
    // because it's the outermost token stream which never has delimiters.
    stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
-
-    // During parsing, we may sometimes need to 'unglue' a
-    // glued token into two component tokens
-    // (e.g. '>>' into '>' and '>), so that the parser
-    // can consume them one at a time. This process
-    // bypasses the normal capturing mechanism
-    // (e.g. `num_next_calls` will not be incremented),
-    // since the 'unglued' tokens due not exist in
-    // the original `TokenStream`.
-    //
-    // If we end up consuming both unglued tokens,
-    // then this is not an issue - we'll end up
-    // capturing the single 'glued' token.
-    //
-    // However, in certain circumstances, we may
-    // want to capture just the first 'unglued' token.
-    // For example, capturing the `Vec<u8>`
-    // in `Option<Vec<u8>>` requires us to unglue
-    // the trailing `>>` token. The `break_last_token`
-    // field is used to track this token - it gets
-    // appended to the captured stream when
-    // we evaluate a `LazyAttrTokenStream`.
-    break_last_token: bool,
 }

 impl TokenCursor {
@ -396,12 +388,9 @@ impl<'a> Parser<'a> {
            capture_cfg: false,
            restrictions: Restrictions::empty(),
            expected_tokens: Vec::new(),
-            token_cursor: TokenCursor {
-                tree_cursor: stream.into_trees(),
-                stack: Vec::new(),
-                break_last_token: false,
-            },
+            token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() },
            num_bump_calls: 0,
+            break_last_token: false,
            unmatched_angle_bracket_count: 0,
            max_angle_bracket_count: 0,
            last_unexpected_token_span: None,
@ -704,7 +693,7 @@ impl<'a> Parser<'a> {
                // If we consume any additional tokens, then this token
                // is not needed (we'll capture the entire 'glued' token),
                // and `bump` will set this field to `None`
-                self.token_cursor.break_last_token = true;
+                self.break_last_token = true;
                // Use the spacing of the glued token as the spacing
                // of the unglued second token.
                self.bump_with((Token::new(second, second_span), self.token_spacing));
@ -1050,7 +1039,7 @@ impl<'a> Parser<'a> {
        // We've retrieved an token from the underlying
        // cursor, so we no longer need to worry about
        // an unglued token. See `break_and_eat` for more details
-        self.token_cursor.break_last_token = false;
+        self.break_last_token = false;
        if next.0.span.is_dummy() {
            // Tweak the location for better diagnostics, but keep syntactic context intact.
            let fallback_span = self.token.span;