Properly capture trailing 'unglued' token

If we try to capture the `Vec<u8>` in `Option<Vec<u8>>`, we'll need to capture a `>` token which was 'unglued' from a `>>` token. The processing of unglueing a token for parsing purposes bypasses the usual capturing infrastructure, so we currently lose the trailing `>`. As a result, we fall back to the reparsed `TokenStream`, causing us to lose spans. This commit makes token capturing keep track of a trailing 'unglued' token. Note that we don't need to care about unglueing except at the end of the captured tokens - if we capture both the first and second unglued tokens, then we'll end up capturing the full 'glued' token, which already works correctly.
2020-12-12 15:20:22 -05:00 · 2020-12-12 15:20:22 -05:00 · e6fa6334dd
commit e6fa6334dd
parent 388eb24b6c
3 changed files with 106 additions and 9 deletions
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@ -17,7 +17,7 @@
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, DelimToken, Token, TokenKind};
 use rustc_ast::tokenstream::{self, DelimSpan, LazyTokenStream, Spacing};
-use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree};
+use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree, TreeAndSpacing};
 use rustc_ast::DUMMY_NODE_ID;
 use rustc_ast::{self as ast, AnonConst, AttrStyle, AttrVec, Const, CrateSugar, Extern, Unsafe};
 use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit};
@ -132,6 +132,28 @@ struct TokenCursor {
    // Counts the number of calls to `next` or `next_desugared`,
    // depending on whether `desugar_doc_comments` is set.
    num_next_calls: usize,
+    // During parsing, we may sometimes need to 'unglue' a
+    // glued token into two component tokens
+    // (e.g. '>>' into '>' and '>), so that the parser
+    // can consume them one at a time. This process
+    // bypasses the normal capturing mechanism
+    // (e.g. `num_next_calls` will not be incremented),
+    // since the 'unglued' tokens due not exist in
+    // the original `TokenStream`.
+    //
+    // If we end up consuming both unglued tokens,
+    // then this is not an issue - we'll end up
+    // capturing the single 'glued' token.
+    //
+    // However, in certain circumstances, we may
+    // want to capture just the first 'unglued' token.
+    // For example, capturing the `Vec<u8>`
+    // in `Option<Vec<u8>>` requires us to unglue
+    // the trailing `>>` token. The `append_unglued_token`
+    // field is used to track this token - it gets
+    // appended to the captured stream when
+    // we evaluate a `LazyTokenStream`
+    append_unglued_token: Option<TreeAndSpacing>,
 }

 #[derive(Clone)]
@ -336,6 +358,7 @@ pub fn new(
                stack: Vec::new(),
                num_next_calls: 0,
                desugar_doc_comments,
+                append_unglued_token: None,
            },
            desugar_doc_comments,
            unmatched_angle_bracket_count: 0,
@ -359,6 +382,10 @@ fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) {
            self.token_cursor.next()
        };
        self.token_cursor.num_next_calls += 1;
+        // We've retrieved an token from the underlying
+        // cursor, so we no longer need to worry about
+        // an unglued token. See `break_and_eat` for more details
+        self.token_cursor.append_unglued_token = None;
        if next.span.is_dummy() {
            // Tweak the location for better diagnostics, but keep syntactic context intact.
            next.span = fallback_span.with_ctxt(next.span.ctxt());
@ -555,6 +582,14 @@ fn break_and_eat(&mut self, expected: TokenKind) -> bool {
                let first_span = self.sess.source_map().start_point(self.token.span);
                let second_span = self.token.span.with_lo(first_span.hi());
                self.token = Token::new(first, first_span);
+                // Keep track of this token - if we end token capturing now,
+                // we'll want to append this token to the captured stream.
+                //
+                // If we consume any additional tokens, then this token
+                // is not needed (we'll capture the entire 'glued' token),
+                // and `next_tok` will set this field to `None`
+                self.token_cursor.append_unglued_token =
+                    Some((TokenTree::Token(self.token.clone()), Spacing::Alone));
                // Use the spacing of the glued token as the spacing
                // of the unglued second token.
                self.bump_with((Token::new(second, second_span), self.token_spacing));
@ -1230,6 +1265,7 @@ struct LazyTokenStreamImpl {
            num_calls: usize,
            desugar_doc_comments: bool,
            trailing_semi: bool,
+            append_unglued_token: Option<TreeAndSpacing>,
        }
        impl CreateTokenStream for LazyTokenStreamImpl {
            fn create_token_stream(&self) -> TokenStream {
@ -1253,12 +1289,18 @@ fn create_token_stream(&self) -> TokenStream {
                    }))
                    .take(num_calls);

-                make_token_stream(tokens)
+                make_token_stream(tokens, self.append_unglued_token.clone())
            }
            fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream> {
                if self.trailing_semi {
                    panic!("Called `add_trailing_semi` twice!");
                }
+                if self.append_unglued_token.is_some() {
+                    panic!(
+                        "Cannot call `add_trailing_semi` when we have an unglued token {:?}",
+                        self.append_unglued_token
+                    );
+                }
                let mut new = self.clone();
                new.trailing_semi = true;
                Box::new(new)
@ -1271,6 +1313,7 @@ fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream> {
            cursor_snapshot,
            desugar_doc_comments: self.desugar_doc_comments,
            trailing_semi: false,
+            append_unglued_token: self.token_cursor.append_unglued_token.clone(),
        };
        Ok((ret, Some(LazyTokenStream::new(lazy_impl))))
    }
@ -1325,7 +1368,10 @@ pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &Pa
 /// Converts a flattened iterator of tokens (including open and close delimiter tokens)
 /// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
 /// of open and close delims.
-fn make_token_stream(tokens: impl Iterator<Item = (Token, Spacing)>) -> TokenStream {
+fn make_token_stream(
+    tokens: impl Iterator<Item = (Token, Spacing)>,
+    append_unglued_token: Option<TreeAndSpacing>,
+) -> TokenStream {
    #[derive(Debug)]
    struct FrameData {
        open: Span,
@ -1348,14 +1394,17 @@ struct FrameData {
                    .inner
                    .push((delimited, Spacing::Alone));
            }
-            token => stack
-                .last_mut()
-                .expect("Bottom token frame is missing!")
-                .inner
-                .push((TokenTree::Token(token), spacing)),
+            token => {
+                stack
+                    .last_mut()
+                    .expect("Bottom token frame is missing!")
+                    .inner
+                    .push((TokenTree::Token(token), spacing));
+            }
        }
    }
-    let final_buf = stack.pop().expect("Missing final buf!");
+    let mut final_buf = stack.pop().expect("Missing final buf!");
+    final_buf.inner.extend(append_unglued_token);
    assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack);
    TokenStream::new(final_buf.inner)
 }
--- a/src/test/ui/proc-macro/capture-unglued-token.rs
+++ b/src/test/ui/proc-macro/capture-unglued-token.rs
@ -0,0 +1,20 @@
+// aux-build:test-macros.rs
+// compile-flags: -Z span-debug
+// check-pass
+
+// Tests that we properly handle parsing a nonterminal
+// where we have two consecutive angle brackets (one inside
+// the nonterminal, and one outside)
+
+#![no_std] // Don't load unnecessary hygiene information from std
+extern crate std;
+extern crate test_macros;
+
+macro_rules! trailing_angle {
+    (Option<$field:ty>) => {
+        test_macros::print_bang_consume!($field);
+    }
+}
+
+trailing_angle!(Option<Vec<u8>>);
+fn main() {}
--- a/src/test/ui/proc-macro/capture-unglued-token.stdout
+++ b/src/test/ui/proc-macro/capture-unglued-token.stdout
@ -0,0 +1,28 @@
+PRINT-BANG INPUT (DISPLAY): Vec<u8>
+PRINT-BANG RE-COLLECTED (DISPLAY): Vec < u8 >
+PRINT-BANG INPUT (DEBUG): TokenStream [
+    Group {
+        delimiter: None,
+        stream: TokenStream [
+            Ident {
+                ident: "Vec",
+                span: $DIR/capture-unglued-token.rs:19:24: 19:27 (#0),
+            },
+            Punct {
+                ch: '<',
+                spacing: Alone,
+                span: $DIR/capture-unglued-token.rs:19:27: 19:28 (#0),
+            },
+            Ident {
+                ident: "u8",
+                span: $DIR/capture-unglued-token.rs:19:28: 19:30 (#0),
+            },
+            Punct {
+                ch: '>',
+                spacing: Alone,
+                span: $DIR/capture-unglued-token.rs:19:30: 19:31 (#0),
+            },
+        ],
+        span: $DIR/capture-unglued-token.rs:15:42: 15:48 (#4),
+    },
+]