Properly capture trailing 'unglued' token

If we try to capture the `Vec<u8>` in `Option<Vec<u8>>`, we'll
need to capture a `>` token which was 'unglued' from a `>>` token.
The process of unglueing a token for parsing purposes bypasses the
usual capturing infrastructure, so we currently lose the trailing `>`.
As a result, we fall back to the reparsed `TokenStream`, causing us to
lose spans.

This commit makes token capturing keep track of a trailing 'unglued'
token. Note that we don't need to care about unglueing except at the end
of the captured tokens - if we capture both the first and second unglued
tokens, then we'll end up capturing the full 'glued' token, which
already works correctly.
This commit is contained in:
Aaron Hill 2020-12-12 15:20:22 -05:00
parent 388eb24b6c
commit e6fa6334dd
No known key found for this signature in database
GPG Key ID: B4087E510E98B164
3 changed files with 106 additions and 9 deletions

View File

@ -17,7 +17,7 @@
use rustc_ast::ptr::P;
use rustc_ast::token::{self, DelimToken, Token, TokenKind};
use rustc_ast::tokenstream::{self, DelimSpan, LazyTokenStream, Spacing};
use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree};
use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree, TreeAndSpacing};
use rustc_ast::DUMMY_NODE_ID;
use rustc_ast::{self as ast, AnonConst, AttrStyle, AttrVec, Const, CrateSugar, Extern, Unsafe};
use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit};
@ -132,6 +132,28 @@ struct TokenCursor {
// Counts the number of calls to `next` or `next_desugared`,
// depending on whether `desugar_doc_comments` is set.
num_next_calls: usize,
// During parsing, we may sometimes need to 'unglue' a
// glued token into two component tokens
// (e.g. '>>' into '>' and '>'), so that the parser
// can consume them one at a time. This process
// bypasses the normal capturing mechanism
// (e.g. `num_next_calls` will not be incremented),
// since the 'unglued' tokens do not exist in
// the original `TokenStream`.
//
// If we end up consuming both unglued tokens,
// then this is not an issue - we'll end up
// capturing the single 'glued' token.
//
// However, in certain circumstances, we may
// want to capture just the first 'unglued' token.
// For example, capturing the `Vec<u8>`
// in `Option<Vec<u8>>` requires us to unglue
// the trailing `>>` token. The `append_unglued_token`
// field is used to track this token - it gets
// appended to the captured stream when
// we evaluate a `LazyTokenStream`
append_unglued_token: Option<TreeAndSpacing>,
}
#[derive(Clone)]
@ -336,6 +358,7 @@ pub fn new(
stack: Vec::new(),
num_next_calls: 0,
desugar_doc_comments,
append_unglued_token: None,
},
desugar_doc_comments,
unmatched_angle_bracket_count: 0,
@ -359,6 +382,10 @@ fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) {
self.token_cursor.next()
};
self.token_cursor.num_next_calls += 1;
// We've retrieved a token from the underlying
// cursor, so we no longer need to worry about
// an unglued token. See `break_and_eat` for more details
self.token_cursor.append_unglued_token = None;
if next.span.is_dummy() {
// Tweak the location for better diagnostics, but keep syntactic context intact.
next.span = fallback_span.with_ctxt(next.span.ctxt());
@ -555,6 +582,14 @@ fn break_and_eat(&mut self, expected: TokenKind) -> bool {
let first_span = self.sess.source_map().start_point(self.token.span);
let second_span = self.token.span.with_lo(first_span.hi());
self.token = Token::new(first, first_span);
// Keep track of this token - if we end token capturing now,
// we'll want to append this token to the captured stream.
//
// If we consume any additional tokens, then this token
// is not needed (we'll capture the entire 'glued' token),
// and `next_tok` will set this field to `None`
self.token_cursor.append_unglued_token =
Some((TokenTree::Token(self.token.clone()), Spacing::Alone));
// Use the spacing of the glued token as the spacing
// of the unglued second token.
self.bump_with((Token::new(second, second_span), self.token_spacing));
@ -1230,6 +1265,7 @@ struct LazyTokenStreamImpl {
num_calls: usize,
desugar_doc_comments: bool,
trailing_semi: bool,
append_unglued_token: Option<TreeAndSpacing>,
}
impl CreateTokenStream for LazyTokenStreamImpl {
fn create_token_stream(&self) -> TokenStream {
@ -1253,12 +1289,18 @@ fn create_token_stream(&self) -> TokenStream {
}))
.take(num_calls);
make_token_stream(tokens)
make_token_stream(tokens, self.append_unglued_token.clone())
}
fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream> {
if self.trailing_semi {
panic!("Called `add_trailing_semi` twice!");
}
if self.append_unglued_token.is_some() {
panic!(
"Cannot call `add_trailing_semi` when we have an unglued token {:?}",
self.append_unglued_token
);
}
let mut new = self.clone();
new.trailing_semi = true;
Box::new(new)
@ -1271,6 +1313,7 @@ fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream> {
cursor_snapshot,
desugar_doc_comments: self.desugar_doc_comments,
trailing_semi: false,
append_unglued_token: self.token_cursor.append_unglued_token.clone(),
};
Ok((ret, Some(LazyTokenStream::new(lazy_impl))))
}
@ -1325,7 +1368,10 @@ pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &Pa
/// Converts a flattened iterator of tokens (including open and close delimiter tokens)
/// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
/// of open and close delims.
fn make_token_stream(tokens: impl Iterator<Item = (Token, Spacing)>) -> TokenStream {
fn make_token_stream(
tokens: impl Iterator<Item = (Token, Spacing)>,
append_unglued_token: Option<TreeAndSpacing>,
) -> TokenStream {
#[derive(Debug)]
struct FrameData {
open: Span,
@ -1348,14 +1394,17 @@ struct FrameData {
.inner
.push((delimited, Spacing::Alone));
}
token => stack
.last_mut()
.expect("Bottom token frame is missing!")
.inner
.push((TokenTree::Token(token), spacing)),
token => {
stack
.last_mut()
.expect("Bottom token frame is missing!")
.inner
.push((TokenTree::Token(token), spacing));
}
}
}
let final_buf = stack.pop().expect("Missing final buf!");
let mut final_buf = stack.pop().expect("Missing final buf!");
final_buf.inner.extend(append_unglued_token);
assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack);
TokenStream::new(final_buf.inner)
}

View File

@ -0,0 +1,20 @@
// aux-build:test-macros.rs
// compile-flags: -Z span-debug
// check-pass
// Tests that we properly handle parsing a nonterminal
// where we have two consecutive angle brackets (one inside
// the nonterminal, and one outside)
#![no_std] // Don't load unnecessary hygiene information from std
extern crate std;
extern crate test_macros;
macro_rules! trailing_angle {
(Option<$field:ty>) => {
test_macros::print_bang_consume!($field);
}
}
trailing_angle!(Option<Vec<u8>>);
fn main() {}

View File

@ -0,0 +1,28 @@
PRINT-BANG INPUT (DISPLAY): Vec<u8>
PRINT-BANG RE-COLLECTED (DISPLAY): Vec < u8 >
PRINT-BANG INPUT (DEBUG): TokenStream [
Group {
delimiter: None,
stream: TokenStream [
Ident {
ident: "Vec",
span: $DIR/capture-unglued-token.rs:19:24: 19:27 (#0),
},
Punct {
ch: '<',
spacing: Alone,
span: $DIR/capture-unglued-token.rs:19:27: 19:28 (#0),
},
Ident {
ident: "u8",
span: $DIR/capture-unglued-token.rs:19:28: 19:30 (#0),
},
Punct {
ch: '>',
spacing: Alone,
span: $DIR/capture-unglued-token.rs:19:30: 19:31 (#0),
},
],
span: $DIR/capture-unglued-token.rs:15:42: 15:48 (#4),
},
]