Improve doc comment desugaring.

Sometimes the parser needs to desugar a doc comment into `#[doc =
r"foo"]`. Currently it does this in a hacky way: by pushing a "fake" new
frame (one without a delimiter) onto the `TokenCursor` stack.

This commit changes things so that the token stream itself is modified
in place. The nice thing about this is that it means
`TokenCursorFrame::delim_sp` is now only `None` for the outermost frame.
This commit is contained in:
Nicholas Nethercote 2023-02-01 10:53:00 +11:00
parent 97872b792c
commit af1d16e82d
2 changed files with 30 additions and 27 deletions

View File

@ -614,6 +614,15 @@ impl Cursor {
pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> { pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
self.stream.0.get(self.index + n) self.stream.0.get(self.index + n)
} }
// Swap the token tree just before the cursor (the one most recently
// stepped past) for the trees in `tts`, then leave the cursor positioned
// immediately in front of the newly inserted trees.
//
// Panics if the cursor is still at the start of the stream, because
// there is no previous token tree to replace in that case.
pub fn replace_prev_and_rewind(&mut self, tts: Vec<TokenTree>) {
    assert!(self.index > 0);
    // Step back onto the tree that is about to be replaced.
    self.index -= 1;
    let prev = self.index;
    // Obtain mutable access to the underlying vector (NOTE(review):
    // presumably `Lrc::make_mut` clones it only when it is shared --
    // confirm) and substitute the single tree at `prev` with `tts`.
    Lrc::make_mut(&mut self.stream.0).splice(prev..=prev, tts);
}
} }
#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)] #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]

View File

@ -224,7 +224,7 @@ impl<'a> Drop for Parser<'a> {
#[derive(Clone)] #[derive(Clone)]
struct TokenCursor { struct TokenCursor {
// The current (innermost) frame. `frame` and `stack` could be combined, // The current (innermost) frame. `frame` and `stack` could be combined,
// but it's faster to have them separately to access `frame` directly // but it's faster to keep them separate and access `frame` directly
// rather than via something like `stack.last().unwrap()` or // rather than via something like `stack.last().unwrap()` or
// `stack[stack.len() - 1]`. // `stack[stack.len() - 1]`.
frame: TokenCursorFrame, frame: TokenCursorFrame,
@ -259,6 +259,7 @@ struct TokenCursor {
#[derive(Clone)] #[derive(Clone)]
struct TokenCursorFrame { struct TokenCursorFrame {
// This is `None` only for the outermost frame.
delim_sp: Option<(Delimiter, DelimSpan)>, delim_sp: Option<(Delimiter, DelimSpan)>,
tree_cursor: tokenstream::Cursor, tree_cursor: tokenstream::Cursor,
} }
@ -285,7 +286,9 @@ impl TokenCursor {
match tree { match tree {
&TokenTree::Token(ref token, spacing) => match (desugar_doc_comments, token) { &TokenTree::Token(ref token, spacing) => match (desugar_doc_comments, token) {
(true, &Token { kind: token::DocComment(_, attr_style, data), span }) => { (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
return self.desugar(attr_style, data, span); let desugared = self.desugar(attr_style, data, span);
self.frame.tree_cursor.replace_prev_and_rewind(desugared);
// Continue to get the first token of the desugared doc comment.
} }
_ => return (token.clone(), spacing), _ => return (token.clone(), spacing),
}, },
@ -300,19 +303,22 @@ impl TokenCursor {
} }
}; };
} else if let Some(frame) = self.stack.pop() { } else if let Some(frame) = self.stack.pop() {
if let Some((delim, span)) = self.frame.delim_sp && delim != Delimiter::Invisible { // We have exhausted this frame. Move back to its parent frame.
self.frame = frame; let (delim, span) = self.frame.delim_sp.unwrap();
self.frame = frame;
if delim != Delimiter::Invisible {
return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone); return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone);
} }
self.frame = frame;
// No close delimiter to return; continue on to the next iteration. // No close delimiter to return; continue on to the next iteration.
} else { } else {
// We have exhausted the outermost frame.
return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
} }
} }
} }
fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) { // Desugar a doc comment into something like `#[doc = r"foo"]`.
fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
// Searches for the occurrences of `"#*` and returns the minimum number of `#`s // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
// required to wrap the text. E.g. // required to wrap the text. E.g.
// - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0) // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
@ -346,27 +352,15 @@ impl TokenCursor {
.collect::<TokenStream>(), .collect::<TokenStream>(),
); );
self.stack.push(mem::replace( if attr_style == AttrStyle::Inner {
&mut self.frame, vec![
TokenCursorFrame::new( TokenTree::token_alone(token::Pound, span),
None, TokenTree::token_alone(token::Not, span),
if attr_style == AttrStyle::Inner { body,
[ ]
TokenTree::token_alone(token::Pound, span), } else {
TokenTree::token_alone(token::Not, span), vec![TokenTree::token_alone(token::Pound, span), body]
body, }
]
.into_iter()
.collect::<TokenStream>()
} else {
[TokenTree::token_alone(token::Pound, span), body]
.into_iter()
.collect::<TokenStream>()
},
),
));
self.next(/* desugar_doc_comments */ false)
} }
} }