Auto merge of #102508 - nnethercote:even-more-lexer-improvements, r=matklad
Even more lexer improvements. These are just about code clarity, rather than performance. r? `@matklad`
This commit is contained in:
commit
dbaf3e67aa
@ -345,17 +345,14 @@ impl Token {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_op(&self) -> bool {
|
pub fn is_op(&self) -> bool {
|
||||||
!matches!(
|
match self.kind {
|
||||||
self.kind,
|
Eq | Lt | Le | EqEq | Ne | Ge | Gt | AndAnd | OrOr | Not | Tilde | BinOp(_)
|
||||||
OpenDelim(..)
|
| BinOpEq(_) | At | Dot | DotDot | DotDotDot | DotDotEq | Comma | Semi | Colon
|
||||||
| CloseDelim(..)
|
| ModSep | RArrow | LArrow | FatArrow | Pound | Dollar | Question | SingleQuote => true,
|
||||||
| Literal(..)
|
|
||||||
| DocComment(..)
|
OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) | Ident(..)
|
||||||
| Ident(..)
|
| Lifetime(..) | Interpolated(..) | Eof => false,
|
||||||
| Lifetime(..)
|
}
|
||||||
| Interpolated(..)
|
|
||||||
| Eof
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_like_plus(&self) -> bool {
|
pub fn is_like_plus(&self) -> bool {
|
||||||
|
@ -304,9 +304,20 @@ pub struct AttributesData {
|
|||||||
#[derive(Clone, Debug, Default, Encodable, Decodable)]
|
#[derive(Clone, Debug, Default, Encodable, Decodable)]
|
||||||
pub struct TokenStream(pub(crate) Lrc<Vec<TokenTree>>);
|
pub struct TokenStream(pub(crate) Lrc<Vec<TokenTree>>);
|
||||||
|
|
||||||
|
/// Similar to `proc_macro::Spacing`, but for tokens.
|
||||||
|
///
|
||||||
|
/// Note that all `ast::TokenTree::Token` instances have a `Spacing`, but when
|
||||||
|
/// we convert to `proc_macro::TokenTree` for proc macros, only `Punct`
|
||||||
|
/// `TokenTree`s have a `proc_macro::Spacing`.
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
|
#[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
|
||||||
pub enum Spacing {
|
pub enum Spacing {
|
||||||
|
/// The token is not immediately followed by an operator token (as
|
||||||
|
/// determined by `Token::is_op`). E.g. a `+` token is `Alone` in `+ =`,
|
||||||
|
/// `+/*foo*/=`, `+ident`, and `+()`.
|
||||||
Alone,
|
Alone,
|
||||||
|
|
||||||
|
/// The token is immediately followed by an operator token. E.g. a `+`
|
||||||
|
/// token is `Joint` in `+=` and `++`.
|
||||||
Joint,
|
Joint,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -110,10 +110,14 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec<TokenTree<TokenStre
|
|||||||
tokenstream::TokenTree::Token(token, spacing) => (token, spacing == Joint),
|
tokenstream::TokenTree::Token(token, spacing) => (token, spacing == Joint),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Split the operator into one or more `Punct`s, one per character.
|
||||||
|
// The final one inherits the jointness of the original token. Any
|
||||||
|
// before that get `joint = true`.
|
||||||
let mut op = |s: &str| {
|
let mut op = |s: &str| {
|
||||||
assert!(s.is_ascii());
|
assert!(s.is_ascii());
|
||||||
trees.extend(s.as_bytes().iter().enumerate().map(|(idx, &ch)| {
|
trees.extend(s.bytes().enumerate().map(|(idx, ch)| {
|
||||||
TokenTree::Punct(Punct { ch, joint: joint || idx != s.len() - 1, span })
|
let is_final = idx == s.len() - 1;
|
||||||
|
TokenTree::Punct(Punct { ch, joint: if is_final { joint } else { true }, span })
|
||||||
}));
|
}));
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -52,7 +52,7 @@ pub(crate) fn parse_token_trees<'a>(
|
|||||||
let cursor = Cursor::new(src);
|
let cursor = Cursor::new(src);
|
||||||
let string_reader =
|
let string_reader =
|
||||||
StringReader { sess, start_pos, pos: start_pos, src, cursor, override_span };
|
StringReader { sess, start_pos, pos: start_pos, src, cursor, override_span };
|
||||||
tokentrees::TokenTreesReader::parse_token_trees(string_reader)
|
tokentrees::TokenTreesReader::parse_all_token_trees(string_reader)
|
||||||
}
|
}
|
||||||
|
|
||||||
struct StringReader<'a> {
|
struct StringReader<'a> {
|
||||||
|
@ -27,7 +27,7 @@ pub(super) struct TokenTreesReader<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> TokenTreesReader<'a> {
|
impl<'a> TokenTreesReader<'a> {
|
||||||
pub(super) fn parse_token_trees(
|
pub(super) fn parse_all_token_trees(
|
||||||
string_reader: StringReader<'a>,
|
string_reader: StringReader<'a>,
|
||||||
) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
|
) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
|
||||||
let mut tt_reader = TokenTreesReader {
|
let mut tt_reader = TokenTreesReader {
|
||||||
@ -40,36 +40,51 @@ impl<'a> TokenTreesReader<'a> {
|
|||||||
last_delim_empty_block_spans: FxHashMap::default(),
|
last_delim_empty_block_spans: FxHashMap::default(),
|
||||||
matching_block_spans: Vec::new(),
|
matching_block_spans: Vec::new(),
|
||||||
};
|
};
|
||||||
let res = tt_reader.parse_all_token_trees();
|
let res = tt_reader.parse_token_trees(/* is_delimited */ false);
|
||||||
(res, tt_reader.unmatched_braces)
|
(res, tt_reader.unmatched_braces)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`.
|
// Parse a stream of tokens into a list of `TokenTree`s.
|
||||||
fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> {
|
fn parse_token_trees(&mut self, is_delimited: bool) -> PResult<'a, TokenStream> {
|
||||||
self.token = self.string_reader.next_token().0;
|
self.token = self.string_reader.next_token().0;
|
||||||
let mut buf = TokenStreamBuilder::default();
|
let mut buf = Vec::new();
|
||||||
loop {
|
loop {
|
||||||
match self.token.kind {
|
match self.token.kind {
|
||||||
token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)),
|
token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)),
|
||||||
token::CloseDelim(delim) => return Err(self.close_delim_err(delim)),
|
token::CloseDelim(delim) => {
|
||||||
token::Eof => return Ok(buf.into_token_stream()),
|
return if is_delimited {
|
||||||
_ => buf.push(self.parse_token_tree_non_delim_non_eof()),
|
Ok(TokenStream::new(buf))
|
||||||
}
|
} else {
|
||||||
}
|
Err(self.close_delim_err(delim))
|
||||||
}
|
};
|
||||||
|
}
|
||||||
// Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`.
|
token::Eof => {
|
||||||
fn parse_token_trees_until_close_delim(&mut self) -> TokenStream {
|
if is_delimited {
|
||||||
let mut buf = TokenStreamBuilder::default();
|
self.eof_err().emit();
|
||||||
loop {
|
}
|
||||||
match self.token.kind {
|
return Ok(TokenStream::new(buf));
|
||||||
token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)),
|
}
|
||||||
token::CloseDelim(..) => return buf.into_token_stream(),
|
_ => {
|
||||||
token::Eof => {
|
// Get the next normal token. This might require getting multiple adjacent
|
||||||
self.eof_err().emit();
|
// single-char tokens and joining them together.
|
||||||
return buf.into_token_stream();
|
let (this_spacing, next_tok) = loop {
|
||||||
|
let (next_tok, is_next_tok_preceded_by_whitespace) =
|
||||||
|
self.string_reader.next_token();
|
||||||
|
if !is_next_tok_preceded_by_whitespace {
|
||||||
|
if let Some(glued) = self.token.glue(&next_tok) {
|
||||||
|
self.token = glued;
|
||||||
|
} else {
|
||||||
|
let this_spacing =
|
||||||
|
if next_tok.is_op() { Spacing::Joint } else { Spacing::Alone };
|
||||||
|
break (this_spacing, next_tok);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
break (Spacing::Alone, next_tok);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let this_tok = std::mem::replace(&mut self.token, next_tok);
|
||||||
|
buf.push(TokenTree::Token(this_tok, this_spacing));
|
||||||
}
|
}
|
||||||
_ => buf.push(self.parse_token_tree_non_delim_non_eof()),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -113,14 +128,12 @@ impl<'a> TokenTreesReader<'a> {
|
|||||||
// The span for beginning of the delimited section
|
// The span for beginning of the delimited section
|
||||||
let pre_span = self.token.span;
|
let pre_span = self.token.span;
|
||||||
|
|
||||||
// Move past the open delimiter.
|
|
||||||
self.open_braces.push((open_delim, self.token.span));
|
self.open_braces.push((open_delim, self.token.span));
|
||||||
self.token = self.string_reader.next_token().0;
|
|
||||||
|
|
||||||
// Parse the token trees within the delimiters.
|
// Parse the token trees within the delimiters.
|
||||||
// We stop at any delimiter so we can try to recover if the user
|
// We stop at any delimiter so we can try to recover if the user
|
||||||
// uses an incorrect delimiter.
|
// uses an incorrect delimiter.
|
||||||
let tts = self.parse_token_trees_until_close_delim();
|
let tts = self.parse_token_trees(/* is_delimited */ true).unwrap();
|
||||||
|
|
||||||
// Expand to cover the entire delimited token tree
|
// Expand to cover the entire delimited token tree
|
||||||
let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
|
let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
|
||||||
@ -242,43 +255,4 @@ impl<'a> TokenTreesReader<'a> {
|
|||||||
err.span_label(self.token.span, "unexpected closing delimiter");
|
err.span_label(self.token.span, "unexpected closing delimiter");
|
||||||
err
|
err
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn parse_token_tree_non_delim_non_eof(&mut self) -> TokenTree {
|
|
||||||
// `this_spacing` for the returned token refers to whether the token is
|
|
||||||
// immediately followed by another op token. It is determined by the
|
|
||||||
// next token: its kind and its `preceded_by_whitespace` status.
|
|
||||||
let (next_tok, is_next_tok_preceded_by_whitespace) = self.string_reader.next_token();
|
|
||||||
let this_spacing = if is_next_tok_preceded_by_whitespace || !next_tok.is_op() {
|
|
||||||
Spacing::Alone
|
|
||||||
} else {
|
|
||||||
Spacing::Joint
|
|
||||||
};
|
|
||||||
let this_tok = std::mem::replace(&mut self.token, next_tok);
|
|
||||||
TokenTree::Token(this_tok, this_spacing)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
struct TokenStreamBuilder {
|
|
||||||
buf: Vec<TokenTree>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TokenStreamBuilder {
|
|
||||||
#[inline(always)]
|
|
||||||
fn push(&mut self, tree: TokenTree) {
|
|
||||||
if let Some(TokenTree::Token(prev_token, Spacing::Joint)) = self.buf.last()
|
|
||||||
&& let TokenTree::Token(token, joint) = &tree
|
|
||||||
&& let Some(glued) = prev_token.glue(token)
|
|
||||||
{
|
|
||||||
self.buf.pop();
|
|
||||||
self.buf.push(TokenTree::Token(glued, *joint));
|
|
||||||
} else {
|
|
||||||
self.buf.push(tree)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn into_token_stream(self) -> TokenStream {
|
|
||||||
TokenStream::new(self.buf)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -302,7 +302,10 @@ impl TokenCursor {
|
|||||||
|
|
||||||
fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) {
|
fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) {
|
||||||
// Searches for the occurrences of `"#*` and returns the minimum number of `#`s
|
// Searches for the occurrences of `"#*` and returns the minimum number of `#`s
|
||||||
// required to wrap the text.
|
// required to wrap the text. E.g.
|
||||||
|
// - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
|
||||||
|
// - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
|
||||||
|
// - `abc "##d##"` is wrapped as `r###"abc "##d##""###` (num_of_hashes = 3)
|
||||||
let mut num_of_hashes = 0;
|
let mut num_of_hashes = 0;
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
for ch in data.as_str().chars() {
|
for ch in data.as_str().chars() {
|
||||||
@ -314,6 +317,7 @@ impl TokenCursor {
|
|||||||
num_of_hashes = cmp::max(num_of_hashes, count);
|
num_of_hashes = cmp::max(num_of_hashes, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// `/// foo` becomes `doc = r"foo"`.
|
||||||
let delim_span = DelimSpan::from_single(span);
|
let delim_span = DelimSpan::from_single(span);
|
||||||
let body = TokenTree::Delimited(
|
let body = TokenTree::Delimited(
|
||||||
delim_span,
|
delim_span,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user