Auto merge of #102302 - nnethercote:more-lexer-improvements, r=matklad

More lexer improvements. A follow-up to #99884. r? `@matklad`

commit 6201eabde8
@@ -13,7 +13,7 @@ use rustc_span::symbol::{kw, sym};
 use rustc_span::symbol::{Ident, Symbol};
 use rustc_span::{self, edition::Edition, Span, DUMMY_SP};
 use std::borrow::Cow;
-use std::{fmt, mem};
+use std::fmt;

 #[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
 pub enum CommentKind {
@@ -335,11 +335,6 @@ impl Token {
         Token::new(Ident(ident.name, ident.is_raw_guess()), ident.span)
     }

-    /// Return this token by value and leave a dummy token in its place.
-    pub fn take(&mut self) -> Self {
-        mem::replace(self, Token::dummy())
-    }
-
     /// For interpolated tokens, returns a span of the fragment to which the interpolated
     /// token refers. For all other tokens this is just a regular span.
     /// It is particularly important to use this for identifiers and lifetimes
@@ -62,7 +62,8 @@ pub mod translation;
 pub use diagnostic_builder::IntoDiagnostic;
 pub use snippet::Style;

-pub type PResult<'a, T> = Result<T, DiagnosticBuilder<'a, ErrorGuaranteed>>;
+pub type PErr<'a> = DiagnosticBuilder<'a, ErrorGuaranteed>;
+pub type PResult<'a, T> = Result<T, PErr<'a>>;

 // `PResult` is used a lot. Make sure it doesn't unintentionally get bigger.
 // (See also the comment on `DiagnosticBuilder`'s `diagnostic` field.)
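Note: the new `PErr` alias names the error half of `PResult` once, so downstream signatures (e.g. in the token-trees reader below) no longer spell out `DiagnosticBuilder<'a, ErrorGuaranteed>`. A self-contained sketch of the aliasing pattern, with stand-in types rather than rustc's:

// Stand-in types; only the aliasing pattern mirrors the diff above.
struct DiagnosticBuilder<'a> {
    msg: &'a str,
}

type PErr<'a> = DiagnosticBuilder<'a>;
type PResult<'a, T> = Result<T, PErr<'a>>;

// Every fallible routine can now use the short aliases.
fn parse_number<'a>(input: &'a str) -> PResult<'a, u32> {
    input.trim().parse().map_err(|_| DiagnosticBuilder { msg: input })
}

fn main() {
    assert!(parse_number("42").is_ok());
    assert!(parse_number("forty-two").is_err());
}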
@@ -4,8 +4,8 @@ use std::str::Chars;
 ///
 /// Next characters can be peeked via `first` method,
 /// and position can be shifted forward via `bump` method.
-pub(crate) struct Cursor<'a> {
-    initial_len: usize,
+pub struct Cursor<'a> {
+    len_remaining: usize,
     /// Iterator over chars. Slightly faster than a &str.
     chars: Chars<'a>,
     #[cfg(debug_assertions)]
@@ -15,9 +15,9 @@ pub(crate) struct Cursor<'a> {
 pub(crate) const EOF_CHAR: char = '\0';

 impl<'a> Cursor<'a> {
-    pub(crate) fn new(input: &'a str) -> Cursor<'a> {
+    pub fn new(input: &'a str) -> Cursor<'a> {
         Cursor {
-            initial_len: input.len(),
+            len_remaining: input.len(),
             chars: input.chars(),
             #[cfg(debug_assertions)]
             prev: EOF_CHAR,
@@ -61,13 +61,13 @@ impl<'a> Cursor<'a> {
     }

     /// Returns amount of already consumed symbols.
-    pub(crate) fn len_consumed(&self) -> u32 {
-        (self.initial_len - self.chars.as_str().len()) as u32
+    pub(crate) fn pos_within_token(&self) -> u32 {
+        (self.len_remaining - self.chars.as_str().len()) as u32
     }

     /// Resets the number of bytes consumed to 0.
-    pub(crate) fn reset_len_consumed(&mut self) {
-        self.initial_len = self.chars.as_str().len();
+    pub(crate) fn reset_pos_within_token(&mut self) {
+        self.len_remaining = self.chars.as_str().len();
     }

     /// Moves to the next character.
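Note: the renames above track a semantic shift, not just naming taste: the counter now measures progress within the current token, because `advance_token` resets it after producing each token (see the `rustc_lexer` hunks below). A self-contained sketch of the byte-counting trick, with stand-in names rather than the compiler's actual types:

// Position is derived by comparing the remaining byte length of the `Chars`
// iterator against a snapshot taken at the start of the current token.
struct MiniCursor<'a> {
    len_remaining: usize,
    chars: std::str::Chars<'a>,
}

impl<'a> MiniCursor<'a> {
    fn new(input: &'a str) -> Self {
        MiniCursor { len_remaining: input.len(), chars: input.chars() }
    }

    /// Bytes consumed since the start of the current token.
    fn pos_within_token(&self) -> u32 {
        (self.len_remaining - self.chars.as_str().len()) as u32
    }

    /// Marks the current position as the start of the next token.
    fn reset_pos_within_token(&mut self) {
        self.len_remaining = self.chars.as_str().len();
    }
}

fn main() {
    let mut c = MiniCursor::new("ab cd");
    c.chars.next(); // consume 'a'
    c.chars.next(); // consume 'b'
    assert_eq!(c.pos_within_token(), 2);
    c.reset_pos_within_token();
    assert_eq!(c.pos_within_token(), 0);
}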
@@ -29,9 +29,11 @@ pub mod unescape;
 #[cfg(test)]
 mod tests;

+pub use crate::cursor::Cursor;
+
 use self::LiteralKind::*;
 use self::TokenKind::*;
-use crate::cursor::{Cursor, EOF_CHAR};
+use crate::cursor::EOF_CHAR;
 use std::convert::TryFrom;

 /// Parsed token.
@@ -139,6 +141,9 @@ pub enum TokenKind {

     /// Unknown token, not expected by the lexer, e.g. "№"
     Unknown,
+
+    /// End of input.
+    Eof,
 }

 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
@@ -219,13 +224,6 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
     None
 }

-/// Parses the first token from the provided input string.
-#[inline]
-pub fn first_token(input: &str) -> Token {
-    debug_assert!(!input.is_empty());
-    Cursor::new(input).advance_token()
-}
-
 /// Validates a raw string literal. Used for getting more information about a
 /// problem with a `RawStr`/`RawByteStr` with a `None` field.
 #[inline]
@@ -243,12 +241,8 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
 pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ {
     let mut cursor = Cursor::new(input);
     std::iter::from_fn(move || {
-        if cursor.is_eof() {
-            None
-        } else {
-            cursor.reset_len_consumed();
-            Some(cursor.advance_token())
-        }
+        let token = cursor.advance_token();
+        if token.kind != TokenKind::Eof { Some(token) } else { None }
     })
 }

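Note: with the new `Eof` token kind, `tokenize` no longer probes `is_eof` or resets the cursor itself; iteration simply stops at the first `Eof` token. A short usage sketch, assuming the `rustc_lexer` crate as a dependency:

// Collect the byte length of every token in a source string; the iterator
// ends on its own when the underlying cursor reports `Eof`.
fn token_lengths(src: &str) -> Vec<u32> {
    rustc_lexer::tokenize(src).map(|token| token.len).collect()
}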
@@ -311,8 +305,11 @@ pub fn is_ident(string: &str) -> bool {

 impl Cursor<'_> {
     /// Parses a token from the input string.
-    fn advance_token(&mut self) -> Token {
-        let first_char = self.bump().unwrap();
+    pub fn advance_token(&mut self) -> Token {
+        let first_char = match self.bump() {
+            Some(c) => c,
+            None => return Token::new(TokenKind::Eof, 0),
+        };
         let token_kind = match first_char {
             // Slash, comment or block comment.
             '/' => match self.first() {
@@ -329,7 +326,7 @@ impl Cursor<'_> {
             ('#', c1) if is_id_start(c1) => self.raw_ident(),
             ('#', _) | ('"', _) => {
                 let res = self.raw_double_quoted_string(1);
-                let suffix_start = self.len_consumed();
+                let suffix_start = self.pos_within_token();
                 if res.is_ok() {
                     self.eat_literal_suffix();
                 }
@@ -344,7 +341,7 @@ impl Cursor<'_> {
             ('\'', _) => {
                 self.bump();
                 let terminated = self.single_quoted_string();
-                let suffix_start = self.len_consumed();
+                let suffix_start = self.pos_within_token();
                 if terminated {
                     self.eat_literal_suffix();
                 }
@@ -354,7 +351,7 @@ impl Cursor<'_> {
             ('"', _) => {
                 self.bump();
                 let terminated = self.double_quoted_string();
-                let suffix_start = self.len_consumed();
+                let suffix_start = self.pos_within_token();
                 if terminated {
                     self.eat_literal_suffix();
                 }
@@ -364,7 +361,7 @@ impl Cursor<'_> {
             ('r', '"') | ('r', '#') => {
                 self.bump();
                 let res = self.raw_double_quoted_string(2);
-                let suffix_start = self.len_consumed();
+                let suffix_start = self.pos_within_token();
                 if res.is_ok() {
                     self.eat_literal_suffix();
                 }
@@ -381,7 +378,7 @@ impl Cursor<'_> {
             // Numeric literal.
             c @ '0'..='9' => {
                 let literal_kind = self.number(c);
-                let suffix_start = self.len_consumed();
+                let suffix_start = self.pos_within_token();
                 self.eat_literal_suffix();
                 TokenKind::Literal { kind: literal_kind, suffix_start }
             }
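Note: in every literal arm, `suffix_start` is recorded before the suffix is eaten, so it is the offset within the token where a suffix such as the `u8` in `1u8` begins. A small illustration, assuming the `rustc_lexer` crate as a dependency:

// For `1u8`, the digits span offsets 0..1 and the suffix starts at offset 1.
fn main() {
    let tok = rustc_lexer::tokenize("1u8").next().unwrap();
    if let rustc_lexer::TokenKind::Literal { suffix_start, .. } = tok.kind {
        assert_eq!(suffix_start, 1);
    }
}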
@@ -420,7 +417,7 @@ impl Cursor<'_> {
             // String literal.
             '"' => {
                 let terminated = self.double_quoted_string();
-                let suffix_start = self.len_consumed();
+                let suffix_start = self.pos_within_token();
                 if terminated {
                     self.eat_literal_suffix();
                 }
@@ -433,7 +430,9 @@ impl Cursor<'_> {
             }
             _ => Unknown,
         };
-        Token::new(token_kind, self.len_consumed())
+        let res = Token::new(token_kind, self.pos_within_token());
+        self.reset_pos_within_token();
+        res
     }

     fn line_comment(&mut self) -> TokenKind {
@@ -618,7 +617,7 @@ impl Cursor<'_> {

         if !can_be_a_lifetime {
             let terminated = self.single_quoted_string();
-            let suffix_start = self.len_consumed();
+            let suffix_start = self.pos_within_token();
             if terminated {
                 self.eat_literal_suffix();
             }
@@ -643,7 +642,7 @@ impl Cursor<'_> {
         if self.first() == '\'' {
             self.bump();
             let kind = Char { terminated: true };
-            Literal { kind, suffix_start: self.len_consumed() }
+            Literal { kind, suffix_start: self.pos_within_token() }
         } else {
             Lifetime { starts_with_number }
         }
@@ -724,7 +723,7 @@ impl Cursor<'_> {

     fn raw_string_unvalidated(&mut self, prefix_len: u32) -> Result<u32, RawStrError> {
         debug_assert!(self.prev() == 'r');
-        let start_pos = self.len_consumed();
+        let start_pos = self.pos_within_token();
         let mut possible_terminator_offset = None;
         let mut max_hashes = 0;

@@ -778,7 +777,7 @@ impl Cursor<'_> {
                     // Keep track of possible terminators to give a hint about
                     // where there might be a missing terminator
                     possible_terminator_offset =
-                        Some(self.len_consumed() - start_pos - n_end_hashes + prefix_len);
+                        Some(self.pos_within_token() - start_pos - n_end_hashes + prefix_len);
                     max_hashes = n_end_hashes;
                 }
             }
@@ -1,10 +1,11 @@
 use crate::lexer::unicode_chars::UNICODE_ARRAY;
 use rustc_ast::ast::{self, AttrStyle};
 use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
-use rustc_ast::tokenstream::{Spacing, TokenStream};
+use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
 use rustc_errors::{error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult};
 use rustc_lexer::unescape::{self, Mode};
+use rustc_lexer::Cursor;
 use rustc_lexer::{Base, DocStyle, RawStrError};
 use rustc_session::lint::builtin::{
     RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
@@ -38,11 +39,20 @@ pub struct UnmatchedBrace {

 pub(crate) fn parse_token_trees<'a>(
     sess: &'a ParseSess,
-    src: &'a str,
-    start_pos: BytePos,
+    mut src: &'a str,
+    mut start_pos: BytePos,
     override_span: Option<Span>,
 ) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
-    StringReader { sess, start_pos, pos: start_pos, src, override_span }.into_token_trees()
+    // Skip `#!`, if present.
+    if let Some(shebang_len) = rustc_lexer::strip_shebang(src) {
+        src = &src[shebang_len..];
+        start_pos = start_pos + BytePos::from_usize(shebang_len);
+    }
+
+    let cursor = Cursor::new(src);
+    let string_reader =
+        StringReader { sess, start_pos, pos: start_pos, src, cursor, override_span };
+    tokentrees::TokenTreesReader::parse_token_trees(string_reader)
 }

 struct StringReader<'a> {
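Note: shebang stripping now happens once, up front, instead of being re-checked on every call to `next_token`. `strip_shebang` returns the byte length of a leading `#!` line (when it is not the start of an inner attribute), which is used to slice the source and offset the starting position. A sketch, assuming the `rustc_lexer` crate as a dependency:

// Drop a leading shebang line, exactly once, before any tokens are produced.
fn strip(src: &str) -> &str {
    match rustc_lexer::strip_shebang(src) {
        Some(len) => &src[len..],
        None => src,
    }
}

fn main() {
    assert_eq!(strip("#!/usr/bin/env rust\nfn main() {}"), "\nfn main() {}");
    assert_eq!(strip("fn main() {}"), "fn main() {}");
}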
@@ -53,6 +63,8 @@ struct StringReader<'a> {
     pos: BytePos,
     /// Source text to tokenize.
     src: &'a str,
+    /// Cursor for getting lexer tokens.
+    cursor: Cursor<'a>,
     override_span: Option<Span>,
 }

@@ -61,42 +73,195 @@ impl<'a> StringReader<'a> {
         self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi))
     }

-    /// Returns the next token, and info about preceding whitespace, if any.
-    fn next_token(&mut self) -> (Spacing, Token) {
-        let mut spacing = Spacing::Joint;
-
-        // Skip `#!` at the start of the file
-        if self.pos == self.start_pos
-            && let Some(shebang_len) = rustc_lexer::strip_shebang(self.src)
-        {
-            self.pos = self.pos + BytePos::from_usize(shebang_len);
-            spacing = Spacing::Alone;
-        }
+    /// Returns the next token, paired with a bool indicating if the token was
+    /// preceded by whitespace.
+    fn next_token(&mut self) -> (Token, bool) {
+        let mut preceded_by_whitespace = false;

         // Skip trivial (whitespace & comments) tokens
         loop {
-            let start_src_index = self.src_index(self.pos);
-            let text: &str = &self.src[start_src_index..];
-
-            if text.is_empty() {
-                let span = self.mk_sp(self.pos, self.pos);
-                return (spacing, Token::new(token::Eof, span));
-            }
-
-            let token = rustc_lexer::first_token(text);
-
+            let token = self.cursor.advance_token();
             let start = self.pos;
             self.pos = self.pos + BytePos(token.len);

             debug!("next_token: {:?}({:?})", token.kind, self.str_from(start));

-            match self.cook_lexer_token(token.kind, start) {
-                Some(kind) => {
-                    let span = self.mk_sp(start, self.pos);
-                    return (spacing, Token::new(kind, span));
-                }
-                None => spacing = Spacing::Alone,
-            }
-        }
-    }
+            // Now "cook" the token, converting the simple `rustc_lexer::TokenKind` enum into a
+            // rich `rustc_ast::TokenKind`. This turns strings into interned symbols and runs
+            // additional validation.
+            let kind = match token.kind {
+                rustc_lexer::TokenKind::LineComment { doc_style } => {
+                    // Skip non-doc comments
+                    let Some(doc_style) = doc_style else {
+                        self.lint_unicode_text_flow(start);
+                        preceded_by_whitespace = true;
+                        continue;
+                    };
+
+                    // Opening delimiter of the length 3 is not included into the symbol.
+                    let content_start = start + BytePos(3);
+                    let content = self.str_from(content_start);
+                    self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style)
+                }
+                rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => {
+                    if !terminated {
+                        self.report_unterminated_block_comment(start, doc_style);
+                    }
+
+                    // Skip non-doc comments
+                    let Some(doc_style) = doc_style else {
+                        self.lint_unicode_text_flow(start);
+                        preceded_by_whitespace = true;
+                        continue;
+                    };
+
+                    // Opening delimiter of the length 3 and closing delimiter of the length 2
+                    // are not included into the symbol.
+                    let content_start = start + BytePos(3);
+                    let content_end = self.pos - BytePos(if terminated { 2 } else { 0 });
+                    let content = self.str_from_to(content_start, content_end);
+                    self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
+                }
+                rustc_lexer::TokenKind::Whitespace => {
+                    preceded_by_whitespace = true;
+                    continue;
+                }
+                rustc_lexer::TokenKind::Ident => {
+                    let sym = nfc_normalize(self.str_from(start));
+                    let span = self.mk_sp(start, self.pos);
+                    self.sess.symbol_gallery.insert(sym, span);
+                    token::Ident(sym, false)
+                }
+                rustc_lexer::TokenKind::RawIdent => {
+                    let sym = nfc_normalize(self.str_from(start + BytePos(2)));
+                    let span = self.mk_sp(start, self.pos);
+                    self.sess.symbol_gallery.insert(sym, span);
+                    if !sym.can_be_raw() {
+                        self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
+                    }
+                    self.sess.raw_identifier_spans.borrow_mut().push(span);
+                    token::Ident(sym, true)
+                }
+                rustc_lexer::TokenKind::UnknownPrefix => {
+                    self.report_unknown_prefix(start);
+                    let sym = nfc_normalize(self.str_from(start));
+                    let span = self.mk_sp(start, self.pos);
+                    self.sess.symbol_gallery.insert(sym, span);
+                    token::Ident(sym, false)
+                }
+                rustc_lexer::TokenKind::InvalidIdent
+                    // Do not recover an identifier with emoji if the codepoint is a confusable
+                    // with a recoverable substitution token, like `➖`.
+                    if !UNICODE_ARRAY
+                        .iter()
+                        .any(|&(c, _, _)| {
+                            let sym = self.str_from(start);
+                            sym.chars().count() == 1 && c == sym.chars().next().unwrap()
+                        }) =>
+                {
+                    let sym = nfc_normalize(self.str_from(start));
+                    let span = self.mk_sp(start, self.pos);
+                    self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default()
+                        .push(span);
+                    token::Ident(sym, false)
+                }
+                rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
+                    let suffix_start = start + BytePos(suffix_start);
+                    let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
+                    let suffix = if suffix_start < self.pos {
+                        let string = self.str_from(suffix_start);
+                        if string == "_" {
+                            self.sess
+                                .span_diagnostic
+                                .struct_span_warn(
+                                    self.mk_sp(suffix_start, self.pos),
+                                    "underscore literal suffix is not allowed",
+                                )
+                                .warn(
+                                    "this was previously accepted by the compiler but is \
+                                       being phased out; it will become a hard error in \
+                                       a future release!",
+                                )
+                                .note(
+                                    "see issue #42326 \
+                                     <https://github.com/rust-lang/rust/issues/42326> \
+                                     for more information",
+                                )
+                                .emit();
+                            None
+                        } else {
+                            Some(Symbol::intern(string))
+                        }
+                    } else {
+                        None
+                    };
+                    token::Literal(token::Lit { kind, symbol, suffix })
+                }
+                rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
+                    // Include the leading `'` in the real identifier, for macro
+                    // expansion purposes. See #12512 for the gory details of why
+                    // this is necessary.
+                    let lifetime_name = self.str_from(start);
+                    if starts_with_number {
+                        self.err_span_(start, self.pos, "lifetimes cannot start with a number");
+                    }
+                    let ident = Symbol::intern(lifetime_name);
+                    token::Lifetime(ident)
+                }
+                rustc_lexer::TokenKind::Semi => token::Semi,
+                rustc_lexer::TokenKind::Comma => token::Comma,
+                rustc_lexer::TokenKind::Dot => token::Dot,
+                rustc_lexer::TokenKind::OpenParen => token::OpenDelim(Delimiter::Parenthesis),
+                rustc_lexer::TokenKind::CloseParen => token::CloseDelim(Delimiter::Parenthesis),
+                rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(Delimiter::Brace),
+                rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(Delimiter::Brace),
+                rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(Delimiter::Bracket),
+                rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(Delimiter::Bracket),
+                rustc_lexer::TokenKind::At => token::At,
+                rustc_lexer::TokenKind::Pound => token::Pound,
+                rustc_lexer::TokenKind::Tilde => token::Tilde,
+                rustc_lexer::TokenKind::Question => token::Question,
+                rustc_lexer::TokenKind::Colon => token::Colon,
+                rustc_lexer::TokenKind::Dollar => token::Dollar,
+                rustc_lexer::TokenKind::Eq => token::Eq,
+                rustc_lexer::TokenKind::Bang => token::Not,
+                rustc_lexer::TokenKind::Lt => token::Lt,
+                rustc_lexer::TokenKind::Gt => token::Gt,
+                rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
+                rustc_lexer::TokenKind::And => token::BinOp(token::And),
+                rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
+                rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
+                rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
+                rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
+                rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
+                rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
+
+                rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
+                    let c = self.str_from(start).chars().next().unwrap();
+                    let mut err =
+                        self.struct_err_span_char(start, self.pos, "unknown start of token", c);
+                    // FIXME: the lexer could be used to turn the ASCII version of unicode
+                    // homoglyphs, instead of keeping a table in `check_for_substitution`into the
+                    // token. Ideally, this should be inside `rustc_lexer`. However, we should
+                    // first remove compound tokens like `<<` from `rustc_lexer`, and then add
+                    // fancier error recovery to it, as there will be less overall work to do this
+                    // way.
+                    let token = unicode_chars::check_for_substitution(self, start, c, &mut err);
+                    if c == '\x00' {
+                        err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
+                    }
+                    err.emit();
+                    if let Some(token) = token {
+                        token
+                    } else {
+                        preceded_by_whitespace = true;
+                        continue;
+                    }
+                }
+                rustc_lexer::TokenKind::Eof => token::Eof,
+            };
+            let span = self.mk_sp(start, self.pos);
+            return (Token::new(kind, span), preceded_by_whitespace);
+        }
+    }
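Note: the old `(Spacing, Token)` pair reported spacing computed while skipping trivia; the new `(Token, bool)` contract is simpler, returning only whether the token was preceded by whitespace, and the spacing decision moves into the token-trees pass (see `parse_token_tree_non_delim_non_eof` below). A self-contained analogue of the new contract, with stand-in types rather than the compiler's:

// The lexing loop skips trivia, remembers that it did so, and hands that
// flag back alongside the next real token.
#[derive(Debug, PartialEq)]
enum Tok {
    Word(String),
    Eof,
}

fn next_token(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> (Tok, bool) {
    let mut preceded_by_whitespace = false;
    loop {
        match chars.peek() {
            None => return (Tok::Eof, preceded_by_whitespace),
            Some(c) if c.is_whitespace() => {
                preceded_by_whitespace = true;
                chars.next();
            }
            Some(_) => {
                let mut word = String::new();
                while let Some(c) = chars.peek().copied().filter(|c| !c.is_whitespace()) {
                    word.push(c);
                    chars.next();
                }
                return (Tok::Word(word), preceded_by_whitespace);
            }
        }
    }
}

fn main() {
    let mut it = "a b".chars().peekable();
    assert_eq!(next_token(&mut it), (Tok::Word("a".into()), false));
    assert_eq!(next_token(&mut it), (Tok::Word("b".into()), true));
    assert_eq!(next_token(&mut it), (Tok::Eof, false));
}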
@@ -162,171 +327,6 @@ impl<'a> StringReader<'a> {
         }
     }

-    /// Turns simple `rustc_lexer::TokenKind` enum into a rich
-    /// `rustc_ast::TokenKind`. This turns strings into interned
-    /// symbols and runs additional validation.
-    fn cook_lexer_token(&self, token: rustc_lexer::TokenKind, start: BytePos) -> Option<TokenKind> {
-        Some(match token {
-            rustc_lexer::TokenKind::LineComment { doc_style } => {
-                // Skip non-doc comments
-                let Some(doc_style) = doc_style else {
-                    self.lint_unicode_text_flow(start);
-                    return None;
-                };
-
-                // Opening delimiter of the length 3 is not included into the symbol.
-                let content_start = start + BytePos(3);
-                let content = self.str_from(content_start);
-                self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style)
-            }
-            rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => {
-                if !terminated {
-                    self.report_unterminated_block_comment(start, doc_style);
-                }
-
-                // Skip non-doc comments
-                let Some(doc_style) = doc_style else {
-                    self.lint_unicode_text_flow(start);
-                    return None;
-                };
-
-                // Opening delimiter of the length 3 and closing delimiter of the length 2
-                // are not included into the symbol.
-                let content_start = start + BytePos(3);
-                let content_end = self.pos - BytePos(if terminated { 2 } else { 0 });
-                let content = self.str_from_to(content_start, content_end);
-                self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
-            }
-            rustc_lexer::TokenKind::Whitespace => return None,
-            rustc_lexer::TokenKind::Ident
-            | rustc_lexer::TokenKind::RawIdent
-            | rustc_lexer::TokenKind::UnknownPrefix => {
-                let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent;
-                let is_unknown_prefix = token == rustc_lexer::TokenKind::UnknownPrefix;
-                let mut ident_start = start;
-                if is_raw_ident {
-                    ident_start = ident_start + BytePos(2);
-                }
-                if is_unknown_prefix {
-                    self.report_unknown_prefix(start);
-                }
-                let sym = nfc_normalize(self.str_from(ident_start));
-                let span = self.mk_sp(start, self.pos);
-                self.sess.symbol_gallery.insert(sym, span);
-                if is_raw_ident {
-                    if !sym.can_be_raw() {
-                        self.err_span(span, &format!("`{}` cannot be a raw identifier", sym));
-                    }
-                    self.sess.raw_identifier_spans.borrow_mut().push(span);
-                }
-                token::Ident(sym, is_raw_ident)
-            }
-            rustc_lexer::TokenKind::InvalidIdent
-                // Do not recover an identifier with emoji if the codepoint is a confusable
-                // with a recoverable substitution token, like `➖`.
-                if !UNICODE_ARRAY
-                    .iter()
-                    .any(|&(c, _, _)| {
-                        let sym = self.str_from(start);
-                        sym.chars().count() == 1 && c == sym.chars().next().unwrap()
-                    })
-                =>
-            {
-                let sym = nfc_normalize(self.str_from(start));
-                let span = self.mk_sp(start, self.pos);
-                self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default().push(span);
-                token::Ident(sym, false)
-            }
-            rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
-                let suffix_start = start + BytePos(suffix_start);
-                let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
-                let suffix = if suffix_start < self.pos {
-                    let string = self.str_from(suffix_start);
-                    if string == "_" {
-                        self.sess
-                            .span_diagnostic
-                            .struct_span_warn(
-                                self.mk_sp(suffix_start, self.pos),
-                                "underscore literal suffix is not allowed",
-                            )
-                            .warn(
-                                "this was previously accepted by the compiler but is \
-                                   being phased out; it will become a hard error in \
-                                   a future release!",
-                            )
-                            .note(
-                                "see issue #42326 \
-                                 <https://github.com/rust-lang/rust/issues/42326> \
-                                 for more information",
-                            )
-                            .emit();
-                        None
-                    } else {
-                        Some(Symbol::intern(string))
-                    }
-                } else {
-                    None
-                };
-                token::Literal(token::Lit { kind, symbol, suffix })
-            }
-            rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
-                // Include the leading `'` in the real identifier, for macro
-                // expansion purposes. See #12512 for the gory details of why
-                // this is necessary.
-                let lifetime_name = self.str_from(start);
-                if starts_with_number {
-                    self.err_span_(start, self.pos, "lifetimes cannot start with a number");
-                }
-                let ident = Symbol::intern(lifetime_name);
-                token::Lifetime(ident)
-            }
-            rustc_lexer::TokenKind::Semi => token::Semi,
-            rustc_lexer::TokenKind::Comma => token::Comma,
-            rustc_lexer::TokenKind::Dot => token::Dot,
-            rustc_lexer::TokenKind::OpenParen => token::OpenDelim(Delimiter::Parenthesis),
-            rustc_lexer::TokenKind::CloseParen => token::CloseDelim(Delimiter::Parenthesis),
-            rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(Delimiter::Brace),
-            rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(Delimiter::Brace),
-            rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(Delimiter::Bracket),
-            rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(Delimiter::Bracket),
-            rustc_lexer::TokenKind::At => token::At,
-            rustc_lexer::TokenKind::Pound => token::Pound,
-            rustc_lexer::TokenKind::Tilde => token::Tilde,
-            rustc_lexer::TokenKind::Question => token::Question,
-            rustc_lexer::TokenKind::Colon => token::Colon,
-            rustc_lexer::TokenKind::Dollar => token::Dollar,
-            rustc_lexer::TokenKind::Eq => token::Eq,
-            rustc_lexer::TokenKind::Bang => token::Not,
-            rustc_lexer::TokenKind::Lt => token::Lt,
-            rustc_lexer::TokenKind::Gt => token::Gt,
-            rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
-            rustc_lexer::TokenKind::And => token::BinOp(token::And),
-            rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
-            rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
-            rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
-            rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
-            rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
-            rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
-
-            rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
-                let c = self.str_from(start).chars().next().unwrap();
-                let mut err =
-                    self.struct_err_span_char(start, self.pos, "unknown start of token", c);
-                // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs,
-                // instead of keeping a table in `check_for_substitution`into the token. Ideally,
-                // this should be inside `rustc_lexer`. However, we should first remove compound
-                // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
-                // as there will be less overall work to do this way.
-                let token = unicode_chars::check_for_substitution(self, start, c, &mut err);
-                if c == '\x00' {
-                    err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
-                }
-                err.emit();
-                token?
-            }
-        })
-    }
-
     fn cook_doc_comment(
         &self,
         content_start: BytePos,
@@ -1,31 +1,15 @@
 use super::{StringReader, UnmatchedBrace};

 use rustc_ast::token::{self, Delimiter, Token};
 use rustc_ast::tokenstream::{DelimSpan, Spacing, TokenStream, TokenTree};
 use rustc_ast_pretty::pprust::token_to_string;
 use rustc_data_structures::fx::FxHashMap;
-use rustc_errors::PResult;
+use rustc_errors::{PErr, PResult};
 use rustc_span::Span;

-impl<'a> StringReader<'a> {
-    pub(super) fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
-        let mut tt_reader = TokenTreesReader {
-            string_reader: self,
-            token: Token::dummy(),
-            open_braces: Vec::new(),
-            unmatched_braces: Vec::new(),
-            matching_delim_spans: Vec::new(),
-            last_unclosed_found_span: None,
-            last_delim_empty_block_spans: FxHashMap::default(),
-            matching_block_spans: Vec::new(),
-        };
-        let res = tt_reader.parse_all_token_trees();
-        (res, tt_reader.unmatched_braces)
-    }
-}
-
-struct TokenTreesReader<'a> {
+pub(super) struct TokenTreesReader<'a> {
     string_reader: StringReader<'a>,
+    /// The "next" token, which has been obtained from the `StringReader` but
+    /// not yet handled by the `TokenTreesReader`.
     token: Token,
     /// Stack of open delimiters and their spans. Used for error message.
     open_braces: Vec<(Delimiter, Span)>,
@@ -43,231 +27,235 @@ struct TokenTreesReader<'a> {
 }

 impl<'a> TokenTreesReader<'a> {
+    pub(super) fn parse_token_trees(
+        string_reader: StringReader<'a>,
+    ) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
+        let mut tt_reader = TokenTreesReader {
+            string_reader,
+            token: Token::dummy(),
+            open_braces: Vec::new(),
+            unmatched_braces: Vec::new(),
+            matching_delim_spans: Vec::new(),
+            last_unclosed_found_span: None,
+            last_delim_empty_block_spans: FxHashMap::default(),
+            matching_block_spans: Vec::new(),
+        };
+        let res = tt_reader.parse_all_token_trees();
+        (res, tt_reader.unmatched_braces)
+    }
+
     // Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`.
     fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> {
+        self.token = self.string_reader.next_token().0;
         let mut buf = TokenStreamBuilder::default();
-
-        self.bump();
-        while self.token != token::Eof {
-            buf.push(self.parse_token_tree()?);
+        loop {
+            match self.token.kind {
+                token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)),
+                token::CloseDelim(delim) => return Err(self.close_delim_err(delim)),
+                token::Eof => return Ok(buf.into_token_stream()),
+                _ => buf.push(self.parse_token_tree_non_delim_non_eof()),
+            }
         }
-
-        Ok(buf.into_token_stream())
     }

     // Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`.
     fn parse_token_trees_until_close_delim(&mut self) -> TokenStream {
         let mut buf = TokenStreamBuilder::default();
         loop {
-            if let token::CloseDelim(..) = self.token.kind {
-                return buf.into_token_stream();
-            }
-
-            match self.parse_token_tree() {
-                Ok(tree) => buf.push(tree),
-                Err(mut e) => {
-                    e.emit();
-                    return buf.into_token_stream();
-                }
+            match self.token.kind {
+                token::OpenDelim(delim) => buf.push(self.parse_token_tree_open_delim(delim)),
+                token::CloseDelim(..) => return buf.into_token_stream(),
+                token::Eof => {
+                    self.eof_err().emit();
+                    return buf.into_token_stream();
+                }
+                _ => buf.push(self.parse_token_tree_non_delim_non_eof()),
             }
         }
     }

-    fn parse_token_tree(&mut self) -> PResult<'a, TokenTree> {
-        let sm = self.string_reader.sess.source_map();
-
-        match self.token.kind {
-            token::Eof => {
-                let msg = "this file contains an unclosed delimiter";
-                let mut err =
-                    self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg);
-                for &(_, sp) in &self.open_braces {
-                    err.span_label(sp, "unclosed delimiter");
-                    self.unmatched_braces.push(UnmatchedBrace {
-                        expected_delim: Delimiter::Brace,
-                        found_delim: None,
-                        found_span: self.token.span,
-                        unclosed_span: Some(sp),
-                        candidate_span: None,
-                    });
-                }
-
-                if let Some((delim, _)) = self.open_braces.last() {
-                    if let Some((_, open_sp, close_sp)) =
-                        self.matching_delim_spans.iter().find(|(d, open_sp, close_sp)| {
-                            if let Some(close_padding) = sm.span_to_margin(*close_sp) {
-                                if let Some(open_padding) = sm.span_to_margin(*open_sp) {
-                                    return delim == d && close_padding != open_padding;
-                                }
-                            }
-                            false
-                        })
-                    // these are in reverse order as they get inserted on close, but
-                    {
-                        // we want the last open/first close
-                        err.span_label(*open_sp, "this delimiter might not be properly closed...");
-                        err.span_label(
-                            *close_sp,
-                            "...as it matches this but it has different indentation",
-                        );
-                    }
-                }
-                Err(err)
-            }
-            token::OpenDelim(delim) => {
-                // The span for beginning of the delimited section
-                let pre_span = self.token.span;
-
-                // Parse the open delimiter.
-                self.open_braces.push((delim, self.token.span));
-                self.bump();
-
-                // Parse the token trees within the delimiters.
-                // We stop at any delimiter so we can try to recover if the user
-                // uses an incorrect delimiter.
-                let tts = self.parse_token_trees_until_close_delim();
-
-                // Expand to cover the entire delimited token tree
-                let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
-
-                match self.token.kind {
-                    // Correct delimiter.
-                    token::CloseDelim(d) if d == delim => {
-                        let (open_brace, open_brace_span) = self.open_braces.pop().unwrap();
-                        let close_brace_span = self.token.span;
-
-                        if tts.is_empty() {
-                            let empty_block_span = open_brace_span.to(close_brace_span);
-                            if !sm.is_multiline(empty_block_span) {
-                                // Only track if the block is in the form of `{}`, otherwise it is
-                                // likely that it was written on purpose.
-                                self.last_delim_empty_block_spans.insert(delim, empty_block_span);
-                            }
-                        }
-
-                        //only add braces
-                        if let (Delimiter::Brace, Delimiter::Brace) = (open_brace, delim) {
-                            self.matching_block_spans.push((open_brace_span, close_brace_span));
-                        }
-
-                        if self.open_braces.is_empty() {
-                            // Clear up these spans to avoid suggesting them as we've found
-                            // properly matched delimiters so far for an entire block.
-                            self.matching_delim_spans.clear();
-                        } else {
-                            self.matching_delim_spans.push((
-                                open_brace,
-                                open_brace_span,
-                                close_brace_span,
-                            ));
-                        }
-                        // Parse the closing delimiter.
-                        self.bump();
-                    }
-                    // Incorrect delimiter.
-                    token::CloseDelim(other) => {
-                        let mut unclosed_delimiter = None;
-                        let mut candidate = None;
-
-                        if self.last_unclosed_found_span != Some(self.token.span) {
-                            // do not complain about the same unclosed delimiter multiple times
-                            self.last_unclosed_found_span = Some(self.token.span);
-                            // This is a conservative error: only report the last unclosed
-                            // delimiter. The previous unclosed delimiters could actually be
-                            // closed! The parser just hasn't gotten to them yet.
-                            if let Some(&(_, sp)) = self.open_braces.last() {
-                                unclosed_delimiter = Some(sp);
-                            };
-                            if let Some(current_padding) = sm.span_to_margin(self.token.span) {
-                                for (brace, brace_span) in &self.open_braces {
-                                    if let Some(padding) = sm.span_to_margin(*brace_span) {
-                                        // high likelihood of these two corresponding
-                                        if current_padding == padding && brace == &other {
-                                            candidate = Some(*brace_span);
-                                        }
-                                    }
-                                }
-                            }
-                            let (tok, _) = self.open_braces.pop().unwrap();
-                            self.unmatched_braces.push(UnmatchedBrace {
-                                expected_delim: tok,
-                                found_delim: Some(other),
-                                found_span: self.token.span,
-                                unclosed_span: unclosed_delimiter,
-                                candidate_span: candidate,
-                            });
-                        } else {
-                            self.open_braces.pop();
-                        }
-
-                        // If the incorrect delimiter matches an earlier opening
-                        // delimiter, then don't consume it (it can be used to
-                        // close the earlier one). Otherwise, consume it.
-                        // E.g., we try to recover from:
-                        // fn foo() {
-                        //     bar(baz(
-                        // }  // Incorrect delimiter but matches the earlier `{`
-                        if !self.open_braces.iter().any(|&(b, _)| b == other) {
-                            self.bump();
-                        }
-                    }
-                    token::Eof => {
-                        // Silently recover, the EOF token will be seen again
-                        // and an error emitted then. Thus we don't pop from
-                        // self.open_braces here.
-                    }
-                    _ => {}
-                }
-
-                Ok(TokenTree::Delimited(delim_span, delim, tts))
-            }
-            token::CloseDelim(delim) => {
-                // An unexpected closing delimiter (i.e., there is no
-                // matching opening delimiter).
-                let token_str = token_to_string(&self.token);
-                let msg = format!("unexpected closing delimiter: `{}`", token_str);
-                let mut err =
-                    self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, &msg);
-
-                // Braces are added at the end, so the last element is the biggest block
-                if let Some(parent) = self.matching_block_spans.last() {
-                    if let Some(span) = self.last_delim_empty_block_spans.remove(&delim) {
-                        // Check if the (empty block) is in the last properly closed block
-                        if (parent.0.to(parent.1)).contains(span) {
-                            err.span_label(
-                                span,
-                                "block is empty, you might have not meant to close it",
-                            );
-                        } else {
-                            err.span_label(parent.0, "this opening brace...");
-                            err.span_label(parent.1, "...matches this closing brace");
-                        }
-                    } else {
-                        err.span_label(parent.0, "this opening brace...");
-                        err.span_label(parent.1, "...matches this closing brace");
-                    }
-                }
-
-                err.span_label(self.token.span, "unexpected closing delimiter");
-                Err(err)
-            }
-            _ => {
-                let tok = self.token.take();
-                let mut spacing = self.bump();
-                if !self.token.is_op() {
-                    spacing = Spacing::Alone;
-                }
-                Ok(TokenTree::Token(tok, spacing))
-            }
-        }
-    }
-
-    fn bump(&mut self) -> Spacing {
-        let (spacing, token) = self.string_reader.next_token();
-        self.token = token;
-        spacing
-    }
+    fn eof_err(&mut self) -> PErr<'a> {
+        let msg = "this file contains an unclosed delimiter";
+        let mut err = self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg);
+        for &(_, sp) in &self.open_braces {
+            err.span_label(sp, "unclosed delimiter");
+            self.unmatched_braces.push(UnmatchedBrace {
+                expected_delim: Delimiter::Brace,
+                found_delim: None,
+                found_span: self.token.span,
+                unclosed_span: Some(sp),
+                candidate_span: None,
+            });
+        }
+
+        if let Some((delim, _)) = self.open_braces.last() {
+            if let Some((_, open_sp, close_sp)) =
+                self.matching_delim_spans.iter().find(|(d, open_sp, close_sp)| {
+                    let sm = self.string_reader.sess.source_map();
+                    if let Some(close_padding) = sm.span_to_margin(*close_sp) {
+                        if let Some(open_padding) = sm.span_to_margin(*open_sp) {
+                            return delim == d && close_padding != open_padding;
+                        }
+                    }
+                    false
+                })
+            // these are in reverse order as they get inserted on close, but
+            {
+                // we want the last open/first close
+                err.span_label(*open_sp, "this delimiter might not be properly closed...");
+                err.span_label(*close_sp, "...as it matches this but it has different indentation");
+            }
+        }
+        err
+    }
+
+    fn parse_token_tree_open_delim(&mut self, open_delim: Delimiter) -> TokenTree {
+        // The span for beginning of the delimited section
+        let pre_span = self.token.span;
+
+        // Move past the open delimiter.
+        self.open_braces.push((open_delim, self.token.span));
+        self.token = self.string_reader.next_token().0;
+
+        // Parse the token trees within the delimiters.
+        // We stop at any delimiter so we can try to recover if the user
+        // uses an incorrect delimiter.
+        let tts = self.parse_token_trees_until_close_delim();
+
+        // Expand to cover the entire delimited token tree
+        let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
+
+        match self.token.kind {
+            // Correct delimiter.
+            token::CloseDelim(close_delim) if close_delim == open_delim => {
+                let (open_brace, open_brace_span) = self.open_braces.pop().unwrap();
+                let close_brace_span = self.token.span;
+
+                if tts.is_empty() {
+                    let empty_block_span = open_brace_span.to(close_brace_span);
+                    let sm = self.string_reader.sess.source_map();
+                    if !sm.is_multiline(empty_block_span) {
+                        // Only track if the block is in the form of `{}`, otherwise it is
+                        // likely that it was written on purpose.
+                        self.last_delim_empty_block_spans.insert(open_delim, empty_block_span);
+                    }
+                }
+
+                //only add braces
+                if let (Delimiter::Brace, Delimiter::Brace) = (open_brace, open_delim) {
+                    self.matching_block_spans.push((open_brace_span, close_brace_span));
+                }
+
+                if self.open_braces.is_empty() {
+                    // Clear up these spans to avoid suggesting them as we've found
+                    // properly matched delimiters so far for an entire block.
+                    self.matching_delim_spans.clear();
+                } else {
+                    self.matching_delim_spans.push((open_brace, open_brace_span, close_brace_span));
+                }
+                // Move past the closing delimiter.
+                self.token = self.string_reader.next_token().0;
+            }
+            // Incorrect delimiter.
+            token::CloseDelim(close_delim) => {
+                let mut unclosed_delimiter = None;
+                let mut candidate = None;
+
+                if self.last_unclosed_found_span != Some(self.token.span) {
+                    // do not complain about the same unclosed delimiter multiple times
+                    self.last_unclosed_found_span = Some(self.token.span);
+                    // This is a conservative error: only report the last unclosed
+                    // delimiter. The previous unclosed delimiters could actually be
+                    // closed! The parser just hasn't gotten to them yet.
+                    if let Some(&(_, sp)) = self.open_braces.last() {
+                        unclosed_delimiter = Some(sp);
+                    };
+                    let sm = self.string_reader.sess.source_map();
+                    if let Some(current_padding) = sm.span_to_margin(self.token.span) {
+                        for (brace, brace_span) in &self.open_braces {
+                            if let Some(padding) = sm.span_to_margin(*brace_span) {
+                                // high likelihood of these two corresponding
+                                if current_padding == padding && brace == &close_delim {
+                                    candidate = Some(*brace_span);
+                                }
+                            }
+                        }
+                    }
+                    let (tok, _) = self.open_braces.pop().unwrap();
+                    self.unmatched_braces.push(UnmatchedBrace {
+                        expected_delim: tok,
+                        found_delim: Some(close_delim),
+                        found_span: self.token.span,
+                        unclosed_span: unclosed_delimiter,
+                        candidate_span: candidate,
+                    });
+                } else {
+                    self.open_braces.pop();
+                }
+
+                // If the incorrect delimiter matches an earlier opening
+                // delimiter, then don't consume it (it can be used to
+                // close the earlier one). Otherwise, consume it.
+                // E.g., we try to recover from:
+                // fn foo() {
+                //     bar(baz(
+                // }  // Incorrect delimiter but matches the earlier `{`
+                if !self.open_braces.iter().any(|&(b, _)| b == close_delim) {
+                    self.token = self.string_reader.next_token().0;
+                }
+            }
+            token::Eof => {
+                // Silently recover, the EOF token will be seen again
+                // and an error emitted then. Thus we don't pop from
+                // self.open_braces here.
+            }
+            _ => unreachable!(),
+        }
+
+        TokenTree::Delimited(delim_span, open_delim, tts)
+    }
+
+    fn close_delim_err(&mut self, delim: Delimiter) -> PErr<'a> {
+        // An unexpected closing delimiter (i.e., there is no
+        // matching opening delimiter).
+        let token_str = token_to_string(&self.token);
+        let msg = format!("unexpected closing delimiter: `{}`", token_str);
+        let mut err =
+            self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, &msg);
+
+        // Braces are added at the end, so the last element is the biggest block
+        if let Some(parent) = self.matching_block_spans.last() {
+            if let Some(span) = self.last_delim_empty_block_spans.remove(&delim) {
+                // Check if the (empty block) is in the last properly closed block
+                if (parent.0.to(parent.1)).contains(span) {
+                    err.span_label(span, "block is empty, you might have not meant to close it");
+                } else {
+                    err.span_label(parent.0, "this opening brace...");
+                    err.span_label(parent.1, "...matches this closing brace");
+                }
+            } else {
+                err.span_label(parent.0, "this opening brace...");
+                err.span_label(parent.1, "...matches this closing brace");
+            }
+        }
+
+        err.span_label(self.token.span, "unexpected closing delimiter");
+        err
+    }
+
+    #[inline]
+    fn parse_token_tree_non_delim_non_eof(&mut self) -> TokenTree {
+        // `this_spacing` for the returned token refers to whether the token is
+        // immediately followed by another op token. It is determined by the
+        // next token: its kind and its `preceded_by_whitespace` status.
+        let (next_tok, is_next_tok_preceded_by_whitespace) = self.string_reader.next_token();
+        let this_spacing = if is_next_tok_preceded_by_whitespace || !next_tok.is_op() {
+            Spacing::Alone
+        } else {
+            Spacing::Joint
+        };
+        let this_tok = std::mem::replace(&mut self.token, next_tok);
+        TokenTree::Token(this_tok, this_spacing)
+    }
 }
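Note: with `bump` gone, joint-vs-alone spacing is now computed at the single place that needs it: a token is `Joint` only if the next token is an operator token and was not preceded by whitespace. A stand-in sketch of that rule (not the compiler's types):

#[derive(Debug, PartialEq)]
enum Spacing {
    Alone,
    Joint,
}

fn this_spacing(next_is_op: bool, next_preceded_by_whitespace: bool) -> Spacing {
    if next_preceded_by_whitespace || !next_is_op { Spacing::Alone } else { Spacing::Joint }
}

fn main() {
    assert_eq!(this_spacing(true, false), Spacing::Joint);  // `>>` stays joint
    assert_eq!(this_spacing(true, true), Spacing::Alone);   // `> >` is alone
    assert_eq!(this_spacing(false, false), Spacing::Alone); // `>ident`
}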
@@ -13,6 +13,7 @@ use std::collections::VecDeque;
 use std::fmt::{Display, Write};

 use rustc_data_structures::fx::FxHashMap;
+use rustc_lexer::Cursor;
 use rustc_lexer::{LiteralKind, TokenKind};
 use rustc_span::edition::Edition;
 use rustc_span::symbol::Symbol;

@@ -408,15 +409,16 @@ enum Highlight<'a> {

 struct TokenIter<'a> {
     src: &'a str,
+    cursor: Cursor<'a>,
 }

 impl<'a> Iterator for TokenIter<'a> {
     type Item = (TokenKind, &'a str);
     fn next(&mut self) -> Option<(TokenKind, &'a str)> {
-        if self.src.is_empty() {
+        let token = self.cursor.advance_token();
+        if token.kind == TokenKind::Eof {
             return None;
         }
-        let token = rustc_lexer::first_token(self.src);
         let (text, rest) = self.src.split_at(token.len as usize);
         self.src = rest;
         Some((token.kind, text))

@@ -525,7 +527,7 @@ impl<'a> Classifier<'a> {
     /// Takes as argument the source code to HTML-ify, the rust edition to use and the source code
     /// file span which will be used later on by the `span_correspondance_map`.
     fn new(src: &str, file_span: Span, decoration_info: Option<DecorationInfo>) -> Classifier<'_> {
-        let tokens = PeekIter::new(TokenIter { src });
+        let tokens = PeekIter::new(TokenIter { src, cursor: Cursor::new(src) });
         let decorations = decoration_info.map(Decorations::new);
         Classifier {
             tokens,

@@ -850,6 +852,7 @@ impl<'a> Classifier<'a> {
                 Class::Ident(self.new_span(before, text))
             }
             TokenKind::Lifetime { .. } => Class::Lifetime,
+            TokenKind::Eof => panic!("Eof in advance"),
         };
         // Anything that didn't return above is the simple case where we the
         // class just spans a single token, so we can use the `string` method.
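Note: rustdoc's syntax highlighter now drives a `Cursor` directly and stops when the lexer reports end-of-input as an `Eof` token, rather than re-checking whether the remaining source slice is empty. A self-contained analogue of that iteration pattern, with a toy stand-in lexer rather than `rustc_lexer`:

// The loop ends when the lexer itself reports end-of-input as a token.
struct Token {
    len: usize,
    eof: bool,
}

// Toy stand-in for `Cursor::advance_token`: one whitespace-prefixed chunk.
fn advance(src: &str) -> Token {
    let trimmed = src.trim_start();
    if trimmed.is_empty() {
        return Token { len: src.len(), eof: true };
    }
    let skipped = src.len() - trimmed.len();
    let word = trimmed.split_whitespace().next().unwrap().len();
    Token { len: skipped + word, eof: false }
}

fn main() {
    let mut src = "fn main";
    let mut texts = Vec::new();
    loop {
        let token = advance(src);
        if token.eof {
            break;
        }
        let (text, rest) = src.split_at(token.len);
        src = rest;
        texts.push(text);
    }
    assert_eq!(texts, vec!["fn", " main"]);
}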
|