diff --git a/src/librustc_lexer/src/cursor.rs b/src/librustc_lexer/src/cursor.rs index 13d0b07d98b..ed0911379c4 100644 --- a/src/librustc_lexer/src/cursor.rs +++ b/src/librustc_lexer/src/cursor.rs @@ -41,7 +41,7 @@ impl<'a> Cursor<'a> { /// If requested position doesn't exist, `EOF_CHAR` is returned. /// However, getting `EOF_CHAR` doesn't always mean actual end of file, /// it should be checked with `is_eof` method. - pub(crate) fn nth_char(&self, n: usize) -> char { + fn nth_char(&self, n: usize) -> char { self.chars().nth(n).unwrap_or(EOF_CHAR) } diff --git a/src/librustc_lexer/src/lib.rs b/src/librustc_lexer/src/lib.rs index 70df6d210f4..132607031ce 100644 --- a/src/librustc_lexer/src/lib.rs +++ b/src/librustc_lexer/src/lib.rs @@ -141,25 +141,41 @@ pub enum LiteralKind { RawByteStr(UnvalidatedRawStr), } +/// Represents something that looks like a raw string, but may have some +/// problems. Use `.validate()` to convert it into something +/// usable. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct UnvalidatedRawStr { + /// The prefix (`r###"`) is valid valid_start: bool, + /// The number of leading `#` n_start_hashes: usize, + /// The number of trailing `#`. `n_end_hashes` <= `n_start_hashes` n_end_hashes: usize, + /// The offset starting at `r` or `br` where the user may have intended to end the string. + /// Currently, it is the longest sequence of pattern `"#+"`. possible_terminator_offset: Option, } +/// Error produced validating a raw string. Represents cases like: +/// - `r##~"abcde"##`: `LexRawStrError::InvalidStarter` +/// - `r###"abcde"##`: `LexRawStrError::NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11) }` +/// - Too many `#`s (>65535): `TooManyDelimiters` #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum LexRawStrError { - /// Non # characters between `r` and `"` eg. `r#~"..` + /// Non `#` characters exist between `r` and `"` eg. 
`r#~"..` InvalidStarter, - /// The string was never terminated. `possible_terminator_offset` is the best guess of where they + /// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they /// may have intended to terminate it. NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option }, - /// More than 65536 # signs + /// More than 65535 `#`s exist. TooManyDelimiters, } +/// Raw String that contains a valid prefix (`#+"`) and postfix (`"#+`) where +/// there are a matching number of `#` characters in both. Note that this will +/// not consume extra trailing `#` characters: `r###"abcde"####` is lexed as a +/// `ValidatedRawStr { n_hashes: 3 }` followed by a `#` token. #[derive(Debug, Eq, PartialEq, Copy, Clone)] pub struct ValidatedRawStr { n_hashes: u16, @@ -172,27 +188,26 @@ impl ValidatedRawStr { } impl UnvalidatedRawStr { - pub fn started(&self) -> bool { - self.valid_start - } - pub fn validate(self) -> Result { if !self.valid_start { return Err(LexRawStrError::InvalidStarter); } + // Only up to 65535 `#`s are allowed in raw strings let n_start_safe: u16 = self.n_start_hashes.try_into().map_err(|_| LexRawStrError::TooManyDelimiters)?; - match (self.n_start_hashes, self.n_end_hashes) { - (n_start, n_end) if n_start > n_end => Err(LexRawStrError::NoTerminator { - expected: n_start, + + if self.n_start_hashes > self.n_end_hashes { + Err(LexRawStrError::NoTerminator { + expected: self.n_start_hashes, found: self.n_end_hashes, possible_terminator_offset: self.possible_terminator_offset, - }), - (n_start, n_end) => { - debug_assert_eq!(n_start, n_end); - Ok(ValidatedRawStr { n_hashes: n_start_safe }) - } + }) + } else { + // Since the lexer should never produce a literal with n_end > n_start, if n_start <= n_end, + // they must be equal. 
+ debug_assert_eq!(self.n_start_hashes, self.n_end_hashes); + Ok(ValidatedRawStr { n_hashes: n_start_safe }) } } } @@ -656,7 +671,7 @@ impl Cursor<'_> { false } - /// Eats the double-quoted string an UnvalidatedRawStr + /// Eats the double-quoted string and returns an `UnvalidatedRawStr`. fn raw_double_quoted_string(&mut self, prefix_len: usize) -> UnvalidatedRawStr { debug_assert!(self.prev() == 'r'); let mut valid_start: bool = false; diff --git a/src/librustc_parse/lexer/mod.rs b/src/librustc_parse/lexer/mod.rs index 2f720d95c6d..a367131b3f3 100644 --- a/src/librustc_parse/lexer/mod.rs +++ b/src/librustc_parse/lexer/mod.rs @@ -533,13 +533,12 @@ impl<'a> StringReader<'a> { } if let Some(possible_offset) = possible_offset { - let span = self.mk_sp( - start + BytePos(possible_offset as u32), - start + BytePos(possible_offset as u32) + BytePos(found_terminators as u32), - ); + let lo = start + BytePos(possible_offset as u32); + let hi = lo + BytePos(found_terminators as u32); + let span = self.mk_sp(lo, hi); err.span_suggestion( span, - "you might have intended to terminate the string here", + "consider terminating the string here", "#".repeat(n_hashes), Applicability::MaybeIncorrect, ); diff --git a/src/librustc_parse/lib.rs b/src/librustc_parse/lib.rs index 13fb85db847..8e2a9513d6b 100644 --- a/src/librustc_parse/lib.rs +++ b/src/librustc_parse/lib.rs @@ -4,6 +4,7 @@ #![feature(crate_visibility_modifier)] #![feature(bindings_after_at)] #![feature(try_blocks)] +#![feature(or_patterns)] use rustc_ast::ast; use rustc_ast::token::{self, Nonterminal}; diff --git a/src/librustc_parse/parser/diagnostics.rs b/src/librustc_parse/parser/diagnostics.rs index 7b6840307cb..2fc20e15c5a 100644 --- a/src/librustc_parse/parser/diagnostics.rs +++ b/src/librustc_parse/parser/diagnostics.rs @@ -288,9 +288,12 @@ impl<'a> Parser<'a> { fn check_too_many_raw_str_terminators(&mut self, err: &mut DiagnosticBuilder<'_>) -> bool { let prev_token_raw_str = match self.prev_token { - Token { 
kind: TokenKind::Literal(Lit { kind: LitKind::StrRaw(n), .. }), .. } => Some(n), Token { - kind: TokenKind::Literal(Lit { kind: LitKind::ByteStrRaw(n), .. }), .. + kind: + TokenKind::Literal(Lit { + kind: LitKind::StrRaw(n) | LitKind::ByteStrRaw(n), .. + }), + .. } => Some(n), _ => None, }; @@ -300,11 +303,11 @@ impl<'a> Parser<'a> { err.set_primary_message("too many `#` when terminating raw string"); err.span_suggestion( self.token.span, - "Remove the extra `#`", + "remove the extra `#`", String::new(), Applicability::MachineApplicable, ); - err.note(&format!("The raw string started with {} `#`s", n_hashes)); + err.note(&format!("the raw string started with {} `#`s", n_hashes)); return true; } } diff --git a/src/test/ui/parser/raw/raw-byte-string-eof.stderr b/src/test/ui/parser/raw/raw-byte-string-eof.stderr index 81344841c27..a76668e8051 100644 --- a/src/test/ui/parser/raw/raw-byte-string-eof.stderr +++ b/src/test/ui/parser/raw/raw-byte-string-eof.stderr @@ -2,7 +2,7 @@ error[E0748]: unterminated raw string --> $DIR/raw-byte-string-eof.rs:2:5 | LL | br##"a"#; - | ^ - help: you might have intended to terminate the string here: `##` + | ^ - help: consider terminating the string here: `##` | | | unterminated raw string | diff --git a/src/test/ui/parser/raw/raw-str-unbalanced.stderr b/src/test/ui/parser/raw/raw-str-unbalanced.stderr index 891f1d6337c..bf8f3a7a5a4 100644 --- a/src/test/ui/parser/raw/raw-str-unbalanced.stderr +++ b/src/test/ui/parser/raw/raw-str-unbalanced.stderr @@ -2,9 +2,9 @@ error: too many `#` when terminating raw string --> $DIR/raw-str-unbalanced.rs:3:9 | LL | "## - | ^ help: Remove the extra `#` + | ^ help: remove the extra `#` | - = note: The raw string started with 1 `#`s + = note: the raw string started with 1 `#`s error: aborting due to previous error diff --git a/src/test/ui/parser/raw/raw_string.stderr b/src/test/ui/parser/raw/raw_string.stderr index e91a16bedc4..cc0eb492700 100644 --- a/src/test/ui/parser/raw/raw_string.stderr +++ 
b/src/test/ui/parser/raw/raw_string.stderr @@ -2,7 +2,7 @@ error[E0748]: unterminated raw string --> $DIR/raw_string.rs:2:13 | LL | let x = r##"lol"#; - | ^ - help: you might have intended to terminate the string here: `##` + | ^ - help: consider terminating the string here: `##` | | | unterminated raw string |