diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index c3995c7776f..aaeb1bb9bff 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -3,8 +3,7 @@ use crate::ast::{self, LitKind, MetaItemLit, StrStyle}; use crate::token::{self, Token}; use rustc_lexer::unescape::{ - byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, MixedUnit, - Mode, + byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode, MixedUnit, Mode, }; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; @@ -85,7 +84,7 @@ pub fn from_token_lit(lit: token::Lit) -> Result { // Force-inlining here is aggressive but the closure is // called on every char in the string, so it can be hot in // programs with many long strings containing escapes. - unescape_literal( + unescape_unicode( s, Mode::Str, &mut #[inline(always)] @@ -109,7 +108,7 @@ pub fn from_token_lit(lit: token::Lit) -> Result { token::ByteStr => { let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - unescape_literal(s, Mode::ByteStr, &mut |_, c| match c { + unescape_unicode(s, Mode::ByteStr, &mut |_, c| match c { Ok(c) => buf.push(byte_from_char(c)), Err(err) => { assert!(!err.is_fatal(), "failed to unescape string literal") @@ -126,7 +125,7 @@ pub fn from_token_lit(lit: token::Lit) -> Result { token::CStr => { let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - unescape_c_string(s, Mode::CStr, &mut |_span, c| match c { + unescape_mixed(s, Mode::CStr, &mut |_span, c| match c { Ok(MixedUnit::Char(c)) => { buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) } diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index e4cf7439b97..4da6d35727c 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -80,12 +80,12 @@ pub fn is_fatal(&self) -> bool { } } -/// Takes a contents of a literal (without quotes) and produces a sequence of -/// escaped characters or errors. +/// Takes the contents of a unicode-only (non-mixed-utf8) literal (without +/// quotes) and produces a sequence of escaped characters or errors. /// /// Values are returned by invoking `callback`. For `Char` and `Byte` modes, /// the callback will be called exactly once. -pub fn unescape_literal(src: &str, mode: Mode, callback: &mut F) +pub fn unescape_unicode(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { @@ -132,7 +132,11 @@ fn from(n: u8) -> Self { } } -pub fn unescape_c_string(src: &str, mode: Mode, callback: &mut F) +/// Takes the contents of a mixed-utf8 literal (without quotes) and produces +/// a sequence of escaped characters or errors. +/// +/// Values are returned by invoking `callback`. +pub fn unescape_mixed(src: &str, mode: Mode, callback: &mut F) where F: FnMut(Range, Result), { diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs index 1c25b03fdb2..5b99495f475 100644 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ b/compiler/rustc_lexer/src/unescape/tests.rs @@ -100,7 +100,7 @@ fn check(literal_text: &str, expected_char: char) { fn test_unescape_str_warn() { fn check(literal: &str, expected: &[(Range, Result)]) { let mut unescaped = Vec::with_capacity(literal.len()); - unescape_literal(literal, Mode::Str, &mut |range, res| unescaped.push((range, res))); + unescape_unicode(literal, Mode::Str, &mut |range, res| unescaped.push((range, res))); assert_eq!(unescaped, expected); } @@ -124,7 +124,7 @@ fn check(literal: &str, expected: &[(Range, Result)]) fn test_unescape_str_good() { fn check(literal_text: &str, expected: &str) { let mut buf = Ok(String::with_capacity(literal_text.len())); - unescape_literal(literal_text, Mode::Str, &mut |range, c| { + unescape_unicode(literal_text, Mode::Str, &mut |range, c| { if let Ok(b) = &mut buf { match c { Ok(c) => b.push(c), @@ -241,7 +241,7 @@ fn check(literal_text: &str, expected_byte: u8) { fn test_unescape_byte_str_good() { fn check(literal_text: &str, expected: &[u8]) { let mut buf = Ok(Vec::with_capacity(literal_text.len())); - unescape_literal(literal_text, Mode::ByteStr, &mut |range, c| { + unescape_unicode(literal_text, Mode::ByteStr, &mut |range, c| { if let Ok(b) = &mut buf { match c { Ok(c) => b.push(byte_from_char(c)), @@ -264,7 +264,7 @@ fn check(literal_text: &str, expected: &[u8]) { fn test_unescape_raw_str() { fn check(literal: &str, expected: &[(Range, Result)]) { let mut unescaped = Vec::with_capacity(literal.len()); - unescape_literal(literal, Mode::RawStr, &mut |range, res| unescaped.push((range, res))); + unescape_unicode(literal, Mode::RawStr, &mut |range, res| unescaped.push((range, res))); assert_eq!(unescaped, expected); } @@ -276,7 +276,7 @@ fn check(literal: &str, expected: &[(Range, Result)]) fn test_unescape_raw_byte_str() { fn check(literal: &str, expected: &[(Range, Result)]) { let mut unescaped = Vec::with_capacity(literal.len()); - unescape_literal(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res))); + unescape_unicode(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res))); assert_eq!(unescaped, expected); } diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index d7ecf577ed6..a491d1969bd 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -400,7 +400,7 @@ fn cook_lexer_literal( .with_code(error_code!(E0762)) .emit() } - self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' ' + self.cook_unicode(token::Char, Mode::Char, start, end, 1, 1) // ' ' } rustc_lexer::LiteralKind::Byte { terminated } => { if !terminated { @@ -412,7 +412,7 @@ fn cook_lexer_literal( .with_code(error_code!(E0763)) .emit() } - self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' ' + self.cook_unicode(token::Byte, Mode::Byte, start, end, 2, 1) // b' ' } rustc_lexer::LiteralKind::Str { terminated } => { if !terminated { @@ -424,7 +424,7 @@ fn cook_lexer_literal( .with_code(error_code!(E0765)) .emit() } - self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " " + self.cook_unicode(token::Str, Mode::Str, start, end, 1, 1) // " " } rustc_lexer::LiteralKind::ByteStr { terminated } => { if !terminated { @@ -436,7 +436,7 @@ fn cook_lexer_literal( .with_code(error_code!(E0766)) .emit() } - self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " + self.cook_unicode(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " } rustc_lexer::LiteralKind::CStr { terminated } => { if !terminated { @@ -448,13 +448,13 @@ fn cook_lexer_literal( .with_code(error_code!(E0767)) .emit() } - self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" " + self.cook_mixed(token::CStr, Mode::CStr, start, end, 2, 1) // c" " } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::StrRaw(n_hashes); - self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "## + self.cook_unicode(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "## } else { self.report_raw_str_error(start, 1); } @@ -463,7 +463,7 @@ fn cook_lexer_literal( if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::ByteStrRaw(n_hashes); - self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "## + self.cook_unicode(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "## } else { self.report_raw_str_error(start, 2); } @@ -472,7 +472,7 @@ fn cook_lexer_literal( if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::CStrRaw(n_hashes); - self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "## + self.cook_mixed(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "## } else { self.report_raw_str_error(start, 2); } @@ -735,7 +735,7 @@ fn cook_common( } } - fn cook_quoted( + fn cook_unicode( &self, kind: token::LitKind, mode: Mode, @@ -745,13 +745,13 @@ fn cook_quoted( postfix_len: u32, ) -> (token::LitKind, Symbol) { self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_literal(src, mode, &mut |span, result| { + unescape::unescape_unicode(src, mode, &mut |span, result| { callback(span, result.map(drop)) }) }) } - fn cook_c_string( + fn cook_mixed( &self, kind: token::LitKind, mode: Mode, @@ -761,7 +761,7 @@ fn cook_c_string( postfix_len: u32, ) -> (token::LitKind, Symbol) { self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_c_string(src, mode, &mut |span, result| { + unescape::unescape_mixed(src, mode, &mut |span, result| { callback(span, result.map(drop)) }) }) diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index 625764876a6..d76ee161da6 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -1056,7 +1056,7 @@ fn find_width_map_from_snippet( fn unescape_string(string: &str) -> Option { let mut buf = string::String::new(); let mut ok = true; - unescape::unescape_literal(string, unescape::Mode::Str, &mut |_, unescaped_char| { + unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| { match unescaped_char { Ok(c) => buf.push(c), Err(_) => ok = false, diff --git a/src/tools/clippy/clippy_dev/src/update_lints.rs b/src/tools/clippy/clippy_dev/src/update_lints.rs index 6b76a44debf..f598f5d3d50 100644 --- a/src/tools/clippy/clippy_dev/src/update_lints.rs +++ b/src/tools/clippy/clippy_dev/src/update_lints.rs @@ -928,7 +928,7 @@ fn remove_line_splices(s: &str) -> String { .and_then(|s| s.strip_suffix('"')) .unwrap_or_else(|| panic!("expected quoted string, found `{s}`")); let mut res = String::with_capacity(s.len()); - unescape::unescape_literal(s, unescape::Mode::Str, &mut |range, ch| { + unescape::unescape_unicode(s, unescape::Mode::Str, &mut |range, ch| { if ch.is_ok() { res.push_str(&s[range]); } diff --git a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs index aa25f82ae1d..bf1feb9a7eb 100644 --- a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs +++ b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs @@ -379,14 +379,14 @@ fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str { let mut error_message = ""; match mode { Mode::CStr => { - rustc_lexer::unescape::unescape_c_string(text, mode, &mut |_, res| { + rustc_lexer::unescape::unescape_mixed(text, mode, &mut |_, res| { if let Err(e) = res { error_message = error_to_diagnostic_message(e, mode); } }); } Mode::ByteStr | Mode::Str => { - rustc_lexer::unescape::unescape_literal(text, mode, &mut |_, res| { + rustc_lexer::unescape::unescape_unicode(text, mode, &mut |_, res| { if let Err(e) = res { error_message = error_to_diagnostic_message(e, mode); } diff --git a/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs b/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs index 2f75e9677ec..7cd1f1550b9 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs @@ -6,7 +6,7 @@ }; use rustc_lexer::unescape::{ - unescape_byte, unescape_c_string, unescape_char, unescape_literal, MixedUnit, Mode, + unescape_byte, unescape_char, unescape_mixed, unescape_unicode, MixedUnit, Mode, }; use crate::{ @@ -193,7 +193,7 @@ fn escaped_char_ranges( let text = &self.text()[text_range_no_quotes - start]; let offset = text_range_no_quotes.start() - start; - unescape_literal(text, Self::MODE, &mut |range, unescaped_char| { + unescape_unicode(text, Self::MODE, &mut |range, unescaped_char| { let text_range = TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap()); cb(text_range + offset, unescaped_char); @@ -226,7 +226,7 @@ pub fn value(&self) -> Option> { let mut buf = String::new(); let mut prev_end = 0; let mut has_error = false; - unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match ( + unescape_unicode(text, Self::MODE, &mut |char_range, unescaped_char| match ( unescaped_char, buf.capacity() == 0, ) { @@ -270,7 +270,7 @@ pub fn value(&self) -> Option> { let mut buf: Vec = Vec::new(); let mut prev_end = 0; let mut has_error = false; - unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match ( + unescape_unicode(text, Self::MODE, &mut |char_range, unescaped_char| match ( unescaped_char, buf.capacity() == 0, ) { @@ -311,7 +311,7 @@ fn escaped_char_ranges( let text = &self.text()[text_range_no_quotes - start]; let offset = text_range_no_quotes.start() - start; - unescape_c_string(text, Self::MODE, &mut |range, unescaped_char| { + unescape_mixed(text, Self::MODE, &mut |range, unescaped_char| { let text_range = TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap()); // XXX: This method should only be used for highlighting ranges. The unescaped @@ -340,7 +340,7 @@ pub fn value(&self) -> Option> { MixedUnit::Char(c) => buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()), MixedUnit::HighByte(b) => buf.push(b), }; - unescape_c_string(text, Self::MODE, &mut |char_range, unescaped| match ( + unescape_mixed(text, Self::MODE, &mut |char_range, unescaped| match ( unescaped, buf.capacity() == 0, ) { diff --git a/src/tools/rust-analyzer/crates/syntax/src/validation.rs b/src/tools/rust-analyzer/crates/syntax/src/validation.rs index fadcbaef143..5c5b26f525f 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/validation.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/validation.rs @@ -5,7 +5,7 @@ mod block; use rowan::Direction; -use rustc_lexer::unescape::{self, unescape_c_string, unescape_literal, Mode}; +use rustc_lexer::unescape::{self, unescape_mixed, unescape_unicode, Mode}; use crate::{ algo, @@ -140,7 +140,7 @@ fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> { ast::LiteralKind::String(s) => { if !s.is_raw() { if let Some(without_quotes) = unquote(text, 1, '"') { - unescape_literal(without_quotes, Mode::Str, &mut |range, char| { + unescape_unicode(without_quotes, Mode::Str, &mut |range, char| { if let Err(err) = char { push_err(1, range.start, err); } @@ -151,7 +151,7 @@ fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> { ast::LiteralKind::ByteString(s) => { if !s.is_raw() { if let Some(without_quotes) = unquote(text, 2, '"') { - unescape_literal(without_quotes, Mode::ByteStr, &mut |range, char| { + unescape_unicode(without_quotes, Mode::ByteStr, &mut |range, char| { if let Err(err) = char { push_err(1, range.start, err); } @@ -162,7 +162,7 @@ fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> { ast::LiteralKind::CString(s) => { if !s.is_raw() { if let Some(without_quotes) = unquote(text, 2, '"') { - unescape_c_string(without_quotes, Mode::CStr, &mut |range, char| { + unescape_mixed(without_quotes, Mode::CStr, &mut |range, char| { if let Err(err) = char { push_err(1, range.start, err); } @@ -172,7 +172,7 @@ fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> { } ast::LiteralKind::Char(_) => { if let Some(without_quotes) = unquote(text, 1, '\'') { - unescape_literal(without_quotes, Mode::Char, &mut |range, char| { + unescape_unicode(without_quotes, Mode::Char, &mut |range, char| { if let Err(err) = char { push_err(1, range.start, err); } @@ -181,7 +181,7 @@ fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> { } ast::LiteralKind::Byte(_) => { if let Some(without_quotes) = unquote(text, 2, '\'') { - unescape_literal(without_quotes, Mode::Byte, &mut |range, char| { + unescape_unicode(without_quotes, Mode::Byte, &mut |range, char| { if let Err(err) = char { push_err(2, range.start, err); }