From 6be2e5623cb7ae63ca1796759150c0cbc845bbcd Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 24 Jan 2024 16:00:10 +1100 Subject: [PATCH] Use `unescape_unicode` for raw C string literals. They can't contain `\x` escapes, which means they can't contain high bytes, which means we can use `unescape_unicode` instead of `unescape_mixed` to unescape them. This avoids unnecessary use of `MixedUnit`. --- compiler/rustc_lexer/src/unescape.rs | 33 ++++++++++++--------------- compiler/rustc_parse/src/lexer/mod.rs | 2 +- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index 4da6d35727c..03d178eb266 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -97,7 +97,13 @@ pub fn unescape_unicode(src: &str, mode: Mode, callback: &mut F) } Str | ByteStr => unescape_non_raw_common(src, mode, callback), RawStr | RawByteStr => check_raw_common(src, mode, callback), - CStr | RawCStr => unreachable!(), + RawCStr => check_raw_common(src, mode, &mut |r, mut result| { + if let Ok('\0') = result { + result = Err(EscapeError::NulInCStr); + } + callback(r, result) + }), + CStr => unreachable!(), } } @@ -141,24 +147,13 @@ pub fn unescape_mixed(src: &str, mode: Mode, callback: &mut F) F: FnMut(Range, Result), { match mode { - CStr => { - unescape_non_raw_common(src, mode, &mut |r, mut result| { - if let Ok(MixedUnit::Char('\0')) = result { - result = Err(EscapeError::NulInCStr); - } - callback(r, result) - }); - } - RawCStr => { - check_raw_common(src, mode, &mut |r, mut result| { - if let Ok('\0') = result { - result = Err(EscapeError::NulInCStr); - } - // High bytes aren't possible in raw strings. 
- callback(r, result.map(MixedUnit::Char)) - }); - } - Char | Byte | Str | RawStr | ByteStr | RawByteStr => unreachable!(), + CStr => unescape_non_raw_common(src, mode, &mut |r, mut result| { + if let Ok(MixedUnit::Char('\0')) = result { + result = Err(EscapeError::NulInCStr); + } + callback(r, result) + }), + Char | Byte | Str | RawStr | ByteStr | RawByteStr | RawCStr => unreachable!(), } } diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index a491d1969bd..20ec4a300c1 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -472,7 +472,7 @@ fn cook_lexer_literal( if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::CStrRaw(n_hashes); - self.cook_mixed(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "## + self.cook_unicode(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "## } else { self.report_raw_str_error(start, 2); }