From 56514076ac93e1417c8b4d15e3ce21da0dc6353f Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 23 Jan 2024 12:27:56 +1100 Subject: [PATCH] Rework `CStrUnit`. - Rename it as `MixedUnit`, because it will soon be used in more than just C string literals. - Change the `Byte` variant to `HighByte` and use it only for `\x80`..`\xff` cases. This fixes the old inexactness where ASCII chars could be encoded with either `Byte` or `Char`. - Add useful comments. - Remove `is_ascii`, in favour of `u8::is_ascii`. --- crates/syntax/src/ast/token_ext.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs index b39006e2ff2..2f75e9677ec 100644 --- a/crates/syntax/src/ast/token_ext.rs +++ b/crates/syntax/src/ast/token_ext.rs @@ -6,7 +6,7 @@ }; use rustc_lexer::unescape::{ - unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit, Mode, + unescape_byte, unescape_c_string, unescape_char, unescape_literal, MixedUnit, Mode, }; use crate::{ @@ -336,10 +336,9 @@ pub fn value(&self) -> Option> { let mut buf = Vec::new(); let mut prev_end = 0; let mut has_error = false; - let mut char_buf = [0u8; 4]; - let mut extend_unit = |buf: &mut Vec, unit: CStrUnit| match unit { - CStrUnit::Byte(b) => buf.push(b), - CStrUnit::Char(c) => buf.extend(c.encode_utf8(&mut char_buf).as_bytes()), + let extend_unit = |buf: &mut Vec, unit: MixedUnit| match unit { + MixedUnit::Char(c) => buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()), + MixedUnit::HighByte(b) => buf.push(b), }; unescape_c_string(text, Self::MODE, &mut |char_range, unescaped| match ( unescaped,