Rework CStrUnit.

- Rename it as `MixedUnit`, because it will soon be used in more than
  just C string literals.
- Change the `Byte` variant to `HighByte` and use it only for
  `\x80`..`\xff` cases. This fixes the old inexactness where ASCII chars
  could be encoded with either `Byte` or `Char`.
- Add useful comments.
- Remove `is_ascii`, in favour of `u8::is_ascii`.
This commit is contained in:
Nicholas Nethercote 2024-01-23 12:27:56 +11:00
parent 85d56eeb63
commit 56514076ac

View File

@@ -6,7 +6,7 @@
};
use rustc_lexer::unescape::{
-unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit, Mode,
+unescape_byte, unescape_c_string, unescape_char, unescape_literal, MixedUnit, Mode,
};
use crate::{
@@ -336,10 +336,9 @@ pub fn value(&self) -> Option<Cow<'_, [u8]>> {
let mut buf = Vec::new();
let mut prev_end = 0;
let mut has_error = false;
-let mut char_buf = [0u8; 4];
-let mut extend_unit = |buf: &mut Vec<u8>, unit: CStrUnit| match unit {
-CStrUnit::Byte(b) => buf.push(b),
-CStrUnit::Char(c) => buf.extend(c.encode_utf8(&mut char_buf).as_bytes()),
+let extend_unit = |buf: &mut Vec<u8>, unit: MixedUnit| match unit {
+MixedUnit::Char(c) => buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()),
+MixedUnit::HighByte(b) => buf.push(b),
};
unescape_c_string(text, Self::MODE, &mut |char_range, unescaped| match (
unescaped,