Auto merge of #111524 - scottmcm:escape-using-ascii, r=cuviper
`ascii::Char`-ify the escaping code in `core`. This means that `EscapeIterInner::as_str` no longer needs unsafe code, because the type system ensures the internal buffer is only ASCII, and thus valid UTF-8. Come to think of it, this also gives it a (non-guaranteed) niche. cc `@BurntSushi` as potentially interested. `ascii::Char` tracking issue: #110998
This commit is contained in:
commit
e86fd62b6b
@ -91,7 +91,7 @@ pub struct EscapeDefault(escape::EscapeIterInner<4>);
|
|||||||
/// ```
|
/// ```
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub fn escape_default(c: u8) -> EscapeDefault {
|
pub fn escape_default(c: u8) -> EscapeDefault {
|
||||||
let mut data = [0; 4];
|
let mut data = [Char::Null; 4];
|
||||||
let range = escape::escape_ascii_into(&mut data, c);
|
let range = escape::escape_ascii_into(&mut data, c);
|
||||||
EscapeDefault(escape::EscapeIterInner::new(data, range))
|
EscapeDefault(escape::EscapeIterInner::new(data, range))
|
||||||
}
|
}
|
||||||
|
@ -392,13 +392,13 @@ impl char {
|
|||||||
#[inline]
|
#[inline]
|
||||||
pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
|
pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
|
||||||
match self {
|
match self {
|
||||||
'\0' => EscapeDebug::backslash(b'0'),
|
'\0' => EscapeDebug::backslash(ascii::Char::Digit0),
|
||||||
'\t' => EscapeDebug::backslash(b't'),
|
'\t' => EscapeDebug::backslash(ascii::Char::SmallT),
|
||||||
'\r' => EscapeDebug::backslash(b'r'),
|
'\r' => EscapeDebug::backslash(ascii::Char::SmallR),
|
||||||
'\n' => EscapeDebug::backslash(b'n'),
|
'\n' => EscapeDebug::backslash(ascii::Char::SmallN),
|
||||||
'\\' => EscapeDebug::backslash(b'\\'),
|
'\\' => EscapeDebug::backslash(ascii::Char::ReverseSolidus),
|
||||||
'"' if args.escape_double_quote => EscapeDebug::backslash(b'"'),
|
'\"' if args.escape_double_quote => EscapeDebug::backslash(ascii::Char::QuotationMark),
|
||||||
'\'' if args.escape_single_quote => EscapeDebug::backslash(b'\''),
|
'\'' if args.escape_single_quote => EscapeDebug::backslash(ascii::Char::Apostrophe),
|
||||||
_ if args.escape_grapheme_extended && self.is_grapheme_extended() => {
|
_ if args.escape_grapheme_extended && self.is_grapheme_extended() => {
|
||||||
EscapeDebug::from_unicode(self.escape_unicode())
|
EscapeDebug::from_unicode(self.escape_unicode())
|
||||||
}
|
}
|
||||||
@ -503,11 +503,11 @@ impl char {
|
|||||||
#[inline]
|
#[inline]
|
||||||
pub fn escape_default(self) -> EscapeDefault {
|
pub fn escape_default(self) -> EscapeDefault {
|
||||||
match self {
|
match self {
|
||||||
'\t' => EscapeDefault::backslash(b't'),
|
'\t' => EscapeDefault::backslash(ascii::Char::SmallT),
|
||||||
'\r' => EscapeDefault::backslash(b'r'),
|
'\r' => EscapeDefault::backslash(ascii::Char::SmallR),
|
||||||
'\n' => EscapeDefault::backslash(b'n'),
|
'\n' => EscapeDefault::backslash(ascii::Char::SmallN),
|
||||||
'\\' | '\'' | '"' => EscapeDefault::backslash(self as u8),
|
'\\' | '\'' | '"' => EscapeDefault::backslash(self.as_ascii().unwrap()),
|
||||||
'\x20'..='\x7e' => EscapeDefault::printable(self as u8),
|
'\x20'..='\x7e' => EscapeDefault::printable(self.as_ascii().unwrap()),
|
||||||
_ => EscapeDefault::from_unicode(self.escape_unicode()),
|
_ => EscapeDefault::from_unicode(self.escape_unicode()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -38,6 +38,7 @@ pub use self::methods::encode_utf16_raw;
|
|||||||
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
|
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
|
||||||
pub use self::methods::encode_utf8_raw;
|
pub use self::methods::encode_utf8_raw;
|
||||||
|
|
||||||
|
use crate::ascii;
|
||||||
use crate::error::Error;
|
use crate::error::Error;
|
||||||
use crate::escape;
|
use crate::escape;
|
||||||
use crate::fmt::{self, Write};
|
use crate::fmt::{self, Write};
|
||||||
@ -152,7 +153,7 @@ pub struct EscapeUnicode(escape::EscapeIterInner<10>);
|
|||||||
|
|
||||||
impl EscapeUnicode {
|
impl EscapeUnicode {
|
||||||
fn new(chr: char) -> Self {
|
fn new(chr: char) -> Self {
|
||||||
let mut data = [0; 10];
|
let mut data = [ascii::Char::Null; 10];
|
||||||
let range = escape::escape_unicode_into(&mut data, chr);
|
let range = escape::escape_unicode_into(&mut data, chr);
|
||||||
Self(escape::EscapeIterInner::new(data, range))
|
Self(escape::EscapeIterInner::new(data, range))
|
||||||
}
|
}
|
||||||
@ -218,14 +219,14 @@ impl fmt::Display for EscapeUnicode {
|
|||||||
pub struct EscapeDefault(escape::EscapeIterInner<10>);
|
pub struct EscapeDefault(escape::EscapeIterInner<10>);
|
||||||
|
|
||||||
impl EscapeDefault {
|
impl EscapeDefault {
|
||||||
fn printable(chr: u8) -> Self {
|
fn printable(chr: ascii::Char) -> Self {
|
||||||
let data = [chr, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
let data = [chr];
|
||||||
Self(escape::EscapeIterInner::new(data, 0..1))
|
Self(escape::EscapeIterInner::from_array(data))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn backslash(chr: u8) -> Self {
|
fn backslash(chr: ascii::Char) -> Self {
|
||||||
let data = [b'\\', chr, 0, 0, 0, 0, 0, 0, 0, 0];
|
let data = [ascii::Char::ReverseSolidus, chr];
|
||||||
Self(escape::EscapeIterInner::new(data, 0..2))
|
Self(escape::EscapeIterInner::from_array(data))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn from_unicode(esc: EscapeUnicode) -> Self {
|
fn from_unicode(esc: EscapeUnicode) -> Self {
|
||||||
@ -307,9 +308,9 @@ impl EscapeDebug {
|
|||||||
Self(EscapeDebugInner::Char(chr))
|
Self(EscapeDebugInner::Char(chr))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn backslash(chr: u8) -> Self {
|
fn backslash(chr: ascii::Char) -> Self {
|
||||||
let data = [b'\\', chr, 0, 0, 0, 0, 0, 0, 0, 0];
|
let data = [ascii::Char::ReverseSolidus, chr];
|
||||||
let iter = escape::EscapeIterInner::new(data, 0..2);
|
let iter = escape::EscapeIterInner::from_array(data);
|
||||||
Self(EscapeDebugInner::Bytes(iter))
|
Self(EscapeDebugInner::Bytes(iter))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -318,7 +319,7 @@ impl EscapeDebug {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn clear(&mut self) {
|
fn clear(&mut self) {
|
||||||
let bytes = escape::EscapeIterInner::new([0; 10], 0..0);
|
let bytes = escape::EscapeIterInner::from_array([]);
|
||||||
self.0 = EscapeDebugInner::Bytes(bytes);
|
self.0 = EscapeDebugInner::Bytes(bytes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,34 +1,41 @@
|
|||||||
//! Helper code for character escaping.
|
//! Helper code for character escaping.
|
||||||
|
|
||||||
|
use crate::ascii;
|
||||||
use crate::num::NonZeroUsize;
|
use crate::num::NonZeroUsize;
|
||||||
use crate::ops::Range;
|
use crate::ops::Range;
|
||||||
|
|
||||||
const HEX_DIGITS: [u8; 16] = *b"0123456789abcdef";
|
const HEX_DIGITS: [ascii::Char; 16] = *b"0123456789abcdef".as_ascii().unwrap();
|
||||||
|
|
||||||
/// Escapes a byte into provided buffer; returns length of escaped
|
/// Escapes a byte into provided buffer; returns length of escaped
|
||||||
/// representation.
|
/// representation.
|
||||||
pub(crate) fn escape_ascii_into(output: &mut [u8; 4], byte: u8) -> Range<u8> {
|
pub(crate) fn escape_ascii_into(output: &mut [ascii::Char; 4], byte: u8) -> Range<u8> {
|
||||||
|
#[inline]
|
||||||
|
fn backslash(a: ascii::Char) -> ([ascii::Char; 4], u8) {
|
||||||
|
([ascii::Char::ReverseSolidus, a, ascii::Char::Null, ascii::Char::Null], 2)
|
||||||
|
}
|
||||||
|
|
||||||
let (data, len) = match byte {
|
let (data, len) = match byte {
|
||||||
b'\t' => ([b'\\', b't', 0, 0], 2),
|
b'\t' => backslash(ascii::Char::SmallT),
|
||||||
b'\r' => ([b'\\', b'r', 0, 0], 2),
|
b'\r' => backslash(ascii::Char::SmallR),
|
||||||
b'\n' => ([b'\\', b'n', 0, 0], 2),
|
b'\n' => backslash(ascii::Char::SmallN),
|
||||||
b'\\' => ([b'\\', b'\\', 0, 0], 2),
|
b'\\' => backslash(ascii::Char::ReverseSolidus),
|
||||||
b'\'' => ([b'\\', b'\'', 0, 0], 2),
|
b'\'' => backslash(ascii::Char::Apostrophe),
|
||||||
b'"' => ([b'\\', b'"', 0, 0], 2),
|
b'\"' => backslash(ascii::Char::QuotationMark),
|
||||||
b'\x20'..=b'\x7e' => ([byte, 0, 0, 0], 1),
|
_ => if let Some(a) = byte.as_ascii() && !byte.is_ascii_control() {
|
||||||
_ => {
|
([a, ascii::Char::Null, ascii::Char::Null, ascii::Char::Null], 1)
|
||||||
|
} else {
|
||||||
let hi = HEX_DIGITS[usize::from(byte >> 4)];
|
let hi = HEX_DIGITS[usize::from(byte >> 4)];
|
||||||
let lo = HEX_DIGITS[usize::from(byte & 0xf)];
|
let lo = HEX_DIGITS[usize::from(byte & 0xf)];
|
||||||
([b'\\', b'x', hi, lo], 4)
|
([ascii::Char::ReverseSolidus, ascii::Char::SmallX, hi, lo], 4)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
*output = data;
|
*output = data;
|
||||||
0..(len as u8)
|
0..len
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Escapes a character into provided buffer using `\u{NNNN}` representation.
|
/// Escapes a character into provided buffer using `\u{NNNN}` representation.
|
||||||
pub(crate) fn escape_unicode_into(output: &mut [u8; 10], ch: char) -> Range<u8> {
|
pub(crate) fn escape_unicode_into(output: &mut [ascii::Char; 10], ch: char) -> Range<u8> {
|
||||||
output[9] = b'}';
|
output[9] = ascii::Char::RightCurlyBracket;
|
||||||
|
|
||||||
let ch = ch as u32;
|
let ch = ch as u32;
|
||||||
output[3] = HEX_DIGITS[((ch >> 20) & 15) as usize];
|
output[3] = HEX_DIGITS[((ch >> 20) & 15) as usize];
|
||||||
@ -41,7 +48,8 @@ pub(crate) fn escape_unicode_into(output: &mut [u8; 10], ch: char) -> Range<u8>
|
|||||||
// or-ing 1 ensures that for ch==0 the code computes that one digit should
|
// or-ing 1 ensures that for ch==0 the code computes that one digit should
|
||||||
// be printed.
|
// be printed.
|
||||||
let start = (ch | 1).leading_zeros() as usize / 4 - 2;
|
let start = (ch | 1).leading_zeros() as usize / 4 - 2;
|
||||||
output[start..start + 3].copy_from_slice(b"\\u{");
|
const UNICODE_ESCAPE_PREFIX: &[ascii::Char; 3] = b"\\u{".as_ascii().unwrap();
|
||||||
|
output[start..][..3].copy_from_slice(UNICODE_ESCAPE_PREFIX);
|
||||||
|
|
||||||
(start as u8)..10
|
(start as u8)..10
|
||||||
}
|
}
|
||||||
@ -52,29 +60,34 @@ pub(crate) fn escape_unicode_into(output: &mut [u8; 10], ch: char) -> Range<u8>
|
|||||||
/// limited to u8 to reduce size of the structure.
|
/// limited to u8 to reduce size of the structure.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub(crate) struct EscapeIterInner<const N: usize> {
|
pub(crate) struct EscapeIterInner<const N: usize> {
|
||||||
// Invariant: data[alive] is all ASCII.
|
// The element type ensures this is always ASCII, and thus also valid UTF-8.
|
||||||
pub(crate) data: [u8; N],
|
pub(crate) data: [ascii::Char; N],
|
||||||
|
|
||||||
// Invariant: alive.start <= alive.end <= N.
|
// Invariant: alive.start <= alive.end <= N.
|
||||||
pub(crate) alive: Range<u8>,
|
pub(crate) alive: Range<u8>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<const N: usize> EscapeIterInner<N> {
|
impl<const N: usize> EscapeIterInner<N> {
|
||||||
pub fn new(data: [u8; N], alive: Range<u8>) -> Self {
|
pub fn new(data: [ascii::Char; N], alive: Range<u8>) -> Self {
|
||||||
const { assert!(N < 256) };
|
const { assert!(N < 256) };
|
||||||
debug_assert!(alive.start <= alive.end && usize::from(alive.end) <= N, "{alive:?}");
|
debug_assert!(alive.start <= alive.end && usize::from(alive.end) <= N, "{alive:?}");
|
||||||
let this = Self { data, alive };
|
Self { data, alive }
|
||||||
debug_assert!(this.as_bytes().is_ascii(), "Expected ASCII, got {:?}", this.as_bytes());
|
|
||||||
this
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn as_bytes(&self) -> &[u8] {
|
pub fn from_array<const M: usize>(array: [ascii::Char; M]) -> Self {
|
||||||
|
const { assert!(M <= N) };
|
||||||
|
|
||||||
|
let mut data = [ascii::Char::Null; N];
|
||||||
|
data[..M].copy_from_slice(&array);
|
||||||
|
Self::new(data, 0..M as u8)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_ascii(&self) -> &[ascii::Char] {
|
||||||
&self.data[usize::from(self.alive.start)..usize::from(self.alive.end)]
|
&self.data[usize::from(self.alive.start)..usize::from(self.alive.end)]
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn as_str(&self) -> &str {
|
pub fn as_str(&self) -> &str {
|
||||||
// SAFETY: self.data[self.alive] is all ASCII characters.
|
self.as_ascii().as_str()
|
||||||
unsafe { crate::str::from_utf8_unchecked(self.as_bytes()) }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn len(&self) -> usize {
|
pub fn len(&self) -> usize {
|
||||||
@ -82,11 +95,11 @@ impl<const N: usize> EscapeIterInner<N> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn next(&mut self) -> Option<u8> {
|
pub fn next(&mut self) -> Option<u8> {
|
||||||
self.alive.next().map(|i| self.data[usize::from(i)])
|
self.alive.next().map(|i| self.data[usize::from(i)].as_u8())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn next_back(&mut self) -> Option<u8> {
|
pub fn next_back(&mut self) -> Option<u8> {
|
||||||
self.alive.next_back().map(|i| self.data[usize::from(i)])
|
self.alive.next_back().map(|i| self.data[usize::from(i)].as_u8())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
|
pub fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
|
||||||
|
@ -216,6 +216,7 @@
|
|||||||
#![feature(intra_doc_pointers)]
|
#![feature(intra_doc_pointers)]
|
||||||
#![feature(intrinsics)]
|
#![feature(intrinsics)]
|
||||||
#![feature(lang_items)]
|
#![feature(lang_items)]
|
||||||
|
#![feature(let_chains)]
|
||||||
#![feature(link_llvm_intrinsics)]
|
#![feature(link_llvm_intrinsics)]
|
||||||
#![feature(macro_metavar_expr)]
|
#![feature(macro_metavar_expr)]
|
||||||
#![feature(min_specialization)]
|
#![feature(min_specialization)]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user