Add the basic ascii::Char
type
This commit is contained in:
parent
831c9298c8
commit
8c781b0906
@ -101,6 +101,7 @@
|
||||
#![feature(array_into_iter_constructors)]
|
||||
#![feature(array_methods)]
|
||||
#![feature(array_windows)]
|
||||
#![feature(ascii_char)]
|
||||
#![feature(assert_matches)]
|
||||
#![feature(async_iterator)]
|
||||
#![feature(coerce_unsized)]
|
||||
|
@ -2526,6 +2526,15 @@ impl<T: fmt::Display + ?Sized> ToString for T {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(no_global_oom_handling))]
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
impl ToString for core::ascii::Char {
|
||||
#[inline]
|
||||
fn to_string(&self) -> String {
|
||||
self.as_str().to_owned()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(no_global_oom_handling))]
|
||||
#[stable(feature = "char_to_string_specialization", since = "1.46.0")]
|
||||
impl ToString for char {
|
||||
|
34
library/core/src/array/ascii.rs
Normal file
34
library/core/src/array/ascii.rs
Normal file
@ -0,0 +1,34 @@
|
||||
use crate::ascii;
|
||||
|
||||
#[cfg(not(test))]
|
||||
impl<const N: usize> [u8; N] {
|
||||
/// Converts this array of bytes into an array of ASCII characters,
|
||||
/// or returns `None` if any of the characters is non-ASCII.
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[must_use]
|
||||
#[inline]
|
||||
pub fn as_ascii(&self) -> Option<&[ascii::Char; N]> {
|
||||
if self.is_ascii() {
|
||||
// SAFETY: Just checked that it's ASCII
|
||||
Some(unsafe { self.as_ascii_unchecked() })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts this array of bytes into an array of ASCII characters,
|
||||
/// without checking whether they're valid.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Every byte in the array must be in `0..=127`, or else this is UB.
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[must_use]
|
||||
#[inline]
|
||||
pub const unsafe fn as_ascii_unchecked(&self) -> &[ascii::Char; N] {
|
||||
let byte_ptr: *const [u8; N] = self;
|
||||
let ascii_ptr = byte_ptr as *const [ascii::Char; N];
|
||||
// SAFETY: The caller promised all the bytes are ASCII
|
||||
unsafe { &*ascii_ptr }
|
||||
}
|
||||
}
|
@ -17,6 +17,7 @@ use crate::ops::{
|
||||
};
|
||||
use crate::slice::{Iter, IterMut};
|
||||
|
||||
mod ascii;
|
||||
mod drain;
|
||||
mod equality;
|
||||
mod iter;
|
||||
|
@ -14,6 +14,10 @@ use crate::iter::FusedIterator;
|
||||
use crate::ops::Range;
|
||||
use crate::str::from_utf8_unchecked;
|
||||
|
||||
mod ascii_char;
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
pub use ascii_char::AsciiChar as Char;
|
||||
|
||||
/// An iterator over the escaped version of a byte.
|
||||
///
|
||||
/// This `struct` is created by the [`escape_default`] function. See its
|
||||
|
565
library/core/src/ascii/ascii_char.rs
Normal file
565
library/core/src/ascii/ascii_char.rs
Normal file
@ -0,0 +1,565 @@
|
||||
//! This uses the name `AsciiChar`, even though it's not exposed that way right now,
|
||||
//! because it avoids a whole bunch of "are you sure you didn't mean `char`?"
|
||||
//! suggestions from rustc if you get anything slightly wrong in here, and overall
|
||||
//! helps with clarity as we're also referring to `char` intentionally in here.
|
||||
|
||||
use crate::fmt;
|
||||
use crate::mem::transmute;
|
||||
|
||||
/// One of the 128 Unicode characters from U+0000 through U+007F,
|
||||
/// often known as the [ASCII] subset.
|
||||
///
|
||||
/// Officially, this is the first [block] in Unicode, _Basic Latin_.
|
||||
/// For details, see the [*C0 Controls and Basic Latin*][chart] code chart.
|
||||
///
|
||||
/// This block was based on older 7-bit character code standards such as
|
||||
/// ANSI X3.4-1977, ISO 646-1973, and [NIST FIPS 1-2].
|
||||
///
|
||||
/// # When to use this
|
||||
///
|
||||
/// The main advantage of this subset is that it's always valid UTF-8. As such,
|
||||
/// the `&[ascii::Char]` -> `&str` conversion function (as well as other related
|
||||
/// ones) are O(1): *no* runtime checks are needed.
|
||||
///
|
||||
/// If you're consuming strings, you should usually handle Unicode and thus
|
||||
/// accept `str`s, not limit yourself to `ascii::Char`s.
|
||||
///
|
||||
/// However, certain formats are intentionally designed to produce ASCII-only
|
||||
/// output in order to be 8-bit-clean. In those cases, it can be simpler and
|
||||
/// faster to generate `ascii::Char`s instead of dealing with the variable width
|
||||
/// properties of general UTF-8 encoded strings, while still allowing the result
|
||||
/// to be used freely with other Rust things that deal in general `str`s.
|
||||
///
|
||||
/// For example, a UUID library might offer a way to produce the string
|
||||
/// representation of a UUID as an `[ascii::Char; 36]` to avoid memory
|
||||
/// allocation yet still allow it to be used as UTF-8 via `as_str` without
|
||||
/// paying for validation (or needing `unsafe` code) the way it would if it
|
||||
/// were provided as a `[u8; 36]`.
|
||||
///
|
||||
/// # Layout
|
||||
///
|
||||
/// This type is guaranteed to have a size and alignment of 1 byte.
|
||||
///
|
||||
/// # Names
|
||||
///
|
||||
/// The variants on this type are [Unicode names][NamesList] of the characters
|
||||
/// in upper camel case, with a few tweaks:
|
||||
/// - For `<control>` characters, the primary alias name is used.
|
||||
/// - `LATIN` is dropped, as this block has no non-latin letters.
|
||||
/// - `LETTER` is dropped, as `CAPITAL`/`SMALL` suffices in this block.
|
||||
/// - `DIGIT`s use a single digit rather than writing out `ZERO`, `ONE`, etc.
|
||||
///
|
||||
/// [ASCII]: https://www.unicode.org/glossary/index.html#ASCII
|
||||
/// [block]: https://www.unicode.org/glossary/index.html#block
|
||||
/// [chart]: https://www.unicode.org/charts/PDF/U0000.pdf
|
||||
/// [NIST FIPS 1-2]: https://nvlpubs.nist.gov/nistpubs/Legacy/FIPS/fipspub1-2-1977.pdf
|
||||
/// [NamesList]: https://www.unicode.org/Public/15.0.0/ucd/NamesList.txt
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[repr(u8)]
|
||||
pub enum AsciiChar {
|
||||
/// U+0000
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Null = 0,
|
||||
/// U+0001
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
StartOfHeading = 1,
|
||||
/// U+0002
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
StartOfText = 2,
|
||||
/// U+0003
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
EndOfText = 3,
|
||||
/// U+0004
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
EndOfTransmission = 4,
|
||||
/// U+0005
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Enquiry = 5,
|
||||
/// U+0006
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Acknowledge = 6,
|
||||
/// U+0007
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Bell = 7,
|
||||
/// U+0008
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Backspace = 8,
|
||||
/// U+0009
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CharacterTabulation = 9,
|
||||
/// U+000A
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
LineFeed = 10,
|
||||
/// U+000B
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
LineTabulation = 11,
|
||||
/// U+000C
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
FormFeed = 12,
|
||||
/// U+000D
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CarriageReturn = 13,
|
||||
/// U+000E
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
ShiftOut = 14,
|
||||
/// U+000F
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
ShiftIn = 15,
|
||||
/// U+0010
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
DataLinkEscape = 16,
|
||||
/// U+0011
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
DeviceControlOne = 17,
|
||||
/// U+0012
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
DeviceControlTwo = 18,
|
||||
/// U+0013
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
DeviceControlThree = 19,
|
||||
/// U+0014
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
DeviceControlFour = 20,
|
||||
/// U+0015
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
NegativeAcknowledge = 21,
|
||||
/// U+0016
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SynchronousIdle = 22,
|
||||
/// U+0017
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
EndOfTransmissionBlock = 23,
|
||||
/// U+0018
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Cancel = 24,
|
||||
/// U+0019
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
EndOfMedium = 25,
|
||||
/// U+001A
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Substitute = 26,
|
||||
/// U+001B
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Escape = 27,
|
||||
/// U+001C
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
InformationSeparatorFour = 28,
|
||||
/// U+001D
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
InformationSeparatorThree = 29,
|
||||
/// U+001E
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
InformationSeparatorTwo = 30,
|
||||
/// U+001F
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
InformationSeparatorOne = 31,
|
||||
/// U+0020
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Space = 32,
|
||||
/// U+0021
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
ExclamationMark = 33,
|
||||
/// U+0022
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
QuotationMark = 34,
|
||||
/// U+0023
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
NumberSign = 35,
|
||||
/// U+0024
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
DollarSign = 36,
|
||||
/// U+0025
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
PercentSign = 37,
|
||||
/// U+0026
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Ampersand = 38,
|
||||
/// U+0027
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Apostrophe = 39,
|
||||
/// U+0028
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
LeftParenthesis = 40,
|
||||
/// U+0029
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
RightParenthesis = 41,
|
||||
/// U+002A
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Asterisk = 42,
|
||||
/// U+002B
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
PlusSign = 43,
|
||||
/// U+002C
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Comma = 44,
|
||||
/// U+002D
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
HyphenMinus = 45,
|
||||
/// U+002E
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
FullStop = 46,
|
||||
/// U+002F
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Solidus = 47,
|
||||
/// U+0030
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Digit0 = 48,
|
||||
/// U+0031
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Digit1 = 49,
|
||||
/// U+0032
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Digit2 = 50,
|
||||
/// U+0033
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Digit3 = 51,
|
||||
/// U+0034
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Digit4 = 52,
|
||||
/// U+0035
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Digit5 = 53,
|
||||
/// U+0036
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Digit6 = 54,
|
||||
/// U+0037
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Digit7 = 55,
|
||||
/// U+0038
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Digit8 = 56,
|
||||
/// U+0039
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Digit9 = 57,
|
||||
/// U+003A
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Colon = 58,
|
||||
/// U+003B
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Semicolon = 59,
|
||||
/// U+003C
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
LessThanSign = 60,
|
||||
/// U+003D
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
EqualsSign = 61,
|
||||
/// U+003E
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
GreaterThanSign = 62,
|
||||
/// U+003F
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
QuestionMark = 63,
|
||||
/// U+0040
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CommercialAt = 64,
|
||||
/// U+0041
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalA = 65,
|
||||
/// U+0042
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalB = 66,
|
||||
/// U+0043
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalC = 67,
|
||||
/// U+0044
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalD = 68,
|
||||
/// U+0045
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalE = 69,
|
||||
/// U+0046
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalF = 70,
|
||||
/// U+0047
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalG = 71,
|
||||
/// U+0048
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalH = 72,
|
||||
/// U+0049
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalI = 73,
|
||||
/// U+004A
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalJ = 74,
|
||||
/// U+004B
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalK = 75,
|
||||
/// U+004C
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalL = 76,
|
||||
/// U+004D
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalM = 77,
|
||||
/// U+004E
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalN = 78,
|
||||
/// U+004F
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalO = 79,
|
||||
/// U+0050
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalP = 80,
|
||||
/// U+0051
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalQ = 81,
|
||||
/// U+0052
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalR = 82,
|
||||
/// U+0053
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalS = 83,
|
||||
/// U+0054
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalT = 84,
|
||||
/// U+0055
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalU = 85,
|
||||
/// U+0056
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalV = 86,
|
||||
/// U+0057
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalW = 87,
|
||||
/// U+0058
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalX = 88,
|
||||
/// U+0059
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalY = 89,
|
||||
/// U+005A
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CapitalZ = 90,
|
||||
/// U+005B
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
LeftSquareBracket = 91,
|
||||
/// U+005C
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
ReverseSolidus = 92,
|
||||
/// U+005D
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
RightSquareBracket = 93,
|
||||
/// U+005E
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
CircumflexAccent = 94,
|
||||
/// U+005F
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
LowLine = 95,
|
||||
/// U+0060
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
GraveAccent = 96,
|
||||
/// U+0061
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallA = 97,
|
||||
/// U+0062
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallB = 98,
|
||||
/// U+0063
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallC = 99,
|
||||
/// U+0064
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallD = 100,
|
||||
/// U+0065
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallE = 101,
|
||||
/// U+0066
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallF = 102,
|
||||
/// U+0067
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallG = 103,
|
||||
/// U+0068
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallH = 104,
|
||||
/// U+0069
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallI = 105,
|
||||
/// U+006A
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallJ = 106,
|
||||
/// U+006B
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallK = 107,
|
||||
/// U+006C
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallL = 108,
|
||||
/// U+006D
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallM = 109,
|
||||
/// U+006E
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallN = 110,
|
||||
/// U+006F
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallO = 111,
|
||||
/// U+0070
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallP = 112,
|
||||
/// U+0071
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallQ = 113,
|
||||
/// U+0072
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallR = 114,
|
||||
/// U+0073
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallS = 115,
|
||||
/// U+0074
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallT = 116,
|
||||
/// U+0075
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallU = 117,
|
||||
/// U+0076
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallV = 118,
|
||||
/// U+0077
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallW = 119,
|
||||
/// U+0078
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallX = 120,
|
||||
/// U+0079
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallY = 121,
|
||||
/// U+007A
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
SmallZ = 122,
|
||||
/// U+007B
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
LeftCurlyBracket = 123,
|
||||
/// U+007C
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
VerticalLine = 124,
|
||||
/// U+007D
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
RightCurlyBracket = 125,
|
||||
/// U+007E
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Tilde = 126,
|
||||
/// U+007F
|
||||
#[unstable(feature = "ascii_char_variants", issue = "110998")]
|
||||
Delete = 127,
|
||||
}
|
||||
|
||||
impl AsciiChar {
|
||||
/// Creates an ASCII character from the byte `b`,
|
||||
/// or returns `None` if it's too large.
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[inline]
|
||||
pub const fn from_u8(b: u8) -> Option<Self> {
|
||||
if b <= 127 {
|
||||
// SAFETY: Just checked that `b` is in-range
|
||||
Some(unsafe { Self::from_u8_unchecked(b) })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates an ASCII character from the byte `b`,
|
||||
/// without checking whether it's valid.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// `b` must be in `0..=127`, or else this is UB.
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[inline]
|
||||
pub const unsafe fn from_u8_unchecked(b: u8) -> Self {
|
||||
// SAFETY: Our safety precondition is that `b` is in-range.
|
||||
unsafe { transmute(b) }
|
||||
}
|
||||
|
||||
/// When passed the *number* `0`, `1`, …, `9`, returns the *character*
|
||||
/// `'0'`, `'1'`, …, `'9'` respectively.
|
||||
///
|
||||
/// If `d >= 10`, returns `None`.
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[inline]
|
||||
pub const fn digit(d: u8) -> Option<Self> {
|
||||
if d < 10 {
|
||||
// SAFETY: Just checked it's in-range.
|
||||
Some(unsafe { Self::digit_unchecked(d) })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// When passed the *number* `0`, `1`, …, `9`, returns the *character*
|
||||
/// `'0'`, `'1'`, …, `'9'` respectively, without checking that it's in-range.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// This is immediate UB if called with `d > 64`.
|
||||
///
|
||||
/// If `d >= 10` and `d <= 64`, this is allowed to return any value or panic.
|
||||
/// Notably, it should not be expected to return hex digits, or any other
|
||||
/// reasonable extension of the decimal digits.
|
||||
///
|
||||
/// (This loose safety condition is intended to simplify soundness proofs
|
||||
/// when writing code using this method, since the implementation doesn't
|
||||
/// need something really specific, not to make those other arguments do
|
||||
/// something useful. It might be tightened before stabilization.)
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[inline]
|
||||
pub const unsafe fn digit_unchecked(d: u8) -> Self {
|
||||
debug_assert!(d < 10);
|
||||
|
||||
// SAFETY: `'0'` through `'9'` are U+0030 through U+0039,
|
||||
// so because `d` must be 64 or less the addition can return at most
|
||||
// 112 (0x70), which doesn't overflow and is within the ASCII range.
|
||||
unsafe {
|
||||
let byte = b'0'.unchecked_add(d);
|
||||
Self::from_u8_unchecked(byte)
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets this ASCII character as a byte.
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[inline]
|
||||
pub const fn as_u8(self) -> u8 {
|
||||
self as u8
|
||||
}
|
||||
|
||||
/// Gets this ASCII character as a `char` Unicode Scalar Value.
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[inline]
|
||||
pub const fn as_char(self) -> char {
|
||||
self as u8 as char
|
||||
}
|
||||
|
||||
/// Views this ASCII character as a one-code-unit UTF-8 `str`.
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[inline]
|
||||
pub const fn as_str(&self) -> &str {
|
||||
crate::slice::from_ref(self).as_str()
|
||||
}
|
||||
}
|
||||
|
||||
impl [AsciiChar] {
|
||||
/// Views this slice of ASCII characters as a UTF-8 `str`.
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[inline]
|
||||
pub const fn as_str(&self) -> &str {
|
||||
let ascii_ptr: *const Self = self;
|
||||
let str_ptr = ascii_ptr as *const str;
|
||||
// SAFETY: Each ASCII codepoint in UTF-8 is encoded as one single-byte
|
||||
// code unit having the same value as the ASCII byte.
|
||||
unsafe { &*str_ptr }
|
||||
}
|
||||
|
||||
/// Views this slice of ASCII characters as a slice of `u8` bytes.
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[inline]
|
||||
pub const fn as_bytes(&self) -> &[u8] {
|
||||
self.as_str().as_bytes()
|
||||
}
|
||||
}
|
||||
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
impl fmt::Display for AsciiChar {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
<str as fmt::Display>::fmt(self.as_str(), f)
|
||||
}
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
//! impl char {}
|
||||
|
||||
use crate::ascii;
|
||||
use crate::slice;
|
||||
use crate::str::from_utf8_unchecked_mut;
|
||||
use crate::unicode::printable::is_printable;
|
||||
@ -1116,6 +1117,24 @@ impl char {
|
||||
*self as u32 <= 0x7F
|
||||
}
|
||||
|
||||
/// Returns `Some` if the value is within the ASCII range,
|
||||
/// or `None` if it's not.
|
||||
///
|
||||
/// This is preferred to [`Self::is_ascii`] when you're passing the value
|
||||
/// along to something else that can take [`ascii::Char`] rather than
|
||||
/// needing to check again for itself whether the value is in ASCII.
|
||||
#[must_use]
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[inline]
|
||||
pub const fn as_ascii(&self) -> Option<ascii::Char> {
|
||||
if self.is_ascii() {
|
||||
// SAFETY: Just checked that this is ASCII.
|
||||
Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Makes a copy of the value in its ASCII upper case equivalent.
|
||||
///
|
||||
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
|
||||
|
@ -472,7 +472,16 @@ impl u8 {
|
||||
#[rustc_const_stable(feature = "const_u8_is_ascii", since = "1.43.0")]
|
||||
#[inline]
|
||||
pub const fn is_ascii(&self) -> bool {
|
||||
*self & 128 == 0
|
||||
*self <= 127
|
||||
}
|
||||
|
||||
/// If the value of this byte is within the ASCII range, returns it as an
|
||||
/// [ASCII character](ascii::Char). Otherwise, returns `None`.
|
||||
#[must_use]
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[inline]
|
||||
pub const fn as_ascii(&self) -> Option<ascii::Char> {
|
||||
ascii::Char::from_u8(*self)
|
||||
}
|
||||
|
||||
/// Makes a copy of the value in its ASCII upper case equivalent.
|
||||
|
@ -16,6 +16,36 @@ impl [u8] {
|
||||
is_ascii(self)
|
||||
}
|
||||
|
||||
/// If this slice [`is_ascii`](Self::is_ascii), returns it as a slice of
|
||||
/// [ASCII characters](`ascii::Char`), otherwise returns `None`.
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[must_use]
|
||||
#[inline]
|
||||
pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
|
||||
if self.is_ascii() {
|
||||
// SAFETY: Just checked that it's ASCII
|
||||
Some(unsafe { self.as_ascii_unchecked() })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts this slice of bytes into a slice of ASCII characters,
|
||||
/// without checking whether they're valid.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Every byte in the slice must be in `0..=127`, or else this is UB.
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[must_use]
|
||||
#[inline]
|
||||
pub const unsafe fn as_ascii_unchecked(&self) -> &[ascii::Char] {
|
||||
let byte_ptr: *const [u8] = self;
|
||||
let ascii_ptr = byte_ptr as *const [ascii::Char];
|
||||
// SAFETY: The caller promised all the bytes are ASCII
|
||||
unsafe { &*ascii_ptr }
|
||||
}
|
||||
|
||||
/// Checks that two slices are an ASCII case-insensitive match.
|
||||
///
|
||||
/// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
|
||||
|
@ -16,6 +16,7 @@ mod validations;
|
||||
use self::pattern::Pattern;
|
||||
use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
|
||||
|
||||
use crate::ascii;
|
||||
use crate::char::{self, EscapeDebugExtArgs};
|
||||
use crate::mem;
|
||||
use crate::slice::{self, SliceIndex};
|
||||
@ -2366,6 +2367,16 @@ impl str {
|
||||
self.as_bytes().is_ascii()
|
||||
}
|
||||
|
||||
/// If this string slice [`is_ascii`](Self::is_ascii), returns it as a slice
|
||||
/// of [ASCII characters](`ascii::Char`), otherwise returns `None`.
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
#[must_use]
|
||||
#[inline]
|
||||
pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
|
||||
// Like in `is_ascii`, we can work on the bytes directly.
|
||||
self.as_bytes().as_ascii()
|
||||
}
|
||||
|
||||
/// Checks that two strings are an ASCII case-insensitive match.
|
||||
///
|
||||
/// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
|
||||
|
@ -16,6 +16,9 @@
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
pub use core::ascii::{escape_default, EscapeDefault};
|
||||
|
||||
#[unstable(feature = "ascii_char", issue = "110998")]
|
||||
pub use core::ascii::Char;
|
||||
|
||||
/// Extension methods for ASCII-subset only operations.
|
||||
///
|
||||
/// Be aware that operations on seemingly non-ASCII characters can sometimes
|
||||
|
37
tests/codegen/ascii-char.rs
Normal file
37
tests/codegen/ascii-char.rs
Normal file
@ -0,0 +1,37 @@
|
||||
// compile-flags: -C opt-level=1
|
||||
// ignore-debug (the extra assertions get in the way)
|
||||
|
||||
#![crate_type = "lib"]
|
||||
#![feature(ascii_char)]
|
||||
|
||||
use std::ascii::Char as AsciiChar;
|
||||
|
||||
// CHECK-LABEL: i8 @unwrap_digit_from_remainder(i32
|
||||
#[no_mangle]
|
||||
pub fn unwrap_digit_from_remainder(v: u32) -> AsciiChar {
|
||||
// CHECK-NOT: icmp
|
||||
// CHECK-NOT: panic
|
||||
|
||||
// CHECK: %[[R:.+]] = urem i32 %v, 10
|
||||
// CHECK-NEXT: %[[T:.+]] = trunc i32 %[[R]] to i8
|
||||
// CHECK-NEXT: %[[D:.+]] = or i8 %[[T]], 48
|
||||
// CHECK-NEXT: ret i8 %[[D]]
|
||||
|
||||
// CHECK-NOT: icmp
|
||||
// CHECK-NOT: panic
|
||||
AsciiChar::digit((v % 10) as u8).unwrap()
|
||||
}
|
||||
|
||||
// CHECK-LABEL: i8 @unwrap_from_masked(i8
|
||||
#[no_mangle]
|
||||
pub fn unwrap_from_masked(b: u8) -> AsciiChar {
|
||||
// CHECK-NOT: icmp
|
||||
// CHECK-NOT: panic
|
||||
|
||||
// CHECK: %[[M:.+]] = and i8 %b, 127
|
||||
// CHECK-NEXT: ret i8 %[[M]]
|
||||
|
||||
// CHECK-NOT: icmp
|
||||
// CHECK-NOT: panic
|
||||
AsciiChar::from_u8(b & 0x7f).unwrap()
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user