diff --git a/library/core/src/char/decode.rs b/library/core/src/char/decode.rs index 8b9f979b573..794c9c13cc3 100644 --- a/library/core/src/char/decode.rs +++ b/library/core/src/char/decode.rs @@ -91,7 +91,7 @@ fn next(&mut self) -> Option> { None => self.iter.next()?, }; - if u < 0xD800 || 0xDFFF < u { + if !u.is_utf16_surrogate() { // SAFETY: not a surrogate Some(Ok(unsafe { from_u32_unchecked(u as u32) })) } else if u >= 0xDC00 { @@ -125,7 +125,7 @@ fn size_hint(&self) -> (usize, Option) { // buf is empty, no additional elements from it. None => (0, 0), // `u` is a non surrogate, so it's always an additional character. - Some(u) if u < 0xD800 || 0xDFFF < u => (1, 1), + Some(u) if !u.is_utf16_surrogate() => (1, 1), // `u` is a leading surrogate (it can never be a trailing surrogate and // it's a surrogate due to the previous branch) and `self.iter` is empty. // diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index 0860e1bf4ca..1a85e2ef7b6 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -93,7 +93,7 @@ #![warn(missing_docs)] #![allow(explicit_outlives_requirements)] // -// Library features for const fns: +// Library features: #![feature(const_align_offset)] #![feature(const_align_of_val)] #![feature(const_alloc_layout)] @@ -146,6 +146,8 @@ #![feature(ptr_metadata)] #![feature(slice_ptr_get)] #![feature(str_internals)] +#![feature(utf16_extra)] +#![feature(utf16_extra_const)] #![feature(variant_count)] #![feature(const_array_from_ref)] #![feature(const_slice_from_ref)] diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index 07fd317e074..dca8ffa4e2c 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -820,6 +820,31 @@ impl u16 { uint_impl! { u16, u16, i16, NonZeroU16, 16, 65535, 4, "0xa003", "0x3a", "0x1234", "0x3412", "0x2c48", "[0x34, 0x12]", "[0x12, 0x34]", "", "" } widening_impl! 
{ u16, u32, 16, unsigned } + + /// Checks if the value is a UTF-16 surrogate (`0xD800..=0xDFFF`), which is not a valid [`char`]. + /// + /// # Examples + /// + /// ``` + /// #![feature(utf16_extra)] + /// + /// let below_surrogates = 0xA000u16; + /// let leading_surrogate = 0xD800u16; + /// let trailing_surrogate = 0xDC00u16; + /// let above_surrogates = 0xE000u16; + /// + /// assert!(!below_surrogates.is_utf16_surrogate()); + /// assert!(leading_surrogate.is_utf16_surrogate()); + /// assert!(trailing_surrogate.is_utf16_surrogate()); + /// assert!(!above_surrogates.is_utf16_surrogate()); + /// ``` + #[must_use] + #[unstable(feature = "utf16_extra", issue = "94919")] + #[rustc_const_unstable(feature = "utf16_extra_const", issue = "94919")] + #[inline] + pub const fn is_utf16_surrogate(self) -> bool { + matches!(self, 0xD800..=0xDFFF) + } } #[lang = "u32"]