From 3855b8bb609bbaaa3871797b40a78757f88d9b12 Mon Sep 17 00:00:00 2001 From: George Bateman Date: Tue, 25 Jul 2023 22:27:15 +0100 Subject: [PATCH] Make {integer}::from_str_radix constant --- library/core/src/lib.rs | 1 + library/core/src/num/error.rs | 3 +- library/core/src/num/int_macros.rs | 26 -- library/core/src/num/mod.rs | 311 ++++++++++++------- library/core/src/num/nonzero.rs | 3 +- library/core/src/num/uint_macros.rs | 27 -- library/core/tests/lib.rs | 1 + library/core/tests/num/mod.rs | 10 + tests/ui/consts/const-eval/parse_ints.rs | 10 + tests/ui/consts/const-eval/parse_ints.stderr | 31 ++ 10 files changed, 248 insertions(+), 175 deletions(-) create mode 100644 tests/ui/consts/const-eval/parse_ints.rs create mode 100644 tests/ui/consts/const-eval/parse_ints.stderr diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index 18dd3440b6e..476e230472b 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -137,6 +137,7 @@ #![feature(const_heap)] #![feature(const_hint_assert_unchecked)] #![feature(const_index_range_slice_index)] +#![feature(const_int_from_str)] #![feature(const_intrinsic_copy)] #![feature(const_intrinsic_forget)] #![feature(const_ipv4)] diff --git a/library/core/src/num/error.rs b/library/core/src/num/error.rs index 14e99578a7c..a2d7e6f7b07 100644 --- a/library/core/src/num/error.rs +++ b/library/core/src/num/error.rs @@ -113,8 +113,9 @@ pub enum IntErrorKind { impl ParseIntError { /// Outputs the detailed cause of parsing an integer failing. #[must_use] + #[rustc_const_unstable(feature = "const_int_from_str", issue = "59133")] #[stable(feature = "int_error_matching", since = "1.55.0")] - pub fn kind(&self) -> &IntErrorKind { + pub const fn kind(&self) -> &IntErrorKind { &self.kind } } diff --git a/library/core/src/num/int_macros.rs b/library/core/src/num/int_macros.rs index 2fec8ef2381..2f5184da885 100644 --- a/library/core/src/num/int_macros.rs +++ b/library/core/src/num/int_macros.rs @@ -60,32 +60,6 @@ macro_rules! 
int_impl { #[stable(feature = "int_bits_const", since = "1.53.0")] pub const BITS: u32 = <$UnsignedT>::BITS; - /// Converts a string slice in a given base to an integer. - /// - /// The string is expected to be an optional `+` or `-` sign followed by digits. - /// Leading and trailing whitespace represent an error. Digits are a subset of these characters, - /// depending on `radix`: - /// - /// * `0-9` - /// * `a-z` - /// * `A-Z` - /// - /// # Panics - /// - /// This function panics if `radix` is not in the range from 2 to 36. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - #[doc = concat!("assert_eq!(", stringify!($SelfT), "::from_str_radix(\"A\", 16), Ok(10));")] - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn from_str_radix(src: &str, radix: u32) -> Result<Self, ParseIntError> { - from_str_radix(src, radix) - } - /// Returns the number of ones in the binary representation of `self`. /// /// # Examples diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index 03c977abbbb..9e519dad432 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -6,7 +6,6 @@ use crate::hint; use crate::intrinsics; use crate::mem; -use crate::ops::{Add, Mul, Sub}; use crate::str::FromStr; // Used because the `?` operator is not allowed in a const context. @@ -1386,51 +1385,19 @@ pub enum FpCategory { Normal, } -#[doc(hidden)] -trait FromStrRadixHelper: - PartialOrd + Copy + Add<Output = Self> + Sub<Output = Self> + Mul<Output = Self> -{ - const MIN: Self; - fn from_u32(u: u32) -> Self; - fn checked_mul(&self, other: u32) -> Option<Self>; - fn checked_sub(&self, other: u32) -> Option<Self>; - fn checked_add(&self, other: u32) -> Option<Self>; -} - macro_rules! from_str_radix_int_impl { ($($t:ty)*) => {$( #[stable(feature = "rust1", since = "1.0.0")] impl FromStr for $t { type Err = ParseIntError; fn from_str(src: &str) -> Result<Self, ParseIntError> { - from_str_radix(src, 10) + <$t>::from_str_radix(src, 10) } } )*} } from_str_radix_int_impl! { isize i8 i16 i32 i64 i128 usize u8 u16 u32 u64 u128 } -macro_rules! 
impl_helper_for { - ($($t:ty)*) => ($(impl FromStrRadixHelper for $t { - const MIN: Self = Self::MIN; - #[inline] - fn from_u32(u: u32) -> Self { u as Self } - #[inline] - fn checked_mul(&self, other: u32) -> Option<Self> { - Self::checked_mul(*self, other as Self) - } - #[inline] - fn checked_sub(&self, other: u32) -> Option<Self> { - Self::checked_sub(*self, other as Self) - } - #[inline] - fn checked_add(&self, other: u32) -> Option<Self> { - Self::checked_add(*self, other as Self) - } - })*) -} -impl_helper_for! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize } - /// Determines if a string of text of that length of that radix could be guaranteed to be /// stored in the given type T. /// Note that if the radix is known to the compiler, it is just the check of digits.len that @@ -1438,92 +1405,198 @@ fn checked_add(&self, other: u32) -> Option<Self> { #[doc(hidden)] #[inline(always)] #[unstable(issue = "none", feature = "std_internals")] -pub fn can_not_overflow<T>(radix: u32, is_signed_ty: bool, digits: &[u8]) -> bool { +pub const fn can_not_overflow<T>(radix: u32, is_signed_ty: bool, digits: &[u8]) -> bool { radix <= 16 && digits.len() <= mem::size_of::<T>() * 2 - is_signed_ty as usize } -fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, ParseIntError> { - use self::IntErrorKind::*; - use self::ParseIntError as PIE; - - assert!( - (2..=36).contains(&radix), - "from_str_radix_int: must lie in the range `[2, 36]` - found {}", - radix - ); - - if src.is_empty() { - return Err(PIE { kind: Empty }); - } - - let is_signed_ty = T::from_u32(0) > T::MIN; - - // all valid digits are ascii, so we will just iterate over the utf8 bytes - // and cast them to chars. 
.to_digit() will safely return None for anything - // other than a valid ascii digit for the given radix, including the first-byte - // of multi-byte sequences - let src = src.as_bytes(); - - let (is_positive, digits) = match src[0] { - b'+' | b'-' if src[1..].is_empty() => { - return Err(PIE { kind: InvalidDigit }); - } - b'+' => (true, &src[1..]), - b'-' if is_signed_ty => (false, &src[1..]), - _ => (true, src), - }; - - let mut result = T::from_u32(0); - - if can_not_overflow::<T>(radix, is_signed_ty, digits) { - // If the len of the str is short compared to the range of the type - // we are parsing into, then we can be certain that an overflow will not occur. - // This bound is when `radix.pow(digits.len()) - 1 <= T::MAX` but the condition - // above is a faster (conservative) approximation of this. - // - // Consider radix 16 as it has the highest information density per digit and will thus overflow the earliest: - // `u8::MAX` is `ff` - any str of len 2 is guaranteed to not overflow. - // `i8::MAX` is `7f` - only a str of len 1 is guaranteed to not overflow. - macro_rules! run_unchecked_loop { - ($unchecked_additive_op:expr) => { - for &c in digits { - result = result * T::from_u32(radix); - let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?; - result = $unchecked_additive_op(result, T::from_u32(x)); - } - }; - } - if is_positive { - run_unchecked_loop!(<T as core::ops::Add>::add) - } else { - run_unchecked_loop!(<T as core::ops::Sub>::sub) - }; - } else { - macro_rules! run_checked_loop { - ($checked_additive_op:ident, $overflow_err:expr) => { - for &c in digits { - // When `radix` is passed in as a literal, rather than doing a slow `imul` - // the compiler can use shifts if `radix` can be expressed as a - // sum of powers of 2 (x*10 can be written as x*8 + x*2). 
- When the compiler can't use these optimisations, - // the latency of the multiplication can be hidden by issuing it - // before the result is needed to improve performance on - // modern out-of-order CPU as multiplication here is slower - // than the other instructions, we can get the end result faster - // doing multiplication first and let the CPU spends other cycles - // doing other computation and get multiplication result later. - let mul = result.checked_mul(radix); - let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?; - result = mul.ok_or_else($overflow_err)?; - result = T::$checked_additive_op(&result, x).ok_or_else($overflow_err)?; - } - }; - } - if is_positive { - run_checked_loop!(checked_add, || PIE { kind: PosOverflow }) - } else { - run_checked_loop!(checked_sub, || PIE { kind: NegOverflow }) - }; - } - Ok(result) +#[track_caller] +const fn from_str_radix_panic_ct(_radix: u32) -> ! { + panic!("from_str_radix_int: must lie in the range `[2, 36]`"); } + +#[track_caller] +fn from_str_radix_panic_rt(radix: u32) -> ! { + panic!("from_str_radix_int: must lie in the range `[2, 36]` - found {}", radix); +} + +#[cfg_attr(not(feature = "panic_immediate_abort"), inline(never))] +#[cfg_attr(feature = "panic_immediate_abort", inline)] +#[cold] +#[track_caller] +const fn from_str_radix_assert(radix: u32) { + if 2 > radix || radix > 36 { + // The only difference between these two functions is their panic message. + intrinsics::const_eval_select((radix,), from_str_radix_panic_ct, from_str_radix_panic_rt); + } +} + +macro_rules! from_str_radix { + ($($int_ty:ty)+) => {$( + impl $int_ty { + /// Converts a string slice in a given base to an integer. + /// + /// The string is expected to be an optional `+` sign (or `-` for signed types) + /// followed by digits. + /// Leading and trailing whitespace represent an error. 
+ /// Digits are a subset of these characters, depending on `radix`: + /// + /// * `0-9` + /// * `a-z` + /// * `A-Z` + /// + /// # Panics + /// + /// This function panics if `radix` is not in the range from 2 to 36. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + #[doc = concat!("assert_eq!(", stringify!($int_ty), "::from_str_radix(\"A\", 16), Ok(10));")] + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[rustc_const_unstable(feature = "const_int_from_str", issue = "59133")] + pub const fn from_str_radix(src: &str, radix: u32) -> Result<$int_ty, ParseIntError> { + use self::IntErrorKind::*; + use self::ParseIntError as PIE; + + from_str_radix_assert(radix); + + if src.is_empty() { + return Err(PIE { kind: Empty }); + } + + #[allow(unused_comparisons)] + let is_signed_ty = 0 > <$int_ty>::MIN; + + // all valid digits are ascii, so we will just iterate over the utf8 bytes + // and cast them to chars. .to_digit() will safely return None for anything + // other than a valid ascii digit for the given radix, including the first-byte + // of multi-byte sequences + let src = src.as_bytes(); + + let (is_positive, mut digits) = match src { + [b'+' | b'-'] => { + return Err(PIE { kind: InvalidDigit }); + } + [b'+', rest @ ..] => (true, rest), + [b'-', rest @ ..] if is_signed_ty => (false, rest), + _ => (true, src), + }; + + let mut result = 0; + + macro_rules! unwrap_or_PIE { + ($option:expr, $kind:ident) => { + match $option { + Some(value) => value, + None => return Err(PIE { kind: $kind }), + } + }; + } + + if can_not_overflow::<$int_ty>(radix, is_signed_ty, digits) { + // If the len of the str is short compared to the range of the type + // we are parsing into, then we can be certain that an overflow will not occur. + // This bound is when `radix.pow(digits.len()) - 1 <= T::MAX` but the condition + // above is a faster (conservative) approximation of this. 
+ // + // Consider radix 16 as it has the highest information density per digit and will thus overflow the earliest: + // `u8::MAX` is `ff` - any str of len 2 is guaranteed to not overflow. + // `i8::MAX` is `7f` - only a str of len 1 is guaranteed to not overflow. + macro_rules! run_unchecked_loop { + ($unchecked_additive_op:tt) => {{ + while let [c, rest @ ..] = digits { + result = result * (radix as $int_ty); + let x = unwrap_or_PIE!((*c as char).to_digit(radix), InvalidDigit); + result = result $unchecked_additive_op (x as $int_ty); + digits = rest; + } + }}; + } + if is_positive { + run_unchecked_loop!(+) + } else { + run_unchecked_loop!(-) + }; + } else { + macro_rules! run_checked_loop { + ($checked_additive_op:ident, $overflow_err:ident) => {{ + while let [c, rest @ ..] = digits { + // When `radix` is passed in as a literal, rather than doing a slow `imul` + // the compiler can use shifts if `radix` can be expressed as a + // sum of powers of 2 (x*10 can be written as x*8 + x*2). + // When the compiler can't use these optimisations, + // the latency of the multiplication can be hidden by issuing it + // before the result is needed to improve performance on + // modern out-of-order CPU as multiplication here is slower + // than the other instructions, we can get the end result faster + // doing multiplication first and let the CPU spends other cycles + // doing other computation and get multiplication result later. + let mul = result.checked_mul(radix as $int_ty); + let x = unwrap_or_PIE!((*c as char).to_digit(radix), InvalidDigit) as $int_ty; + result = unwrap_or_PIE!(mul, $overflow_err); + result = unwrap_or_PIE!(<$int_ty>::$checked_additive_op(result, x), $overflow_err); + digits = rest; + } + }}; + } + if is_positive { + run_checked_loop!(checked_add, PosOverflow) + } else { + run_checked_loop!(checked_sub, NegOverflow) + }; + } + Ok(result) + } + } + )+} +} + +from_str_radix! 
{ i8 u8 i16 u16 i32 u32 i64 u64 i128 u128 } + +// Re-use the relevant implementation of from_str_radix for isize and usize to avoid outputting two +// identical functions. +macro_rules! from_str_radix_size_impl { + ($($t:ident $size:ty),*) => {$( + impl $size { + /// Converts a string slice in a given base to an integer. + /// + /// The string is expected to be an optional `+` sign (or `-` for signed types) + /// followed by digits. + /// Leading and trailing whitespace represent an error. + /// Digits are a subset of these characters, depending on `radix`: + /// + /// * `0-9` + /// * `a-z` + /// * `A-Z` + /// + /// # Panics + /// + /// This function panics if `radix` is not in the range from 2 to 36. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + #[doc = concat!("assert_eq!(", stringify!($size), "::from_str_radix(\"A\", 16), Ok(10));")] + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[rustc_const_unstable(feature = "const_int_from_str", issue = "59133")] + pub const fn from_str_radix(src: &str, radix: u32) -> Result<$size, ParseIntError> { + match <$t>::from_str_radix(src, radix) { + Ok(x) => Ok(x as $size), + Err(e) => Err(e), + } + } + })*} +} + +#[cfg(target_pointer_width = "16")] +from_str_radix_size_impl! { i16 isize, u16 usize } +#[cfg(target_pointer_width = "32")] +from_str_radix_size_impl! { i32 isize, u32 usize } +#[cfg(target_pointer_width = "64")] +from_str_radix_size_impl! { i64 isize, u64 usize } diff --git a/library/core/src/num/nonzero.rs b/library/core/src/num/nonzero.rs index 1171407c07a..62ea7abf652 100644 --- a/library/core/src/num/nonzero.rs +++ b/library/core/src/num/nonzero.rs @@ -11,7 +11,6 @@ use crate::str::FromStr; use crate::ub_checks; -use super::from_str_radix; use super::{IntErrorKind, ParseIntError}; /// A marker trait for primitive types which can be zero. 
@@ -804,7 +803,7 @@ pub const fn saturating_pow(self, other: u32) -> Self { impl FromStr for $Ty { type Err = ParseIntError; fn from_str(src: &str) -> Result<Self, Self::Err> { - Self::new(from_str_radix(src, 10)?) + Self::new(<$Int>::from_str_radix(src, 10)?) .ok_or(ParseIntError { kind: IntErrorKind::Zero }) diff --git a/library/core/src/num/uint_macros.rs b/library/core/src/num/uint_macros.rs index f76f110fc4e..3f4b5955d62 100644 --- a/library/core/src/num/uint_macros.rs +++ b/library/core/src/num/uint_macros.rs @@ -58,33 +58,6 @@ macro_rules! uint_impl { #[stable(feature = "int_bits_const", since = "1.53.0")] pub const BITS: u32 = Self::MAX.count_ones(); - /// Converts a string slice in a given base to an integer. - /// - /// The string is expected to be an optional `+` sign - /// followed by digits. - /// Leading and trailing whitespace represent an error. - /// Digits are a subset of these characters, depending on `radix`: - /// - /// * `0-9` - /// * `a-z` - /// * `A-Z` - /// - /// # Panics - /// - /// This function panics if `radix` is not in the range from 2 to 36. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - #[doc = concat!("assert_eq!(", stringify!($SelfT), "::from_str_radix(\"A\", 16), Ok(10));")] - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn from_str_radix(src: &str, radix: u32) -> Result<Self, ParseIntError> { - from_str_radix(src, radix) - } - /// Returns the number of ones in the binary representation of `self`. 
/// /// # Examples diff --git a/library/core/tests/lib.rs b/library/core/tests/lib.rs index f6d975c2c1e..52d2b798c91 100644 --- a/library/core/tests/lib.rs +++ b/library/core/tests/lib.rs @@ -16,6 +16,7 @@ #![feature(const_hash)] #![feature(const_heap)] #![feature(const_intrinsic_copy)] +#![feature(const_int_from_str)] #![feature(const_maybe_uninit_as_mut_ptr)] #![feature(const_nonnull_new)] #![feature(const_pointer_is_aligned)] diff --git a/library/core/tests/num/mod.rs b/library/core/tests/num/mod.rs index 863da9b18a2..0fed854318d 100644 --- a/library/core/tests/num/mod.rs +++ b/library/core/tests/num/mod.rs @@ -214,6 +214,16 @@ fn test_infallible_try_from_int_error() { assert!(func(0).is_ok()); } +const _TEST_CONST_PARSE: () = { + let Ok(-0x8000) = i16::from_str_radix("-8000", 16) else { panic!() }; + let Ok(12345) = u64::from_str_radix("12345", 10) else { panic!() }; + if let Err(e) = i8::from_str_radix("+", 10) { + let IntErrorKind::InvalidDigit = e.kind() else { panic!() }; + } else { + panic!() + } +}; + macro_rules! 
test_impl_from { ($fn_name:ident, bool, $target: ty) => { #[test] diff --git a/tests/ui/consts/const-eval/parse_ints.rs b/tests/ui/consts/const-eval/parse_ints.rs new file mode 100644 index 00000000000..ff9fc47e65c --- /dev/null +++ b/tests/ui/consts/const-eval/parse_ints.rs @@ -0,0 +1,10 @@ +#![feature(const_int_from_str)] + +const _OK: () = match i32::from_str_radix("-1234", 10) { + Ok(x) => assert!(x == -1234), + Err(_) => panic!(), +}; +const _TOO_LOW: () = { u64::from_str_radix("12345ABCD", 1); }; +const _TOO_HIGH: () = { u64::from_str_radix("12345ABCD", 37); }; + +fn main () {} diff --git a/tests/ui/consts/const-eval/parse_ints.stderr b/tests/ui/consts/const-eval/parse_ints.stderr new file mode 100644 index 00000000000..9e49fe433a1 --- /dev/null +++ b/tests/ui/consts/const-eval/parse_ints.stderr @@ -0,0 +1,31 @@ +error[E0080]: evaluation of constant value failed + --> $SRC_DIR/core/src/num/mod.rs:LL:COL + | + = note: the evaluated program panicked at 'from_str_radix_int: must lie in the range `[2, 36]`', $SRC_DIR/core/src/num/mod.rs:LL:COL + | +note: inside `core::num::<impl u64>::from_str_radix` + --> $SRC_DIR/core/src/num/mod.rs:LL:COL +note: inside `_TOO_LOW` + --> $DIR/parse_ints.rs:7:24 + | +LL | const _TOO_LOW: () = { u64::from_str_radix("12345ABCD", 1); }; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + = note: this error originates in the macro `from_str_radix` (in Nightly builds, run with -Z macro-backtrace for more info) + +error[E0080]: evaluation of constant value failed + --> $SRC_DIR/core/src/num/mod.rs:LL:COL + | + = note: the evaluated program panicked at 'from_str_radix_int: must lie in the range `[2, 36]`', $SRC_DIR/core/src/num/mod.rs:LL:COL + | +note: inside `core::num::<impl u64>::from_str_radix` + --> $SRC_DIR/core/src/num/mod.rs:LL:COL +note: inside `_TOO_HIGH` + --> $DIR/parse_ints.rs:8:25 + | +LL | const _TOO_HIGH: () = { u64::from_str_radix("12345ABCD", 37); }; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + = note: this error originates in the macro 
`from_str_radix` (in Nightly builds, run with -Z macro-backtrace for more info) + +error: aborting due to 2 previous errors + +For more information about this error, try `rustc --explain E0080`.