Auto merge of #95399 - gilescope:plan_b, r=scottmcm
Faster parsing for lower numbers for radix up to 16 (cont.) ( Continuation of https://github.com/rust-lang/rust/pull/83371 ) With LingMan's change I think this is potentially ready.
This commit is contained in:
commit
4e1927db3c
@ -5,6 +5,7 @@
|
||||
use crate::ascii;
|
||||
use crate::intrinsics;
|
||||
use crate::mem;
|
||||
use crate::ops::{Add, Mul, Sub};
|
||||
use crate::str::FromStr;
|
||||
|
||||
// Used because the `?` operator is not allowed in a const context.
|
||||
@ -954,9 +955,10 @@ pub enum FpCategory {
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
trait FromStrRadixHelper: PartialOrd + Copy {
|
||||
fn min_value() -> Self;
|
||||
fn max_value() -> Self;
|
||||
trait FromStrRadixHelper:
|
||||
PartialOrd + Copy + Add<Output = Self> + Sub<Output = Self> + Mul<Output = Self>
|
||||
{
|
||||
const MIN: Self;
|
||||
fn from_u32(u: u32) -> Self;
|
||||
fn checked_mul(&self, other: u32) -> Option<Self>;
|
||||
fn checked_sub(&self, other: u32) -> Option<Self>;
|
||||
@ -976,12 +978,9 @@ fn from_str(src: &str) -> Result<Self, ParseIntError> {
|
||||
}
|
||||
from_str_radix_int_impl! { isize i8 i16 i32 i64 i128 usize u8 u16 u32 u64 u128 }
|
||||
|
||||
macro_rules! doit {
|
||||
macro_rules! impl_helper_for {
|
||||
($($t:ty)*) => ($(impl FromStrRadixHelper for $t {
|
||||
#[inline]
|
||||
fn min_value() -> Self { Self::MIN }
|
||||
#[inline]
|
||||
fn max_value() -> Self { Self::MAX }
|
||||
const MIN: Self = Self::MIN;
|
||||
#[inline]
|
||||
fn from_u32(u: u32) -> Self { u as Self }
|
||||
#[inline]
|
||||
@ -998,7 +997,18 @@ fn checked_add(&self, other: u32) -> Option<Self> {
|
||||
}
|
||||
})*)
|
||||
}
|
||||
doit! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize }
|
||||
impl_helper_for! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize }
|
||||
|
||||
/// Determines if a string of text of that length of that radix could be guaranteed to be
|
||||
/// stored in the given type T.
|
||||
/// Note that if the radix is known to the compiler, it is just the check of digits.len that
|
||||
/// is done at runtime.
|
||||
#[doc(hidden)]
|
||||
#[inline(always)]
|
||||
#[unstable(issue = "none", feature = "std_internals")]
|
||||
pub fn can_not_overflow<T>(radix: u32, is_signed_ty: bool, digits: &[u8]) -> bool {
|
||||
radix <= 16 && digits.len() <= mem::size_of::<T>() * 2 - is_signed_ty as usize
|
||||
}
|
||||
|
||||
fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, ParseIntError> {
|
||||
use self::IntErrorKind::*;
|
||||
@ -1014,7 +1024,7 @@ fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, Par
|
||||
return Err(PIE { kind: Empty });
|
||||
}
|
||||
|
||||
let is_signed_ty = T::from_u32(0) > T::min_value();
|
||||
let is_signed_ty = T::from_u32(0) > T::MIN;
|
||||
|
||||
// all valid digits are ascii, so we will just iterate over the utf8 bytes
|
||||
// and cast them to chars. .to_digit() will safely return None for anything
|
||||
@ -1032,38 +1042,56 @@ fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, Par
|
||||
};
|
||||
|
||||
let mut result = T::from_u32(0);
|
||||
if is_positive {
|
||||
// The number is positive
|
||||
for &c in digits {
|
||||
let x = match (c as char).to_digit(radix) {
|
||||
Some(x) => x,
|
||||
None => return Err(PIE { kind: InvalidDigit }),
|
||||
};
|
||||
result = match result.checked_mul(radix) {
|
||||
Some(result) => result,
|
||||
None => return Err(PIE { kind: PosOverflow }),
|
||||
};
|
||||
result = match result.checked_add(x) {
|
||||
Some(result) => result,
|
||||
None => return Err(PIE { kind: PosOverflow }),
|
||||
|
||||
if can_not_overflow::<T>(radix, is_signed_ty, digits) {
|
||||
// If the len of the str is short compared to the range of the type
|
||||
// we are parsing into, then we can be certain that an overflow will not occur.
|
||||
// This bound is when `radix.pow(digits.len()) - 1 <= T::MAX` but the condition
|
||||
// above is a faster (conservative) approximation of this.
|
||||
//
|
||||
// Consider radix 16 as it has the highest information density per digit and will thus overflow the earliest:
|
||||
// `u8::MAX` is `ff` - any str of len 2 is guaranteed to not overflow.
|
||||
// `i8::MAX` is `7f` - only a str of len 1 is guaranteed to not overflow.
|
||||
macro_rules! run_unchecked_loop {
|
||||
($unchecked_additive_op:expr) => {
|
||||
for &c in digits {
|
||||
result = result * T::from_u32(radix);
|
||||
let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
|
||||
result = $unchecked_additive_op(result, T::from_u32(x));
|
||||
}
|
||||
};
|
||||
}
|
||||
if is_positive {
|
||||
run_unchecked_loop!(<T as core::ops::Add>::add)
|
||||
} else {
|
||||
run_unchecked_loop!(<T as core::ops::Sub>::sub)
|
||||
};
|
||||
} else {
|
||||
// The number is negative
|
||||
for &c in digits {
|
||||
let x = match (c as char).to_digit(radix) {
|
||||
Some(x) => x,
|
||||
None => return Err(PIE { kind: InvalidDigit }),
|
||||
};
|
||||
result = match result.checked_mul(radix) {
|
||||
Some(result) => result,
|
||||
None => return Err(PIE { kind: NegOverflow }),
|
||||
};
|
||||
result = match result.checked_sub(x) {
|
||||
Some(result) => result,
|
||||
None => return Err(PIE { kind: NegOverflow }),
|
||||
macro_rules! run_checked_loop {
|
||||
($checked_additive_op:ident, $overflow_err:expr) => {
|
||||
for &c in digits {
|
||||
// When `radix` is passed in as a literal, rather than doing a slow `imul`
|
||||
// the compiler can use shifts if `radix` can be expressed as a
|
||||
// sum of powers of 2 (x*10 can be written as x*8 + x*2).
|
||||
// When the compiler can't use these optimisations,
|
||||
// the latency of the multiplication can be hidden by issuing it
|
||||
// before the result is needed to improve performance on
|
||||
// modern out-of-order CPUs. As multiplication here is slower
|
||||
// than the other instructions, we can get the end result faster
|
||||
// doing multiplication first and letting the CPU spend other cycles
|
||||
// doing other computation and get multiplication result later.
|
||||
let mul = result.checked_mul(radix);
|
||||
let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
|
||||
result = mul.ok_or_else($overflow_err)?;
|
||||
result = T::$checked_additive_op(&result, x).ok_or_else($overflow_err)?;
|
||||
}
|
||||
};
|
||||
}
|
||||
if is_positive {
|
||||
run_checked_loop!(checked_add, || PIE { kind: PosOverflow })
|
||||
} else {
|
||||
run_checked_loop!(checked_sub, || PIE { kind: NegOverflow })
|
||||
};
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
@ -53,6 +53,7 @@
|
||||
#![feature(numfmt)]
|
||||
#![feature(step_trait)]
|
||||
#![feature(str_internals)]
|
||||
#![feature(std_internals)]
|
||||
#![feature(test)]
|
||||
#![feature(trusted_len)]
|
||||
#![feature(try_blocks)]
|
||||
|
@ -2,7 +2,7 @@
|
||||
use core::convert::{TryFrom, TryInto};
|
||||
use core::fmt::Debug;
|
||||
use core::marker::Copy;
|
||||
use core::num::{IntErrorKind, ParseIntError, TryFromIntError};
|
||||
use core::num::{can_not_overflow, IntErrorKind, ParseIntError, TryFromIntError};
|
||||
use core::ops::{Add, Div, Mul, Rem, Sub};
|
||||
use core::option::Option;
|
||||
use core::option::Option::None;
|
||||
@ -120,6 +120,75 @@ fn test_int_from_str_overflow() {
|
||||
test_parse::<i64>("-9223372036854775809", Err(IntErrorKind::NegOverflow));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_can_not_overflow() {
|
||||
fn can_overflow<T>(radix: u32, input: &str) -> bool
|
||||
where
|
||||
T: std::convert::TryFrom<i8>,
|
||||
{
|
||||
!can_not_overflow::<T>(radix, T::try_from(-1_i8).is_ok(), input.as_bytes())
|
||||
}
|
||||
|
||||
// Positive tests:
|
||||
assert!(!can_overflow::<i8>(16, "F"));
|
||||
assert!(!can_overflow::<u8>(16, "FF"));
|
||||
|
||||
assert!(!can_overflow::<i8>(10, "9"));
|
||||
assert!(!can_overflow::<u8>(10, "99"));
|
||||
|
||||
// Negative tests:
|
||||
|
||||
// Not currently in std lib (issue: #27728)
|
||||
fn format_radix<T>(mut x: T, radix: T) -> String
|
||||
where
|
||||
T: std::ops::Rem<Output = T>,
|
||||
T: std::ops::Div<Output = T>,
|
||||
T: std::cmp::PartialEq,
|
||||
T: std::default::Default,
|
||||
T: Copy,
|
||||
T: Default,
|
||||
u32: TryFrom<T>,
|
||||
{
|
||||
let mut result = vec![];
|
||||
|
||||
loop {
|
||||
let m = x % radix;
|
||||
x = x / radix;
|
||||
result.push(
|
||||
std::char::from_digit(m.try_into().ok().unwrap(), radix.try_into().ok().unwrap())
|
||||
.unwrap(),
|
||||
);
|
||||
if x == T::default() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
result.into_iter().rev().collect()
|
||||
}
|
||||
|
||||
macro_rules! check {
|
||||
($($t:ty)*) => ($(
|
||||
for base in 2..=36 {
|
||||
let num = (<$t>::MAX as u128) + 1;
|
||||
|
||||
// Calculate the string length for the smallest overflowing number:
|
||||
let max_len_string = format_radix(num, base as u128);
|
||||
// Ensure that this string length is deemed to potentially overflow:
|
||||
assert!(can_overflow::<$t>(base, &max_len_string));
|
||||
}
|
||||
)*)
|
||||
}
|
||||
|
||||
check! { i8 i16 i32 i64 i128 isize usize u8 u16 u32 u64 }
|
||||
|
||||
// Check u128 separately:
|
||||
for base in 2..=36 {
|
||||
let num = u128::MAX as u128;
|
||||
let max_len_string = format_radix(num, base as u128);
|
||||
// base 16 fits perfectly for u128 and won't overflow:
|
||||
assert_eq!(can_overflow::<u128>(base, &max_len_string), base != 16);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_leading_plus() {
|
||||
test_parse::<u8>("+127", Ok(127));
|
||||
|
Loading…
Reference in New Issue
Block a user