rust/library/core/tests/num/int_log.rs
Falk Hüffner d53c483502 Speed up integer log10.
This is achieved with a branchless bit-twiddling implementation of the
case x < 100_000, and by using this as a building block.

Benchmark on an Intel i7-8700K (Coffee Lake):

name                                   old ns/iter  new ns/iter  diff ns/iter   diff %  speedup
num::int_log::u8_log10_predictable     165          169                     4    2.42%   x 0.98
num::int_log::u8_log10_random          438          423                   -15   -3.42%   x 1.04
num::int_log::u8_log10_random_small    438          423                   -15   -3.42%   x 1.04
num::int_log::u16_log10_predictable    633          417                  -216  -34.12%   x 1.52
num::int_log::u16_log10_random         908          471                  -437  -48.13%   x 1.93
num::int_log::u16_log10_random_small   945          471                  -474  -50.16%   x 2.01
num::int_log::u32_log10_predictable    1,496        1,340                -156  -10.43%   x 1.12
num::int_log::u32_log10_random         1,076        873                  -203  -18.87%   x 1.23
num::int_log::u32_log10_random_small   1,145        874                  -271  -23.67%   x 1.31
num::int_log::u64_log10_predictable    4,005        3,171                -834  -20.82%   x 1.26
num::int_log::u64_log10_random         1,247        1,021                -226  -18.12%   x 1.22
num::int_log::u64_log10_random_small   1,265        921                  -344  -27.19%   x 1.37
num::int_log::u128_log10_predictable   39,667       39,579                -88   -0.22%   x 1.00
num::int_log::u128_log10_random        6,456        6,696                 240    3.72%   x 0.96
num::int_log::u128_log10_random_small  4,108        3,903                -205   -4.99%   x 1.05

Benchmark on an M1 Mac Mini:

name                                   old ns/iter  new ns/iter  diff ns/iter   diff %  speedup
num::int_log::u8_log10_predictable     143          130                   -13   -9.09%   x 1.10
num::int_log::u8_log10_random          375          325                   -50  -13.33%   x 1.15
num::int_log::u8_log10_random_small    376          325                   -51  -13.56%   x 1.16
num::int_log::u16_log10_predictable    500          322                  -178  -35.60%   x 1.55
num::int_log::u16_log10_random         794          405                  -389  -48.99%   x 1.96
num::int_log::u16_log10_random_small   1,035        405                  -630  -60.87%   x 2.56
num::int_log::u32_log10_predictable    1,144        894                  -250  -21.85%   x 1.28
num::int_log::u32_log10_random         832          786                   -46   -5.53%   x 1.06
num::int_log::u32_log10_random_small   832          787                   -45   -5.41%   x 1.06
num::int_log::u64_log10_predictable    2,681        2,057                -624  -23.27%   x 1.30
num::int_log::u64_log10_random         1,015        806                  -209  -20.59%   x 1.26
num::int_log::u64_log10_random_small   1,004        795                  -209  -20.82%   x 1.26
num::int_log::u128_log10_predictable   56,825       56,526               -299   -0.53%   x 1.01
num::int_log::u128_log10_random        9,056        8,861                -195   -2.15%   x 1.02
num::int_log::u128_log10_random_small  1,528        1,527                  -1   -0.07%   x 1.00

The 128-bit case remains ridiculously slow because LLVM fails to optimize division by
a constant 128-bit value into multiplications. This could be worked around, but it seems
preferable to fix this in LLVM.

From u32 up, a table lookup (as suggested here
https://github.com/rust-lang/rust/issues/70887#issuecomment-881099813) is still
faster, but it requires a hardware `leading_zeros` instruction to be viable, and
might clog up the cache.
2021-09-09 18:14:47 +02:00

157 lines
4.8 KiB
Rust

//! This tests the `Integer::{log,log2,log10}` methods. These tests are in a
//! separate file because there is a large number of them and because not all
//! of them can be run on Android, where `log2` uses an imprecise approximation:
//! <https://github.com/rust-lang/rust/blob/4825e12fc9c79954aa0fe18f5521efa6c19c7539/src/libstd/sys/unix/android.rs#L27-L53>
/// Tests `checked_log` with arbitrary bases.
///
/// Covers hand-picked spot checks, zero and non-positive inputs (which must
/// give `None`), and every positive `i16` and `u16` value compared against a
/// truncated `f32` logarithm.
#[test]
fn checked_log() {
    // Spot checks, including very large 64- and 128-bit operands.
    assert_eq!(999u32.checked_log(10), Some(2));
    assert_eq!(1000u32.checked_log(10), Some(3));
    assert_eq!(555u32.checked_log(13), Some(2));
    assert_eq!(63u32.checked_log(4), Some(2));
    assert_eq!(64u32.checked_log(4), Some(3));
    assert_eq!(10460353203u64.checked_log(3), Some(21));
    assert_eq!(10460353202u64.checked_log(3), Some(20));
    assert_eq!(147808829414345923316083210206383297601u128.checked_log(3), Some(80));
    assert_eq!(147808829414345923316083210206383297600u128.checked_log(3), Some(79));
    assert_eq!(22528399544939174411840147874772641u128.checked_log(19683), Some(8));
    assert_eq!(22528399544939174411840147874772631i128.checked_log(19683), Some(7));

    // Zero has no logarithm, whatever the integer type.
    assert_eq!(0u8.checked_log(4), None);
    assert_eq!(0u16.checked_log(4), None);
    assert_eq!(0i8.checked_log(4), None);
    assert_eq!(0i16.checked_log(4), None);

    // Each loop assertion names the input so that a failure in these
    // exhaustive sweeps can be traced back to the offending value.
    for i in i16::MIN..=0 {
        assert_eq!(i.checked_log(4), None, "checking {}", i);
    }
    for i in 1..=i16::MAX {
        assert_eq!(i.checked_log(13), Some((i as f32).log(13.0) as u32), "checking {}", i);
    }
    for i in 1..=u16::MAX {
        assert_eq!(i.checked_log(13), Some((i as f32).log(13.0) as u32), "checking {}", i);
    }
}
/// Tests `checked_log2`.
///
/// Covers a few spot checks, zero and non-positive inputs (which must give
/// `None`), and every positive 8- and 16-bit value compared against a
/// truncated `f32::log2`. The 16-bit sweeps skip the inputs that are known to
/// fail with Android's imprecise float `log2`.
#[test]
fn checked_log2() {
    assert_eq!(5u32.checked_log2(), Some(2));
    assert_eq!(0u64.checked_log2(), None);
    assert_eq!(128i32.checked_log2(), Some(7));
    assert_eq!((-55i16).checked_log2(), None);

    assert_eq!(0u8.checked_log2(), None);
    assert_eq!(0u16.checked_log2(), None);
    assert_eq!(0i8.checked_log2(), None);
    assert_eq!(0i16.checked_log2(), None);

    // Each loop assertion names the input so that a failure in these
    // exhaustive sweeps can be traced back to the offending value.
    for i in 1..=u8::MAX {
        assert_eq!(i.checked_log2(), Some((i as f32).log2() as u32), "checking {}", i);
    }
    for i in 1..=u16::MAX {
        // Guard against Android's imprecise f32::log2 implementation.
        if i != 8192 && i != 32768 {
            assert_eq!(i.checked_log2(), Some((i as f32).log2() as u32), "checking {}", i);
        }
    }
    for i in i8::MIN..=0 {
        assert_eq!(i.checked_log2(), None, "checking {}", i);
    }
    for i in 1..=i8::MAX {
        assert_eq!(i.checked_log2(), Some((i as f32).log2() as u32), "checking {}", i);
    }
    for i in i16::MIN..=0 {
        assert_eq!(i.checked_log2(), None, "checking {}", i);
    }
    for i in 1..=i16::MAX {
        // Guard against Android's imprecise f32::log2 implementation.
        if i != 8192 {
            assert_eq!(i.checked_log2(), Some((i as f32).log2() as u32), "checking {}", i);
        }
    }
}
/// Validates the inputs that `checked_log2` skips because they fail on
/// Android's imprecise float log2 implementation; compiled out on Android.
#[test]
#[cfg(not(target_os = "android"))]
fn checked_log2_not_android() {
    // Float references, truncated the same way as in the exhaustive sweeps.
    let log2_of_8192 = 8192_f32.log2() as u32;
    let log2_of_32768 = 32768_f32.log2() as u32;

    assert_eq!(8192u16.checked_log2(), Some(log2_of_8192));
    assert_eq!(32768u16.checked_log2(), Some(log2_of_32768));
    assert_eq!(8192i16.checked_log2(), Some(log2_of_8192));
}
/// Tests `checked_log10`.
///
/// Zero and non-positive inputs must give `None`; every positive `i16` and
/// `u16` value, and every `u32` value up to 100 000, is compared against a
/// truncated `f32::log10`.
#[test]
fn checked_log10() {
    assert_eq!(0u8.checked_log10(), None);
    assert_eq!(0u16.checked_log10(), None);
    assert_eq!(0i8.checked_log10(), None);
    assert_eq!(0i16.checked_log10(), None);

    // Each loop assertion names the input so that a failure in these
    // exhaustive sweeps can be traced back to the offending value.
    for i in i16::MIN..=0 {
        assert_eq!(i.checked_log10(), None, "checking {}", i);
    }
    for i in 1..=i16::MAX {
        assert_eq!(i.checked_log10(), Some((i as f32).log10() as u32), "checking {}", i);
    }
    for i in 1..=u16::MAX {
        assert_eq!(i.checked_log10(), Some((i as f32).log10() as u32), "checking {}", i);
    }
    for i in 1..=100_000u32 {
        assert_eq!(i.checked_log10(), Some((i as f32).log10() as u32), "checking {}", i);
    }
}
/// Asserts that `log10()` and `log(10)` on `$T` give the expected result at,
/// just below and just above every power of ten from `10^0` to
/// `10^$log10_max`.
///
/// * `$T` - integer type under test.
/// * `$log10_max` - expected value of `<$T>::MAX.log10()`; it is also used as
///   the largest exponent probed, so every value computed in the loop (up to
///   `10^$log10_max + 9`) has to fit in `$T`.
///
/// Every assertion names the type and exponent so that a failure can be
/// traced back to the offending input.
macro_rules! log10_loop {
    ($T:ty, $log10_max:expr) => {
        assert_eq!(<$T>::MAX.log10(), $log10_max, "checking {}::MAX", stringify!($T));
        for i in 0..=$log10_max {
            let ty = stringify!($T);
            let p = (10 as $T).pow(i as u32);
            // `p - 9` and `i - 1` are only meaningful from the second power on.
            if p >= 10 {
                assert_eq!((p - 9).log10(), i - 1, "{}: (10^{} - 9).log10()", ty, i);
                assert_eq!((p - 1).log10(), i - 1, "{}: (10^{} - 1).log10()", ty, i);
            }
            assert_eq!(p.log10(), i, "{}: (10^{}).log10()", ty, i);
            assert_eq!((p + 1).log10(), i, "{}: (10^{} + 1).log10()", ty, i);
            // For `p == 1`, `p + 9` is already the next power of ten.
            if p >= 10 {
                assert_eq!((p + 9).log10(), i, "{}: (10^{} + 9).log10()", ty, i);
            }

            // also check `x.log(10)`
            if p >= 10 {
                assert_eq!((p - 9).log(10), i - 1, "{}: (10^{} - 9).log(10)", ty, i);
                assert_eq!((p - 1).log(10), i - 1, "{}: (10^{} - 1).log(10)", ty, i);
            }
            assert_eq!(p.log(10), i, "{}: (10^{}).log(10)", ty, i);
            assert_eq!((p + 1).log(10), i, "{}: (10^{} + 1).log(10)", ty, i);
            if p >= 10 {
                assert_eq!((p + 9).log(10), i, "{}: (10^{} + 9).log(10)", ty, i);
            }
        }
    };
}
/// Runs the `log10_loop!` power-of-ten checks for `u8`, expecting
/// `u8::MAX.log10()` to be 2.
#[test]
fn log10_u8() {
    log10_loop!(u8, 2);
}
/// Runs the `log10_loop!` power-of-ten checks for `u16`, expecting
/// `u16::MAX.log10()` to be 4.
#[test]
fn log10_u16() {
    log10_loop!(u16, 4);
}
/// Runs the `log10_loop!` power-of-ten checks for `u32`, expecting
/// `u32::MAX.log10()` to be 9.
#[test]
fn log10_u32() {
    log10_loop!(u32, 9);
}
/// Runs the `log10_loop!` power-of-ten checks for `u64`, expecting
/// `u64::MAX.log10()` to be 19.
#[test]
fn log10_u64() {
    log10_loop!(u64, 19);
}
/// Runs the `log10_loop!` power-of-ten checks for `u128`, expecting
/// `u128::MAX.log10()` to be 38.
#[test]
fn log10_u128() {
    log10_loop!(u128, 38);
}