Auto merge of #103913 - Neutron3529:patch-1, r=thomcc
Improve performance of `rem_euclid()` for signed integers. The approach is copied from https://github.com/rust-lang/rust/blob/master/library/std/src/f32.rs and https://github.com/rust-lang/rust/blob/master/library/std/src/f64.rs: using `r + rhs.abs()` is faster than computing the result with a nested `if` clause. Bench result: ``` $ cargo bench Compiling div-euclid v0.1.0 (/me/div-euclid) Finished bench [optimized] target(s) in 1.01s Running unittests src/lib.rs (target/release/deps/div_euclid-7a4530ca7817d1ef) running 7 tests test tests::it_works ... ignored test tests::bench_aaabs ... bench: 10,498,793 ns/iter (+/- 104,360) test tests::bench_aadefault ... bench: 11,061,862 ns/iter (+/- 94,107) test tests::bench_abs ... bench: 10,477,193 ns/iter (+/- 81,942) test tests::bench_default ... bench: 10,622,983 ns/iter (+/- 25,119) test tests::bench_zzabs ... bench: 10,481,971 ns/iter (+/- 43,787) test tests::bench_zzdefault ... bench: 11,074,976 ns/iter (+/- 29,633) test result: ok. 0 passed; 0 failed; 1 ignored; 6 measured; 0 filtered out; finished in 19.35s ``` It seems that the default `rem_euclid` benefits from branch prediction, so `bench_default` is faster than `bench_aadefault` and `bench_zzdefault`, which shuffle the order of the calculations. But all of them are slower than the approach used in `f64`'s and `f32`'s `rem_euclid`, so I am submitting this PR. 
bench code: ```rust #![feature(test)] extern crate test; fn rem_euclid(a:i32,rhs:i32)->i32{ let r = a % rhs; if r < 0 { r + rhs.abs() } else { r } } #[cfg(test)] mod tests { use super::*; use test::Bencher; use rand::prelude::*; use rand::rngs::SmallRng; const N:i32=1000; #[test] fn it_works() { let a: i32 = 7; // or any other integer type let b = 4; let d:Vec<i32>=(-N..=N).collect(); let n:Vec<i32>=(-N..0).chain(1..=N).collect(); for i in &d { for j in &n { assert_eq!(i.rem_euclid(*j),rem_euclid(*i,*j)); } } assert_eq!(rem_euclid(a,b), 3); assert_eq!(rem_euclid(-a,b), 1); assert_eq!(rem_euclid(a,-b), 3); assert_eq!(rem_euclid(-a,-b), 1); } #[bench] fn bench_aaabs(b: &mut Bencher) { let mut d:Vec<i32>=(-N..=N).collect(); let mut n:Vec<i32>=(-N..0).chain(1..=N).collect(); let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]); n.shuffle(&mut rng); d.shuffle(&mut rng); n.shuffle(&mut rng); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=rem_euclid(*i,*j); } } res }); } #[bench] fn bench_aadefault(b: &mut Bencher) { let mut d:Vec<i32>=(-N..=N).collect(); let mut n:Vec<i32>=(-N..0).chain(1..=N).collect(); let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]); n.shuffle(&mut rng); d.shuffle(&mut rng); n.shuffle(&mut rng); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=i.rem_euclid(*j); } } res }); } #[bench] fn bench_abs(b: &mut Bencher) { let d:Vec<i32>=(-N..=N).collect(); let n:Vec<i32>=(-N..0).chain(1..=N).collect(); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=rem_euclid(*i,*j); } } res }); } #[bench] fn bench_default(b: &mut Bencher) { let d:Vec<i32>=(-N..=N).collect(); let n:Vec<i32>=(-N..0).chain(1..=N).collect(); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=i.rem_euclid(*j); } } res }); } #[bench] fn bench_zzabs(b: &mut Bencher) { let mut d:Vec<i32>=(-N..=N).collect(); let mut 
n:Vec<i32>=(-N..0).chain(1..=N).collect(); let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]); d.shuffle(&mut rng); n.shuffle(&mut rng); d.shuffle(&mut rng); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=rem_euclid(*i,*j); } } res }); } #[bench] fn bench_zzdefault(b: &mut Bencher) { let mut d:Vec<i32>=(-N..=N).collect(); let mut n:Vec<i32>=(-N..0).chain(1..=N).collect(); let mut rng=SmallRng::from_seed([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,21]); d.shuffle(&mut rng); n.shuffle(&mut rng); d.shuffle(&mut rng); b.iter(||{ let mut res=0; for i in &d { for j in &n { res+=i.rem_euclid(*j); } } res }); } } ```
This commit is contained in:
commit
6284998a26
@@ -2072,11 +2072,15 @@ pub const fn div_euclid(self, rhs: Self) -> Self {
         pub const fn rem_euclid(self, rhs: Self) -> Self {
             let r = self % rhs;
             if r < 0 {
-                if rhs < 0 {
-                    r - rhs
-                } else {
-                    r + rhs
-                }
+                // Semantically equivalent to `if rhs < 0 { r - rhs } else { r + rhs }`.
+                // If `rhs` is not `Self::MIN`, then `r + abs(rhs)` will not overflow
+                // and is clearly equivalent, because `r` is negative.
+                // Otherwise, `rhs` is `Self::MIN`, then we have
+                // `r.wrapping_add(Self::MIN.wrapping_abs())`, which evaluates
+                // to `r.wrapping_add(Self::MIN)`, which is equivalent to
+                // `r - Self::MIN`, which is what we wanted (and will not overflow
+                // for negative `r`).
+                r.wrapping_add(rhs.wrapping_abs())
             } else {
                 r
             }
Loading…
Reference in New Issue
Block a user