From c2258d6d042353bfa6daad6008bcd3c0af3f13de Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 8 Apr 2015 14:25:22 +1000 Subject: [PATCH] Optimise Iterator::{max, max_by, min, min_by}. The main change in this patch is removing the use of `Option` inside the inner loops of those functions to avoid comparisons where one branch will only trigger on the first pass through the loop. The included benchmarks go from: test bench_max ... bench: 372 ns/iter (+/- 118) test bench_max_by ... bench: 428 ns/iter (+/- 33) test bench_max_by2 ... bench: 7128 ns/iter (+/- 326) to: test bench_max ... bench: 317 ns/iter (+/- 64) test bench_max_by ... bench: 356 ns/iter (+/- 270) test bench_max_by2 ... bench: 1387 ns/iter (+/- 183) Problem noticed in http://www.reddit.com/r/rust/comments/31syce/using_iterators_to_find_the_index_of_the_min_or/ --- src/libcore/iter.rs | 93 +++++++++++++++++++++++++---------------- src/libcoretest/iter.rs | 31 ++++++++++++++ 2 files changed, 88 insertions(+), 36 deletions(-) diff --git a/src/libcore/iter.rs b/src/libcore/iter.rs index 939d9b4ab0e..8feae0bfb09 100644 --- a/src/libcore/iter.rs +++ b/src/libcore/iter.rs @@ -743,12 +743,12 @@ pub trait Iterator { #[stable(feature = "rust1", since = "1.0.0")] fn max(self) -> Option where Self: Sized, Self::Item: Ord { - self.fold(None, |max, y| { - match max { - None => Some(y), - Some(x) => Some(cmp::max(x, y)) - } - }) + select_fold1(self, + |_| (), + // switch to y even if it is only equal, to preserve + // stability. + |_, x, _, y| *x <= *y) + .map(|(_, x)| x) } /// Consumes the entire iterator to return the minimum element. @@ -766,12 +766,12 @@ pub trait Iterator { #[stable(feature = "rust1", since = "1.0.0")] fn min(self) -> Option where Self: Sized, Self::Item: Ord { - self.fold(None, |min, y| { - match min { - None => Some(y), - Some(x) => Some(cmp::min(x, y)) - } - }) + select_fold1(self, + |_| (), + // only switch to y if it is strictly smaller, to + // preserve stability. + |_, x, _, y| *x > *y) + .map(|(_, x)| x) } /// `min_max` finds the minimum and maximum elements in the iterator. @@ -869,21 +869,16 @@ pub trait Iterator { #[inline] #[unstable(feature = "core", reason = "may want to produce an Ordering directly; see #15311")] - fn max_by(self, mut f: F) -> Option where + fn max_by(self, f: F) -> Option where Self: Sized, F: FnMut(&Self::Item) -> B, { - self.fold(None, |max: Option<(Self::Item, B)>, y| { - let y_val = f(&y); - match max { - None => Some((y, y_val)), - Some((x, x_val)) => if y_val >= x_val { - Some((y, y_val)) - } else { - Some((x, x_val)) - } - } - }).map(|(x, _)| x) + select_fold1(self, + f, + // switch to y even if it is only equal, to preserve + // stability. + |x_p, _, y_p, _| x_p <= y_p) + .map(|(_, x)| x) } /// Return the element that gives the minimum value from the @@ -903,21 +898,16 @@ pub trait Iterator { #[inline] #[unstable(feature = "core", reason = "may want to produce an Ordering directly; see #15311")] - fn min_by(self, mut f: F) -> Option where + fn min_by(self, f: F) -> Option where Self: Sized, F: FnMut(&Self::Item) -> B, { - self.fold(None, |min: Option<(Self::Item, B)>, y| { - let y_val = f(&y); - match min { - None => Some((y, y_val)), - Some((x, x_val)) => if x_val <= y_val { - Some((x, x_val)) - } else { - Some((y, y_val)) - } - } - }).map(|(x, _)| x) + select_fold1(self, + f, + // only switch to y if it is strictly smaller, to + // preserve stability. + |x_p, _, y_p, _| x_p > y_p) + .map(|(_, x)| x) } /// Change the direction of the iterator @@ -1024,6 +1014,37 @@ pub trait Iterator { } } +/// Select an element from an iterator based on the given projection +/// and "comparison" function. +/// +/// This is an idiosyncratic helper to try to factor out the +/// commonalities of {max,min}{,_by}. In particular, this avoids +/// having to implement optimisations several times. +#[inline] +fn select_fold1(mut it: I, + mut f_proj: FProj, + mut f_cmp: FCmp) -> Option<(B, I::Item)> + where I: Iterator, + FProj: FnMut(&I::Item) -> B, + FCmp: FnMut(&B, &I::Item, &B, &I::Item) -> bool +{ + // start with the first element as our selection. This avoids + // having to use `Option`s inside the loop, translating to a + // sizeable performance gain (6x in one case). + it.next().map(|mut sel| { + let mut sel_p = f_proj(&sel); + + for x in it { + let x_p = f_proj(&x); + if f_cmp(&sel_p, &sel, &x_p, &x) { + sel = x; + sel_p = x_p; + } + } + (sel_p, sel) + }) +} + #[stable(feature = "rust1", since = "1.0.0")] impl<'a, I: Iterator + ?Sized> Iterator for &'a mut I { type Item = I::Item; diff --git a/src/libcoretest/iter.rs b/src/libcoretest/iter.rs index af80d347f02..ef050d46e1f 100644 --- a/src/libcoretest/iter.rs +++ b/src/libcoretest/iter.rs @@ -901,3 +901,34 @@ fn bench_multiple_take(b: &mut Bencher) { } }); } + +fn scatter(x: i32) -> i32 { (x * 31) % 127 } + +#[bench] +fn bench_max_by(b: &mut Bencher) { + b.iter(|| { + let it = 0..100; + it.max_by(|&x| scatter(x)) + }) +} + +// http://www.reddit.com/r/rust/comments/31syce/using_iterators_to_find_the_index_of_the_min_or/ +#[bench] +fn bench_max_by2(b: &mut Bencher) { + fn max_index_iter(array: &[i32]) -> usize { + array.iter().enumerate().max_by(|&(_, item)| item).unwrap().0 + } + + let mut data = vec![0i32; 1638]; + data[514] = 9999; + + b.iter(|| max_index_iter(&data)); +} + +#[bench] +fn bench_max(b: &mut Bencher) { + b.iter(|| { + let it = 0..100; + it.map(scatter).max() + }) +}