Specialize StepBy<Range<{integer}>>

For ranges whose item type is no wider than usize we determine
the number of items StepBy would yield and then store that count
in range.end instead of the actual end. This significantly
simplifies calculation of the loop induction variable,
especially in cases where StepBy::step (a usize)
could overflow the Range's item type.
This commit is contained in:
The 8472 2023-05-22 16:14:19 +02:00
parent 4051305389
commit 070ce235f2
3 changed files with 430 additions and 38 deletions

View File

@ -2,6 +2,7 @@ use core::borrow::Borrow;
use core::iter::*;
use core::mem;
use core::num::Wrapping;
use core::ops::Range;
use test::{black_box, Bencher};
#[bench]
@ -69,6 +70,57 @@ fn bench_max(b: &mut Bencher) {
})
}
/// Sums every 8th value of `0u32..1024` with a plain `for` loop and `+=`.
#[bench]
fn bench_range_step_by_sum_reducible(b: &mut Bencher) {
    let range = 0u32..1024;
    b.iter(|| {
        // The range is passed through `black_box` before stepping.
        let stepped = black_box(range.clone()).step_by(8);
        let mut total: u32 = 0;
        for value in stepped {
            total += value;
        }
        total
    })
}
/// Walks `0..u16::MAX as u32` in steps of 64 with a `for` loop, mixing each
/// value with its wrapping predecessor before a wrapping accumulation.
#[bench]
fn bench_range_step_by_loop_u32(b: &mut Bencher) {
    let range = 0..(u16::MAX as u32);
    b.iter(|| {
        let stepped = black_box(range.clone()).step_by(64);
        let mut total: u32 = 0;
        for value in stepped {
            let mixed = value ^ value.wrapping_sub(1);
            total = total.wrapping_add(mixed);
        }
        total
    })
}
/// Same workload as the loop benchmark, but over `Range<usize>` and driven
/// through `map` + `fold` instead of a `for` loop.
#[bench]
fn bench_range_step_by_fold_usize(b: &mut Bencher) {
    let range: Range<usize> = 0..(u16::MAX as usize);
    b.iter(|| {
        let stepped = black_box(range.clone()).step_by(64);
        let mixed = stepped.map(|v: usize| v ^ (v.wrapping_sub(1)));
        mixed.fold(0usize, |total, v| total.wrapping_add(v))
    })
}
/// `map` + `fold` over a stepped `Range<u16>`, an item type narrower than the
/// `usize` step.
#[bench]
fn bench_range_step_by_fold_u16(b: &mut Bencher) {
    let range: Range<u16> = 0..u16::MAX;
    b.iter(|| {
        let stepped = black_box(range.clone()).step_by(64);
        let mixed = stepped.map(|v: u16| v ^ (v.wrapping_sub(1)));
        mixed.fold(0u16, |total, v| total.wrapping_add(v))
    })
}
pub fn copy_zip(xs: &[u8], ys: &mut [u8]) {
for (a, b) in ys.iter_mut().zip(xs) {
*a = *b;

View File

@ -1,4 +1,9 @@
use crate::{intrinsics, iter::from_fn, ops::Try};
use crate::convert::TryFrom;
use crate::{
intrinsics,
iter::from_fn,
ops::{Range, Try},
};
/// An iterator for stepping iterators by a custom amount.
///
@ -17,8 +22,10 @@ pub struct StepBy<I> {
}
impl<I> StepBy<I> {
    /// Builds a `StepBy` around `iter`.
    ///
    /// The inner iterator is first handed to `SpecRangeSetup::setup`, and the
    /// step is stored as `step - 1`.
    ///
    /// # Panics
    ///
    /// Panics if `step` is zero.
    #[inline]
    pub(in crate::iter) fn new(iter: I, step: usize) -> StepBy<I> {
        assert!(step != 0);
        StepBy {
            iter: <I as SpecRangeSetup<I>>::setup(iter, step),
            step: step - 1,
            first_take: true,
        }
    }
}
@ -32,16 +39,154 @@ where
#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.first_take {
self.first_take = false;
self.iter.next()
} else {
self.iter.nth(self.step)
}
self.spec_next()
}
// Forwards to the `spec_size_hint` hook of `StepByImpl`, so a specialized
// impl can report its own bounds.
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.spec_size_hint()
}
// Forwards to the `spec_nth` hook of `StepByImpl`.
#[inline]
fn nth(&mut self, n: usize) -> Option<Self::Item> {
self.spec_nth(n)
}
// Forwards to the `spec_try_fold` hook of `StepByImpl`; the accumulator and
// closure are passed through untouched.
fn try_fold<Acc, F, R>(&mut self, acc: Acc, f: F) -> R
where
F: FnMut(Acc, Self::Item) -> R,
R: Try<Output = Acc>,
{
self.spec_try_fold(acc, f)
}
// Forwards to the `spec_fold` hook of `StepByImpl`, consuming `self`.
#[inline]
fn fold<Acc, F>(self, acc: Acc, f: F) -> Acc
where
F: FnMut(Acc, Self::Item) -> Acc,
{
self.spec_fold(acc, f)
}
}
impl<I> StepBy<I>
where
    I: ExactSizeIterator,
{
    /// Zero-based index, counted from the back of the inner iterator, of the
    /// last element this adapter will yield.
    ///
    /// Used by the `DoubleEndedIterator` implementation.
    fn next_back_index(&self) -> usize {
        // `self.step` holds the original step minus one.
        let rem = self.iter.len() % (self.step + 1);
        match (self.first_take, rem) {
            (false, r) => r,
            (true, 0) => self.step,
            (true, r) => r - 1,
        }
    }
}
// Back-to-front iteration for `StepBy`. Every method forwards to the
// matching `spec_*` hook of `StepByBackImpl`, which is where the generic and
// the integer-range implementations live.
#[stable(feature = "double_ended_step_by_iterator", since = "1.38.0")]
impl<I> DoubleEndedIterator for StepBy<I>
where
I: DoubleEndedIterator + ExactSizeIterator,
{
// Forwards to `spec_next_back`.
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
self.spec_next_back()
}
// Forwards to `spec_nth_back`.
#[inline]
fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
self.spec_nth_back(n)
}
// Forwards to `spec_try_rfold`.
fn try_rfold<Acc, F, R>(&mut self, init: Acc, f: F) -> R
where
F: FnMut(Acc, Self::Item) -> R,
R: Try<Output = Acc>,
{
self.spec_try_rfold(init, f)
}
// Forwards to `spec_rfold`, consuming `self`.
#[inline]
fn rfold<Acc, F>(self, init: Acc, f: F) -> Acc
where
Self: Sized,
F: FnMut(Acc, Self::Item) -> Acc,
{
self.spec_rfold(init, f)
}
}
// StepBy can only make the iterator shorter, so the len will still fit.
// The impl body is empty: only the trait's provided methods are used.
#[stable(feature = "iterator_step_by", since = "1.28.0")]
impl<I> ExactSizeIterator for StepBy<I> where I: ExactSizeIterator {}
/// Specialization hook invoked once by `StepBy::new`.
///
/// It lets an inner iterator type rewrite itself, given the requested step,
/// before it is stored in the adapter.
trait SpecRangeSetup<T> {
/// Returns the iterator that `StepBy` should store. `step` is the step
/// passed to `StepBy::new`, before one is subtracted from it.
fn setup(inner: T, step: usize) -> T;
}
// Blanket fallback: iterators without a dedicated specialization are stored
// unchanged and the step is ignored.
impl<T> SpecRangeSetup<T> for T {
#[inline]
default fn setup(inner: T, _step: usize) -> T {
inner
}
}
/// Specialization hooks behind the forward-iteration methods of `StepBy`.
///
/// `Iterator for StepBy` forwards `next`, `size_hint`, `nth`, `try_fold` and
/// `fold` to the `spec_*` method of the same name.
trait StepByImpl<I> {
/// Item type yielded by the hooks.
type Item;
/// Hook behind `Iterator::next`.
fn spec_next(&mut self) -> Option<Self::Item>;
/// Hook behind `Iterator::size_hint`.
fn spec_size_hint(&self) -> (usize, Option<usize>);
/// Hook behind `Iterator::nth`.
fn spec_nth(&mut self, n: usize) -> Option<Self::Item>;
/// Hook behind `Iterator::try_fold`.
fn spec_try_fold<Acc, F, R>(&mut self, acc: Acc, f: F) -> R
where
F: FnMut(Acc, Self::Item) -> R,
R: Try<Output = Acc>;
/// Hook behind `Iterator::fold`.
fn spec_fold<Acc, F>(self, acc: Acc, f: F) -> Acc
where
F: FnMut(Acc, Self::Item) -> Acc;
}
/// Specialization hooks behind the `DoubleEndedIterator` methods of `StepBy`.
///
/// Every method carries the `I: DoubleEndedIterator + ExactSizeIterator`
/// bound that the `DoubleEndedIterator for StepBy` impl itself requires.
trait StepByBackImpl<I> {
/// Item type yielded by the hooks.
type Item;
/// Hook behind `DoubleEndedIterator::next_back`.
fn spec_next_back(&mut self) -> Option<Self::Item>
where
I: DoubleEndedIterator + ExactSizeIterator;
/// Hook behind `DoubleEndedIterator::nth_back`.
fn spec_nth_back(&mut self, n: usize) -> Option<Self::Item>
where
I: DoubleEndedIterator + ExactSizeIterator;
/// Hook behind `DoubleEndedIterator::try_rfold`.
fn spec_try_rfold<Acc, F, R>(&mut self, init: Acc, f: F) -> R
where
I: DoubleEndedIterator + ExactSizeIterator,
F: FnMut(Acc, Self::Item) -> R,
R: Try<Output = Acc>;
/// Hook behind `DoubleEndedIterator::rfold`.
fn spec_rfold<Acc, F>(self, init: Acc, f: F) -> Acc
where
I: DoubleEndedIterator + ExactSizeIterator,
F: FnMut(Acc, Self::Item) -> Acc;
}
impl<I: Iterator> StepByImpl<I> for StepBy<I> {
type Item = I::Item;
// Generic `next`: the first call takes the front element of the inner
// iterator (`nth(0)`); every later call uses `nth(self.step)`.
#[inline]
default fn spec_next(&mut self) -> Option<I::Item> {
    let skip = match self.first_take {
        true => 0,
        false => self.step,
    };
    self.first_take = false;
    self.iter.nth(skip)
}
#[inline]
default fn spec_size_hint(&self) -> (usize, Option<usize>) {
#[inline]
fn first_size(step: usize) -> impl Fn(usize) -> usize {
move |n| if n == 0 { 0 } else { 1 + (n - 1) / (step + 1) }
@ -64,7 +209,7 @@ where
}
#[inline]
fn nth(&mut self, mut n: usize) -> Option<Self::Item> {
default fn spec_nth(&mut self, mut n: usize) -> Option<I::Item> {
if self.first_take {
self.first_take = false;
let first = self.iter.next();
@ -108,7 +253,7 @@ where
}
}
fn try_fold<Acc, F, R>(&mut self, mut acc: Acc, mut f: F) -> R
default fn spec_try_fold<Acc, F, R>(&mut self, mut acc: Acc, mut f: F) -> R
where
F: FnMut(Acc, Self::Item) -> R,
R: Try<Output = Acc>,
@ -128,7 +273,7 @@ where
from_fn(nth(&mut self.iter, self.step)).try_fold(acc, f)
}
fn fold<Acc, F>(mut self, mut acc: Acc, mut f: F) -> Acc
default fn spec_fold<Acc, F>(mut self, mut acc: Acc, mut f: F) -> Acc
where
F: FnMut(Acc, Self::Item) -> Acc,
{
@ -148,34 +293,16 @@ where
}
}
impl<I> StepBy<I>
where
I: ExactSizeIterator,
{
// The zero-based index starting from the end of the iterator of the
// last element. Used in the `DoubleEndedIterator` implementation.
fn next_back_index(&self) -> usize {
let rem = self.iter.len() % (self.step + 1);
if self.first_take {
if rem == 0 { self.step } else { rem - 1 }
} else {
rem
}
}
}
impl<I: DoubleEndedIterator + ExactSizeIterator> StepByBackImpl<I> for StepBy<I> {
type Item = I::Item;
#[stable(feature = "double_ended_step_by_iterator", since = "1.38.0")]
impl<I> DoubleEndedIterator for StepBy<I>
where
I: DoubleEndedIterator + ExactSizeIterator,
{
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
default fn spec_next_back(&mut self) -> Option<Self::Item> {
self.iter.nth_back(self.next_back_index())
}
#[inline]
fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
default fn spec_nth_back(&mut self, n: usize) -> Option<I::Item> {
// `self.iter.nth_back(usize::MAX)` does the right thing here when `n`
// is out of bounds because the length of `self.iter` does not exceed
// `usize::MAX` (because `I: ExactSizeIterator`) and `nth_back` is
@ -184,7 +311,7 @@ where
self.iter.nth_back(n)
}
fn try_rfold<Acc, F, R>(&mut self, init: Acc, mut f: F) -> R
default fn spec_try_rfold<Acc, F, R>(&mut self, init: Acc, mut f: F) -> R
where
F: FnMut(Acc, Self::Item) -> R,
R: Try<Output = Acc>,
@ -207,10 +334,10 @@ where
}
#[inline]
fn rfold<Acc, F>(mut self, init: Acc, mut f: F) -> Acc
default fn spec_rfold<Acc, F>(mut self, init: Acc, mut f: F) -> Acc
where
Self: Sized,
F: FnMut(Acc, Self::Item) -> Acc,
F: FnMut(Acc, I::Item) -> Acc,
{
#[inline]
fn nth_back<I: DoubleEndedIterator>(
@ -230,6 +357,164 @@ where
}
}
// StepBy can only make the iterator shorter, so the len will still fit.
#[stable(feature = "iterator_step_by", since = "1.28.0")]
impl<I> ExactSizeIterator for StepBy<I> where I: ExactSizeIterator {}
// Generates, for every listed integer type `$t`, the `SpecRangeSetup` and
// `StepByImpl` specializations of `StepBy<Range<$t>>`.
//
// After `setup` has run, the stored range no longer describes `start..end`:
// `iter.start` is the next value to yield and `iter.end` is the number of
// items that are still to be yielded.
macro_rules! spec_int_ranges {
($($t:ty)*) => ($(
// The item count is derived from a `usize` size hint and then stored in a
// `$t`, so `$t` must not be wider than `usize`.
const _: () = assert!(usize::BITS >= <$t>::BITS);
impl SpecRangeSetup<Range<$t>> for Range<$t> {
#[inline]
fn setup(mut r: Range<$t>, step: usize) -> Range<$t> {
let inner_len = r.size_hint().0;
// If step exceeds $t::MAX, then the count will be at most 1 and
// thus always fit into $t.
let yield_count = inner_len.div_ceil(step);
// Turn the range end into an iteration counter
r.end = yield_count as $t;
r
}
}
impl StepByImpl<Range<$t>> for StepBy<Range<$t>> {
#[inline]
fn spec_next(&mut self) -> Option<$t> {
// if a step size larger than the type has been specified fall back to
// t::MAX, in which case remaining will be at most 1.
// The `+ 1` can't overflow since the constructor subtracted 1 from the original value.
let step = <$t>::try_from(self.step + 1).unwrap_or(<$t>::MAX);
let remaining = self.iter.end;
if remaining > 0 {
let val = self.iter.start;
// this can only overflow during the last step, after which the value
// will not be used
self.iter.start = val.wrapping_add(step);
self.iter.end = remaining - 1;
Some(val)
} else {
None
}
}
// `iter.end` is the remaining item count, so both bounds are that count.
fn spec_size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.iter.end as usize;
(remaining, Some(remaining))
}
// The methods below are all copied from the Iterator trait default impls.
// We have to repeat them here so that the specialization overrides the StepByImpl defaults
fn spec_nth(&mut self, n: usize) -> Option<Self::Item> {
self.advance_by(n).ok()?;
self.next()
}
fn spec_try_fold<Acc, F, R>(&mut self, init: Acc, mut f: F) -> R
where
F: FnMut(Acc, Self::Item) -> R,
R: Try<Output = Acc>
{
let mut accum = init;
while let Some(x) = self.next() {
accum = f(accum, x)?;
}
try { accum }
}
// Unlike the two methods above, `fold` does not go through `next()`: it
// runs a counted loop over `remaining` and advances a local copy of the
// start value.
#[inline]
fn spec_fold<Acc, F>(self, init: Acc, mut f: F) -> Acc
where
F: FnMut(Acc, Self::Item) -> Acc
{
// if a step size larger than the type has been specified fall back to
// t::MAX, in which case remaining will be at most 1.
let step = <$t>::try_from(self.step + 1).unwrap_or(<$t>::MAX);
let remaining = self.iter.end;
let mut acc = init;
let mut val = self.iter.start;
for _ in 0..remaining {
acc = f(acc, val);
// this can only overflow during the last step, after which the value
// will no longer be used
val = val.wrapping_add(step);
}
acc
}
}
)*)
}
// Generates, for every listed integer type `$t`, the `StepByBackImpl`
// specialization of `StepBy<Range<$t>>`.
//
// It relies on the representation produced by the `SpecRangeSetup` impl for
// `Range<$t>`: `iter.start` is the next front value and `iter.end` is the
// number of items still to be yielded.
macro_rules! spec_int_ranges_r {
($($t:ty)*) => ($(
const _: () = assert!(usize::BITS >= <$t>::BITS);
impl StepByBackImpl<Range<$t>> for StepBy<Range<$t>> {
// Yields the last of the `remaining` values, `start + step * (remaining - 1)`,
// and decrements the count.
fn spec_next_back(&mut self) -> Option<Self::Item>
where Range<$t>: DoubleEndedIterator + ExactSizeIterator,
{
// NOTE(review): unlike `spec_next`, this is a plain `as` cast and truncates
// when the step exceeds `$t::MAX`. That looks harmless because `setup` then
// leaves at most one item, so the multiplier `remaining - 1` is zero; confirm.
let step = (self.step + 1) as $t;
let remaining = self.iter.end;
if remaining > 0 {
let start = self.iter.start;
self.iter.end = remaining - 1;
Some(start + step * (remaining - 1))
} else {
None
}
}
// The methods below are all copied from the Iterator trait default impls.
// We have to repeat them here so that the specialization overrides the StepByBackImpl defaults
fn spec_nth_back(&mut self, n: usize) -> Option<Self::Item>
where Self: DoubleEndedIterator,
{
if self.advance_back_by(n).is_err() {
return None;
}
self.next_back()
}
fn spec_try_rfold<Acc, F, R>(&mut self, init: Acc, mut f: F) -> R
where
Self: DoubleEndedIterator,
F: FnMut(Acc, Self::Item) -> R,
R: Try<Output = Acc>
{
let mut accum = init;
while let Some(x) = self.next_back() {
accum = f(accum, x)?;
}
try { accum }
}
fn spec_rfold<Acc, F>(mut self, init: Acc, mut f: F) -> Acc
where
Self: DoubleEndedIterator,
F: FnMut(Acc, Self::Item) -> Acc
{
let mut accum = init;
while let Some(x) = self.next_back() {
accum = f(accum, x);
}
accum
}
}
)*)
}
// Instantiate the specializations per pointer width. Only unsigned types are
// listed, and only those no wider than `usize` on that target; both macros
// enforce the width with a const assertion.
#[cfg(target_pointer_width = "64")]
spec_int_ranges!(u8 u16 u32 u64 usize);
// DoubleEndedIterator requires ExactSizeIterator, which isn't implemented for Range<u64>
#[cfg(target_pointer_width = "64")]
spec_int_ranges_r!(u8 u16 u32 usize);
#[cfg(target_pointer_width = "32")]
spec_int_ranges!(u8 u16 u32 usize);
#[cfg(target_pointer_width = "32")]
spec_int_ranges_r!(u8 u16 u32 usize);
#[cfg(target_pointer_width = "16")]
spec_int_ranges!(u8 u16 usize);
#[cfg(target_pointer_width = "16")]
spec_int_ranges_r!(u8 u16 usize);

View File

@ -244,3 +244,58 @@ fn test_step_by_skip() {
assert_eq!((0..=50).step_by(10).nth(3), Some(30));
assert_eq!((200..=255u8).step_by(10).nth(3), Some(230));
}
/// Test-only wrapper that exposes just `next` and `next_back` of the wrapped
/// iterator; every other iterator method on `DeOpt` is the trait's provided
/// default, built on those two.
struct DeOpt<I: Iterator>(I);

impl<I: Iterator> Iterator for DeOpt<I> {
    type Item = I::Item;

    fn next(&mut self) -> Option<Self::Item> {
        let DeOpt(inner) = self;
        inner.next()
    }
}

impl<I: DoubleEndedIterator> DoubleEndedIterator for DeOpt<I> {
    fn next_back(&mut self) -> Option<Self::Item> {
        let DeOpt(inner) = self;
        inner.next_back()
    }
}
/// Checks that a stepped `usize` range gives the same results when driven
/// directly and when driven through the `DeOpt` wrapper.
#[test]
fn test_step_by_fold_range_specialization() {
    // Runs `$checks` twice with the iterator bound to `$name`: first wrapped
    // in `DeOpt`, then directly.
    macro_rules! both_ways {
        ($iter:expr, $name:ident, $checks:tt) => {
            {
                // run the same tests for the non-optimized version
                let mut $name = DeOpt($iter);
                $checks
            }
            {
                let mut $name = $iter;
                $checks
            }
        };
    }

    both_ways!((1usize..5).step_by(1), it, {
        assert_eq!(it.next_back(), Some(4));
        assert_eq!(it.sum::<usize>(), 6);
    });

    both_ways!((0usize..4).step_by(2), it, {
        assert_eq!(it.next(), Some(0));
        assert_eq!(it.sum::<usize>(), 2);
    });

    both_ways!((0usize..5).step_by(2), it, {
        assert_eq!(it.next(), Some(0));
        assert_eq!(it.sum::<usize>(), 6);
    });

    // A range ending at `usize::MAX`: the value after the last item would overflow.
    both_ways!((usize::MAX - 6..usize::MAX).step_by(5), it, {
        assert_eq!(it.next(), Some(usize::MAX - 6));
        assert_eq!(it.sum::<usize>(), usize::MAX - 1);
    });
}