Rollup merge of #103166 - the8472:copied-next-chunk, r=m-ou-se

Optimize `slice_iter.copied().next_chunk()`

```
OLD:
test iter::bench_copied_array_chunks                               ... bench:         371 ns/iter (+/- 7)
NEW:
test iter::bench_copied_array_chunks                               ... bench:          31 ns/iter (+/- 0)
```

The default `next_chunk` implementation suffers from having to assemble the array element by element via `next()`, checking each returned `Option<&T>` and then dereferencing the `&T`. The specialization instead copies the chunk directly out of the underlying slice with a single `ptr::copy_nonoverlapping`.
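
For context, here is a minimal usage sketch of the kind of caller that benefits (not part of this PR: the `checksum` function and its logic are made up for illustration, and `Iterator::next_chunk` still requires a nightly compiler with the `iter_next_chunk` feature):

```rust
#![feature(iter_next_chunk)]

// Illustrative only; assumes a nightly toolchain for the unstable `next_chunk` API.
fn checksum(bytes: &[u8]) -> u64 {
    let mut iter = bytes.iter().copied();
    let mut acc = 0u64;
    loop {
        match iter.next_chunk::<8>() {
            // With the specialization, this is a single 8-byte copy out of the slice
            // rather than eight `next()` calls plus `Option` checks.
            Ok(chunk) => acc = acc.wrapping_add(u64::from_ne_bytes(chunk)),
            // Fewer than 8 bytes left: the remainder comes back as an `array::IntoIter`.
            Err(rest) => {
                for b in rest {
                    acc = acc.wrapping_add(b as u64);
                }
                break;
            }
        }
    }
    acc
}

fn main() {
    let data = vec![1u8; 1024];
    println!("{:#x}", checksum(&data));
}
```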
commit d6eb7bca09 by Matthias Krüger, 2022-10-19 07:15:30 +02:00 (committed by GitHub)
3 changed files with 95 additions and 0 deletions

library/core/benches/iter.rs

@@ -1,4 +1,6 @@
use core::iter::*;
use core::mem;
use core::num::Wrapping;
use test::{black_box, Bencher};
#[bench]
@@ -398,3 +400,21 @@ fn bench_trusted_random_access_adapters(b: &mut Bencher) {
acc
})
}
/// Exercises the iter::Copied specialization for slice::Iter
#[bench]
fn bench_copied_array_chunks(b: &mut Bencher) {
let v = vec![1u8; 1024];
b.iter(|| {
black_box(&v)
.iter()
.copied()
.array_chunks::<{ mem::size_of::<u64>() }>()
.map(|ary| {
let d = u64::from_ne_bytes(ary);
Wrapping(d.rotate_left(7).wrapping_add(1))
})
.sum::<Wrapping<u64>>()
})
}

library/core/benches/lib.rs

@@ -4,6 +4,7 @@
#![feature(int_log)]
#![feature(test)]
#![feature(trusted_random_access)]
#![feature(iter_array_chunks)]
extern crate test;

library/core/src/iter/adapters/copied.rs

@@ -2,7 +2,10 @@
zip::try_get_unchecked, TrustedRandomAccess, TrustedRandomAccessNoCoerce,
};
use crate::iter::{FusedIterator, TrustedLen};
use crate::mem::MaybeUninit;
use crate::mem::SizedTypeProperties;
use crate::ops::Try;
use crate::{array, ptr};
/// An iterator that copies the elements of an underlying iterator.
///
@@ -44,6 +47,15 @@ fn next(&mut self) -> Option<T> {
self.it.next().copied()
}
fn next_chunk<const N: usize>(
&mut self,
) -> Result<[Self::Item; N], array::IntoIter<Self::Item, N>>
where
Self: Sized,
{
<I as SpecNextChunk<'_, N, T>>::spec_next_chunk(&mut self.it)
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.it.size_hint()
}
@@ -166,3 +178,65 @@ unsafe impl<'a, I, T: 'a> TrustedLen for Copied<I>
T: Copy,
{
}
trait SpecNextChunk<'a, const N: usize, T: 'a>: Iterator<Item = &'a T>
where
T: Copy,
{
fn spec_next_chunk(&mut self) -> Result<[T; N], array::IntoIter<T, N>>;
}
impl<'a, const N: usize, I, T: 'a> SpecNextChunk<'a, N, T> for I
where
I: Iterator<Item = &'a T>,
T: Copy,
{
default fn spec_next_chunk(&mut self) -> Result<[T; N], array::IntoIter<T, N>> {
array::iter_next_chunk(&mut self.map(|e| *e))
}
}
impl<'a, const N: usize, T: 'a> SpecNextChunk<'a, N, T> for crate::slice::Iter<'a, T>
where
T: Copy,
{
fn spec_next_chunk(&mut self) -> Result<[T; N], array::IntoIter<T, N>> {
let mut raw_array = MaybeUninit::uninit_array();
let len = self.len();
if T::IS_ZST {
if len < N {
let _ = self.advance_by(len);
// SAFETY: ZSTs can be conjured ex nihilo; only the amount has to be correct
return Err(unsafe { array::IntoIter::new_unchecked(raw_array, 0..len) });
}
let _ = self.advance_by(N);
// SAFETY: ditto
return Ok(unsafe { MaybeUninit::array_assume_init(raw_array) });
}
if len < N {
// SAFETY: `len` indicates that this many elements are available and we just checked that
// it fits into the array.
unsafe {
ptr::copy_nonoverlapping(
self.as_ref().as_ptr(),
raw_array.as_mut_ptr() as *mut T,
len,
);
let _ = self.advance_by(len);
return Err(array::IntoIter::new_unchecked(raw_array, 0..len));
}
}
// SAFETY: `len` is at least as large as the array size. Copy a fixed amount here to fully initialize
// the array.
unsafe {
ptr::copy_nonoverlapping(self.as_ref().as_ptr(), raw_array.as_mut_ptr() as *mut T, N);
let _ = self.advance_by(N);
Ok(MaybeUninit::array_assume_init(raw_array))
}
}
}