Optimized vec::IntoIter::next_chunk impl
``` test vec::bench_next_chunk ... bench: 696 ns/iter (+/- 22) x86_64v1, default test vec::bench_next_chunk ... bench: 309 ns/iter (+/- 4) x86_64v1, pr test vec::bench_next_chunk ... bench: 17,272 ns/iter (+/- 117) znver2, default test vec::bench_next_chunk ... bench: 211 ns/iter (+/- 3) znver2, pr ``` The znver2 default impl seems to be slow due to inlining decisions. It goes through `core::array::iter_next_chunk` which has a deeper call tree.
This commit is contained in:
parent
7425fb293f
commit
2f9f2e507e
@ -2,6 +2,7 @@
|
||||
// See https://github.com/rust-lang/rust/issues/73535#event-3477699747
|
||||
#![cfg(not(target_os = "android"))]
|
||||
#![feature(btree_drain_filter)]
|
||||
#![feature(iter_next_chunk)]
|
||||
#![feature(map_first_last)]
|
||||
#![feature(repr_simd)]
|
||||
#![feature(slice_partition_dedup)]
|
||||
|
@ -762,3 +762,23 @@ fn bench_retain_whole_100000(b: &mut Bencher) {
|
||||
let mut v = black_box(vec![826u32; 100000]);
|
||||
b.iter(|| v.retain(|x| *x == 826u32));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_next_chunk(b: &mut Bencher) {
|
||||
let v = vec![13u8; 2048];
|
||||
|
||||
b.iter(|| {
|
||||
const CHUNK: usize = 8;
|
||||
|
||||
let mut sum = [0u32; CHUNK];
|
||||
let mut iter = black_box(v.clone()).into_iter();
|
||||
|
||||
while let Ok(chunk) = iter.next_chunk::<CHUNK>() {
|
||||
for i in 0..CHUNK {
|
||||
sum[i] += chunk[i] as u32;
|
||||
}
|
||||
}
|
||||
|
||||
sum
|
||||
})
|
||||
}
|
||||
|
@ -90,6 +90,7 @@
|
||||
#![feature(alloc_layout_extra)]
|
||||
#![feature(allocator_api)]
|
||||
#![feature(array_chunks)]
|
||||
#![feature(array_into_iter_constructors)]
|
||||
#![feature(array_methods)]
|
||||
#![feature(array_windows)]
|
||||
#![feature(assert_matches)]
|
||||
@ -120,8 +121,11 @@
|
||||
#![feature(hasher_prefixfree_extras)]
|
||||
#![feature(inplace_iteration)]
|
||||
#![feature(iter_advance_by)]
|
||||
#![feature(iter_next_chunk)]
|
||||
#![feature(layout_for_ptr)]
|
||||
#![feature(maybe_uninit_array_assume_init)]
|
||||
#![feature(maybe_uninit_slice)]
|
||||
#![feature(maybe_uninit_uninit_array)]
|
||||
#![cfg_attr(test, feature(new_uninit))]
|
||||
#![feature(nonnull_slice_from_raw_parts)]
|
||||
#![feature(pattern)]
|
||||
|
@ -2,13 +2,14 @@
|
||||
use super::AsVecIntoIter;
|
||||
use crate::alloc::{Allocator, Global};
|
||||
use crate::raw_vec::RawVec;
|
||||
use core::array;
|
||||
use core::fmt;
|
||||
use core::intrinsics::arith_offset;
|
||||
use core::iter::{
|
||||
FusedIterator, InPlaceIterable, SourceIter, TrustedLen, TrustedRandomAccessNoCoerce,
|
||||
};
|
||||
use core::marker::PhantomData;
|
||||
use core::mem::{self, ManuallyDrop};
|
||||
use core::mem::{self, ManuallyDrop, MaybeUninit};
|
||||
#[cfg(not(no_global_oom_handling))]
|
||||
use core::ops::Deref;
|
||||
use core::ptr::{self, NonNull};
|
||||
@ -124,7 +125,6 @@ pub(super) fn forget_allocation_drop_remaining(&mut self) {
|
||||
}
|
||||
|
||||
/// Forgets to Drop the remaining elements while still allowing the backing allocation to be freed.
///
/// Moving the read cursor to the end sentinel leaves a logically empty
/// iterator: no element destructors will run, but the allocation itself
/// is still released when the iterator is dropped.
#[cfg(not(no_global_oom_handling))]
pub(crate) fn forget_remaining_elements(&mut self) {
    self.ptr = self.end;
}
|
||||
@ -204,6 +204,43 @@ fn count(self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn next_chunk<const N: usize>(&mut self) -> Result<[T; N], core::array::IntoIter<T, N>> {
|
||||
let mut raw_ary = MaybeUninit::uninit_array();
|
||||
|
||||
let len = self.len();
|
||||
|
||||
if mem::size_of::<T>() == 0 {
|
||||
if len < N {
|
||||
self.forget_remaining_elements();
|
||||
// Safety: ZSTs can be conjured ex nihilo, only the amount has to be correct
|
||||
return Err(unsafe { array::IntoIter::new_unchecked(raw_ary, 0..len) });
|
||||
}
|
||||
|
||||
self.ptr = unsafe { arith_offset(self.ptr as *const i8, N as isize) as *mut T };
|
||||
// Safety: ditto
|
||||
return Ok(unsafe { MaybeUninit::array_assume_init(raw_ary) });
|
||||
}
|
||||
|
||||
if len < N {
|
||||
// Safety: `len` indicates that this many elements are available and we just checked that
|
||||
// it fits into the array.
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(self.ptr, raw_ary.as_mut_ptr() as *mut T, len);
|
||||
self.forget_remaining_elements();
|
||||
return Err(array::IntoIter::new_unchecked(raw_ary, 0..len));
|
||||
}
|
||||
}
|
||||
|
||||
// Safety: `len` is larger than the array size. Copy a fixed amount here to fully initialize
|
||||
// the array.
|
||||
return unsafe {
|
||||
ptr::copy_nonoverlapping(self.ptr, raw_ary.as_mut_ptr() as *mut T, N);
|
||||
self.ptr = self.ptr.add(N);
|
||||
Ok(MaybeUninit::array_assume_init(raw_ary))
|
||||
};
|
||||
}
|
||||
|
||||
unsafe fn __iterator_get_unchecked(&mut self, i: usize) -> Self::Item
|
||||
where
|
||||
Self: TrustedRandomAccessNoCoerce,
|
||||
|
Loading…
Reference in New Issue
Block a user