Optimize vec::retain performance

This simply moves the loops into the inner function which leads to better results.


```
old:

test vec::bench_retain_100000                            ... bench:     203,828 ns/iter (+/- 2,101)
test vec::bench_retain_iter_100000                       ... bench:      63,324 ns/iter (+/- 12,305)
test vec::bench_retain_whole_100000                      ... bench:      42,989 ns/iter (+/- 291)


new:

test vec::bench_retain_100000                            ... bench:      42,180 ns/iter (+/- 451)
test vec::bench_retain_iter_100000                       ... bench:      65,167 ns/iter (+/- 11,971)
test vec::bench_retain_whole_100000                      ... bench:      33,736 ns/iter (+/- 12,404)
```
This commit is contained in:
The 8472 2021-12-04 16:19:22 +01:00
parent d0f38cc4b4
commit 67180ef056

View File

@ -1520,49 +1520,46 @@ fn drop(&mut self) {
let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len };
// process_one return a bool indicates whether the processing element should be retained.
#[inline(always)]
fn process_one<F, T, A: Allocator, const DELETED: bool>(
fn process_loop<F, T, A: Allocator, const DELETED: bool>(
original_len: usize,
f: &mut F,
g: &mut BackshiftOnDrop<'_, T, A>,
) -> bool
where
) where
F: FnMut(&mut T) -> bool,
{
// SAFETY: Unchecked element must be valid.
let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
if !f(cur) {
// Advance early to avoid double drop if `drop_in_place` panicked.
g.processed_len += 1;
g.deleted_cnt += 1;
// SAFETY: We never touch this element again after dropped.
unsafe { ptr::drop_in_place(cur) };
// We already advanced the counter.
return false;
}
if DELETED {
// SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
// We use copy for move, and never touch this element again.
unsafe {
let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt);
ptr::copy_nonoverlapping(cur, hole_slot, 1);
while g.processed_len != original_len {
// SAFETY: Unchecked element must be valid.
let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
if !f(cur) {
// Advance early to avoid double drop if `drop_in_place` panicked.
g.processed_len += 1;
g.deleted_cnt += 1;
// SAFETY: We never touch this element again after dropped.
unsafe { ptr::drop_in_place(cur) };
// We already advanced the counter.
if DELETED {
continue;
} else {
break;
}
}
if DELETED {
// SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
// We use copy for move, and never touch this element again.
unsafe {
let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt);
ptr::copy_nonoverlapping(cur, hole_slot, 1);
}
}
g.processed_len += 1;
}
g.processed_len += 1;
return true;
}
// Stage 1: Nothing was deleted.
while g.processed_len != original_len {
if !process_one::<F, T, A, false>(&mut f, &mut g) {
break;
}
}
process_loop::<F, T, A, false>(original_len, &mut f, &mut g);
// Stage 2: Some elements were deleted.
while g.processed_len != original_len {
process_one::<F, T, A, true>(&mut f, &mut g);
}
process_loop::<F, T, A, true>(original_len, &mut f, &mut g);
// All item are processed. This can be optimized to `set_len` by LLVM.
drop(g);