Vec::dedup optimization - panic gracefully

This commit is contained in:
Soveu 2021-02-17 17:21:12 +01:00
parent 1825810a89
commit c114894b90

View File

@ -1523,43 +1523,92 @@ pub fn dedup_by<F>(&mut self, mut same_bucket: F)
return;
}
let ptr = self.as_mut_ptr();
/* Offset of the element we want to check if it is duplicate */
let mut read: usize = 1;
/* Offset of the place where we want to place the non-duplicate
* when we find it. */
let mut write: usize = 1;
/* INVARIANT: vec.len() > read >= write > write-1 >= 0 */
/* Drop guard that restores the Vec to a valid state if `same_bucket`
 * panics partway through dedup: its Drop impl closes the gap left by
 * already-dropped duplicates and sets the correct length. */
struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> {
    /* Offset of the element we want to check if it is duplicate */
    read: usize,
    /* Offset of the place where we want to place the non-duplicate
     * when we find it. */
    write: usize,
    /* The Vec that would need correction if `same_bucket` panicked */
    vec: &'a mut Vec<T, A>,
}
impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> {
    fn drop(&mut self) {
        /* This code gets executed either at the end of `dedup_by` or
         * when `same_bucket` panics */
        /* SAFETY (if finishing successfully): self.read == len, so
         * no data is copied and length is set correctly */
        /* SAFETY (if panicking): invariant guarantees that `read - write`
         * and `len - read` never overflow and that the copy is always
         * in-bounds. */
        unsafe {
            let ptr = self.vec.as_mut_ptr();
            let len = self.vec.len();

            /* How many items were left when `same_bucket` panicked.
             * Basically vec[read..].len() */
            let items_left = len - self.read;

            /* Pointer to first item in vec[write..write+items_left] slice */
            let dropped_ptr = ptr.add(self.write);
            /* Pointer to first item in vec[read..] slice */
            let valid_ptr = ptr.add(self.read);

            /* Copy `vec[read..]` to `vec[write..write+items_left]`,
             * shifting the still-valid tail down over the gap of dropped
             * duplicates. The slices can overlap, so `copy_nonoverlapping`
             * cannot be used */
            ptr::copy(valid_ptr, dropped_ptr, items_left);

            /* How many items have been already dropped.
             * Basically vec[write..read].len() (invariant: read >= write) */
            let dropped = self.read - self.write;

            /* Shrink the length so the dropped duplicates are no longer
             * considered part of the Vec */
            self.vec.set_len(len - dropped);
        }
    }
}
let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self };
let ptr = gap.vec.as_mut_ptr();
/* Drop items while going through Vec, it should be more efficient than
* doing slice partition_dedup + truncate */
/* INVARIANT: len > read >= write > write-1 >= 0
* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
/* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
* are always in-bounds and read_ptr never aliases prev_ptr */
unsafe {
while read < len {
let read_ptr = ptr.add(read);
let prev_ptr = ptr.add(write.wrapping_sub(1));
while gap.read < len {
let read_ptr = ptr.add(gap.read);
let prev_ptr = ptr.add(gap.write.wrapping_sub(1));
if same_bucket(&mut *read_ptr, &mut *prev_ptr) {
/* We have found duplicate, drop it in-place */
ptr::drop_in_place(read_ptr);
} else {
let write_ptr = ptr.add(write);
let write_ptr = ptr.add(gap.write);
/* Looks like doing just `copy` can be faster than
* conditional `copy_nonoverlapping` */
ptr::copy(read_ptr, write_ptr, 1);
/* We have filled that place, so go further */
write += 1;
gap.write += 1;
}
read += 1;
gap.read += 1;
}
/* `write` items are inside vec, rest is already dropped */
self.set_len(write);
/* Technically we could let `gap` clean up with its Drop, but
* when `same_bucket` is guaranteed to not panic, this bloats a little
* the codegen, so we just do it manually */
gap.vec.set_len(gap.write);
mem::forget(gap);
}
}