diff --git a/library/alloc/benches/vec.rs b/library/alloc/benches/vec.rs
index c1d3e1bdfe7..8ebfe313dc5 100644
--- a/library/alloc/benches/vec.rs
+++ b/library/alloc/benches/vec.rs
@@ -658,13 +658,17 @@ fn random_sorted_fill(mut seed: u32, buf: &mut [u32]) {
     buf.sort();
 }
 
-fn bench_vec_dedup_old(b: &mut Bencher, sz: usize) {
+// Measures the performance of the slice dedup impl.
+// This was used to justify the separate implementation of dedup for Vec.
+// This algorithm was used for Vecs prior to Rust 1.52.
+fn bench_dedup_slice_truncate(b: &mut Bencher, sz: usize) {
     let mut template = vec![0u32; sz];
     b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
     random_sorted_fill(0x43, &mut template);
 
     let mut vec = template.clone();
     b.iter(|| {
+        let vec = black_box(&mut vec);
         let len = {
             let (dedup, _) = vec.partition_dedup();
             dedup.len()
@@ -672,59 +676,143 @@ fn bench_vec_dedup_old(b: &mut Bencher, sz: usize) {
         vec.truncate(len);
 
         black_box(vec.first());
+        let vec = black_box(vec);
         vec.clear();
         vec.extend_from_slice(&template);
     });
 }
 
-fn bench_vec_dedup_new(b: &mut Bencher, sz: usize) {
+// Measures the performance of Vec::dedup on random data.
+fn bench_vec_dedup_random(b: &mut Bencher, sz: usize) {
     let mut template = vec![0u32; sz];
     b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
     random_sorted_fill(0x43, &mut template);
 
     let mut vec = template.clone();
     b.iter(|| {
+        let vec = black_box(&mut vec);
         vec.dedup();
         black_box(vec.first());
+        let vec = black_box(vec);
+        vec.clear();
+        vec.extend_from_slice(&template);
+    });
+}
+
+// Measures the performance of Vec::dedup when no items are removed.
+fn bench_vec_dedup_none(b: &mut Bencher, sz: usize) {
+    let mut template = vec![0u32; sz];
+    b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
+    template.chunks_exact_mut(2).for_each(|w| {
+        w[0] = black_box(0);
+        w[1] = black_box(5);
+    });
+
+    let mut vec = template.clone();
+    b.iter(|| {
+        let vec = black_box(&mut vec);
+        vec.dedup();
+        black_box(vec.first());
+        // Unlike the other `dedup` benches,
+        // this one doesn't reinitialize the vec
+        // because we measure how efficient dedup is
+        // when no memory is written.
+    });
+}
+
+// Measures the performance of Vec::dedup when all items are removed.
+fn bench_vec_dedup_all(b: &mut Bencher, sz: usize) {
+    let mut template = vec![0u32; sz];
+    b.bytes = std::mem::size_of_val(template.as_slice()) as u64;
+    template.iter_mut().for_each(|w| {
+        *w = black_box(0);
+    });
+
+    let mut vec = template.clone();
+    b.iter(|| {
+        let vec = black_box(&mut vec);
+        vec.dedup();
+        black_box(vec.first());
+        let vec = black_box(vec);
         vec.clear();
         vec.extend_from_slice(&template);
     });
 }
 
 #[bench]
-fn bench_dedup_old_100(b: &mut Bencher) {
-    bench_vec_dedup_old(b, 100);
+fn bench_dedup_slice_truncate_100(b: &mut Bencher) {
+    bench_dedup_slice_truncate(b, 100);
 }
 #[bench]
-fn bench_dedup_new_100(b: &mut Bencher) {
-    bench_vec_dedup_new(b, 100);
+fn bench_dedup_random_100(b: &mut Bencher) {
+    bench_vec_dedup_random(b, 100);
 }
 
 #[bench]
-fn bench_dedup_old_1000(b: &mut Bencher) {
-    bench_vec_dedup_old(b, 1000);
-}
-#[bench]
-fn bench_dedup_new_1000(b: &mut Bencher) {
-    bench_vec_dedup_new(b, 1000);
+fn bench_dedup_none_100(b: &mut Bencher) {
+    bench_vec_dedup_none(b, 100);
 }
 
 #[bench]
-fn bench_dedup_old_10000(b: &mut Bencher) {
-    bench_vec_dedup_old(b, 10000);
-}
-#[bench]
-fn bench_dedup_new_10000(b: &mut Bencher) {
-    bench_vec_dedup_new(b, 10000);
+fn bench_dedup_all_100(b: &mut Bencher) {
+    bench_vec_dedup_all(b, 100);
 }
 
 #[bench]
-fn bench_dedup_old_100000(b: &mut Bencher) {
-    bench_vec_dedup_old(b, 100000);
+fn bench_dedup_slice_truncate_1000(b: &mut Bencher) {
+    bench_dedup_slice_truncate(b, 1000);
 }
 #[bench]
-fn bench_dedup_new_100000(b: &mut Bencher) {
-    bench_vec_dedup_new(b, 100000);
+fn bench_dedup_random_1000(b: &mut Bencher) {
+    bench_vec_dedup_random(b, 1000);
+}
+
+#[bench]
+fn bench_dedup_none_1000(b: &mut Bencher) {
+    bench_vec_dedup_none(b, 1000);
+}
+
+#[bench]
+fn bench_dedup_all_1000(b: &mut Bencher) {
+    bench_vec_dedup_all(b, 1000);
+}
+
+#[bench]
+fn bench_dedup_slice_truncate_10000(b: &mut Bencher) {
+    bench_dedup_slice_truncate(b, 10000);
+}
+#[bench]
+fn bench_dedup_random_10000(b: &mut Bencher) {
+    bench_vec_dedup_random(b, 10000);
+}
+
+#[bench]
+fn bench_dedup_none_10000(b: &mut Bencher) {
+    bench_vec_dedup_none(b, 10000);
+}
+
+#[bench]
+fn bench_dedup_all_10000(b: &mut Bencher) {
+    bench_vec_dedup_all(b, 10000);
+}
+
+#[bench]
+fn bench_dedup_slice_truncate_100000(b: &mut Bencher) {
+    bench_dedup_slice_truncate(b, 100000);
+}
+#[bench]
+fn bench_dedup_random_100000(b: &mut Bencher) {
+    bench_vec_dedup_random(b, 100000);
+}
+
+#[bench]
+fn bench_dedup_none_100000(b: &mut Bencher) {
+    bench_vec_dedup_none(b, 100000);
+}
+
+#[bench]
+fn bench_dedup_all_100000(b: &mut Bencher) {
+    bench_vec_dedup_all(b, 100000);
 }
 
 #[bench]
diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs
index bcf4163e39c..fca85c6123b 100644
--- a/library/alloc/src/vec/mod.rs
+++ b/library/alloc/src/vec/mod.rs
@@ -1777,7 +1777,32 @@ pub fn dedup_by(&mut self, mut same_bucket: F)
             return;
         }
 
-        /* INVARIANT: vec.len() > read >= write > write-1 >= 0 */
+        // Check if we ever want to remove anything.
+        // This allows us to use copy_nonoverlapping in the loop below
+        // and avoids any memory writes if we don't need to remove anything.
+        let mut first_duplicate_idx: usize = 1;
+        let start = self.as_mut_ptr();
+        while first_duplicate_idx != len {
+            let found_duplicate = unsafe {
+                // SAFETY: first_duplicate_idx is always in the range [1..len).
+                // Note that we start iteration from 1, so we never overflow.
+                let prev = start.add(first_duplicate_idx.wrapping_sub(1));
+                let current = start.add(first_duplicate_idx);
+                // We explicitly say in the docs that the references are reversed.
+                same_bucket(&mut *current, &mut *prev)
+            };
+            if found_duplicate {
+                break;
+            }
+            first_duplicate_idx += 1;
+        }
+        // We don't need to remove anything.
+        // `first_duplicate_idx` cannot get bigger than `len`.
+        if first_duplicate_idx == len {
+            return;
+        }
+
+        /* INVARIANT: vec.len() > read > write > write-1 >= 0 */
         struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> {
             /* Offset of the element we want to check if it is duplicate */
             read: usize,
@@ -1823,31 +1848,39 @@ fn drop(&mut self) {
             }
         }
 
-        let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self };
-        let ptr = gap.vec.as_mut_ptr();
-
         /* Drop items while going through Vec, it should be more efficient than
          * doing slice partition_dedup + truncate */
 
+        // Construct the gap first and only then drop the item, to avoid memory corruption if `T::drop` panics.
+        let mut gap =
+            FillGapOnDrop { read: first_duplicate_idx + 1, write: first_duplicate_idx, vec: self };
+        unsafe {
+            // SAFETY: we checked above that first_duplicate_idx is in bounds.
+            // If the drop panics, `gap` will remove this item without dropping it again.
+            ptr::drop_in_place(start.add(first_duplicate_idx));
+        }
+
         /* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
          * are always in-bounds and read_ptr never aliases prev_ptr */
         unsafe {
             while gap.read < len {
-                let read_ptr = ptr.add(gap.read);
-                let prev_ptr = ptr.add(gap.write.wrapping_sub(1));
+                let read_ptr = start.add(gap.read);
+                let prev_ptr = start.add(gap.write.wrapping_sub(1));
 
-                if same_bucket(&mut *read_ptr, &mut *prev_ptr) {
+                // We explicitly say in the docs that the references are reversed.
+                let found_duplicate = same_bucket(&mut *read_ptr, &mut *prev_ptr);
+                if found_duplicate {
                     // Increase `gap.read` now since the drop may panic.
                     gap.read += 1;
                     /* We have found duplicate, drop it in-place */
                     ptr::drop_in_place(read_ptr);
                 } else {
-                    let write_ptr = ptr.add(gap.write);
+                    let write_ptr = start.add(gap.write);
 
-                    /* Because `read_ptr` can be equal to `write_ptr`, we either
-                     * have to use `copy` or conditional `copy_nonoverlapping`.
-                     * Looks like the first option is faster. */
-                    ptr::copy(read_ptr, write_ptr, 1);
+                    /* `read_ptr` cannot be equal to `write_ptr` because we are
+                     * guaranteed to have skipped at least one element before the loop starts.
+                     */
+                    ptr::copy_nonoverlapping(read_ptr, write_ptr, 1);
 
                     /* We have filled that place, so go further */
                     gap.write += 1;