Auto merge of #52553 - Pazzaz:vecdeque-append, r=SimonSapin
Non-naive implementation of `VecDeque.append`
Replaces the old, simple implementation with a more manual (and **unsafe** 😱) one. I've added 1 more test and verified that it covers all 6 code paths in the function.
This new implementation was about 60% faster than the old naive one when I tried benchmarking it.
This commit is contained in:
commit
d5b6b95aef
@ -23,3 +23,8 @@ path = "../liballoc/tests/lib.rs"
|
||||
[[bench]]
|
||||
name = "collectionsbenches"
|
||||
path = "../liballoc/benches/lib.rs"
|
||||
|
||||
[[bench]]
|
||||
name = "vec_deque_append_bench"
|
||||
path = "../liballoc/benches/vec_deque_append.rs"
|
||||
harness = false
|
||||
|
48
src/liballoc/benches/vec_deque_append.rs
Normal file
48
src/liballoc/benches/vec_deque_append.rs
Normal file
@ -0,0 +1,48 @@
|
||||
// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![feature(duration_as_u128)]
|
||||
use std::{collections::VecDeque, time::Instant};
|
||||
|
||||
const VECDEQUE_LEN: i32 = 100000;
|
||||
const WARMUP_N: usize = 100;
|
||||
const BENCH_N: usize = 1000;
|
||||
|
||||
fn main() {
|
||||
let a: VecDeque<i32> = (0..VECDEQUE_LEN).collect();
|
||||
let b: VecDeque<i32> = (0..VECDEQUE_LEN).collect();
|
||||
|
||||
for _ in 0..WARMUP_N {
|
||||
let mut c = a.clone();
|
||||
let mut d = b.clone();
|
||||
c.append(&mut d);
|
||||
}
|
||||
|
||||
let mut durations = Vec::with_capacity(BENCH_N);
|
||||
|
||||
for _ in 0..BENCH_N {
|
||||
let mut c = a.clone();
|
||||
let mut d = b.clone();
|
||||
let before = Instant::now();
|
||||
c.append(&mut d);
|
||||
let after = Instant::now();
|
||||
durations.push(after.duration_since(before));
|
||||
}
|
||||
|
||||
let l = durations.len();
|
||||
durations.sort();
|
||||
|
||||
assert!(BENCH_N % 2 == 0);
|
||||
let median = (durations[(l / 2) - 1] + durations[l / 2]) / 2;
|
||||
println!(
|
||||
"\ncustom-bench vec_deque_append {:?} ns/iter\n",
|
||||
median.as_nanos()
|
||||
);
|
||||
}
|
@ -202,6 +202,23 @@ unsafe fn copy_nonoverlapping(&self, dst: usize, src: usize, len: usize) {
|
||||
len);
|
||||
}
|
||||
|
||||
/// Returns a pair of slices which contain the contents of the buffer not used by the VecDeque.
|
||||
#[inline]
|
||||
unsafe fn unused_as_mut_slices<'a>(&'a mut self) -> (&'a mut [T], &'a mut [T]) {
|
||||
let head = self.head;
|
||||
let tail = self.tail;
|
||||
let buf = self.buffer_as_mut_slice();
|
||||
if head != tail {
|
||||
// In buf, head..tail contains the VecDeque and tail..head is unused.
|
||||
// So calling `ring_slices` with tail and head swapped returns unused slices.
|
||||
RingSlices::ring_slices(buf, tail, head)
|
||||
} else {
|
||||
// Swapping doesn't help when head == tail.
|
||||
let (before, after) = buf.split_at_mut(head);
|
||||
(after, before)
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies a potentially wrapping block of memory len long from src to dest.
|
||||
/// (abs(dst - src) + len) must be no larger than cap() (There must be at
|
||||
/// most one continuous overlapping region between src and dest).
|
||||
@ -1834,8 +1851,148 @@ pub fn split_off(&mut self, at: usize) -> Self {
|
||||
#[inline]
|
||||
#[stable(feature = "append", since = "1.4.0")]
|
||||
pub fn append(&mut self, other: &mut Self) {
|
||||
// naive impl
|
||||
self.extend(other.drain(..));
|
||||
// Copies all values from `src_slice` to the start of `dst_slice`.
|
||||
unsafe fn copy_whole_slice<T>(src_slice: &[T], dst_slice: &mut [T]) {
|
||||
let len = src_slice.len();
|
||||
ptr::copy_nonoverlapping(src_slice.as_ptr(), dst_slice[..len].as_mut_ptr(), len);
|
||||
}
|
||||
|
||||
let src_total = other.len();
|
||||
|
||||
// Guarantees there is space in `self` for `other`.
|
||||
self.reserve(src_total);
|
||||
|
||||
self.head = {
|
||||
let original_head = self.head;
|
||||
|
||||
// The goal is to copy all values from `other` into `self`. To avoid any
|
||||
// mismatch, all valid values in `other` are retrieved...
|
||||
let (src_high, src_low) = other.as_slices();
|
||||
// and unoccupied parts of self are retrieved.
|
||||
let (dst_high, dst_low) = unsafe { self.unused_as_mut_slices() };
|
||||
|
||||
// Then all that is needed is to copy all values from
|
||||
// src (src_high and src_low) to dst (dst_high and dst_low).
|
||||
//
|
||||
// other [o o o . . . . . o o o o]
|
||||
// [5 6 7] [1 2 3 4]
|
||||
// src_low src_high
|
||||
//
|
||||
// self [. . . . . . o o o o . .]
|
||||
// [3 4 5 6 7 .] [1 2]
|
||||
// dst_low dst_high
|
||||
//
|
||||
// Values are not copied one by one but as slices in `copy_whole_slice`.
|
||||
// What slices are used depends on various properties of src and dst.
|
||||
// There are 6 cases in total:
|
||||
// 1. `src` is contiguous and fits in dst_high
|
||||
// 2. `src` is contiguous and does not fit in dst_high
|
||||
// 3. `src` is discontiguous and fits in dst_high
|
||||
// 4. `src` is discontiguous and does not fit in dst_high
|
||||
// + src_high is smaller than dst_high
|
||||
// 5. `src` is discontiguous and does not fit in dst_high
|
||||
// + dst_high is smaller than src_high
|
||||
// 6. `src` is discontiguous and does not fit in dst_high
|
||||
// + dst_high is the same size as src_high
|
||||
let src_contiguous = src_low.is_empty();
|
||||
let dst_high_fits_src = dst_high.len() >= src_total;
|
||||
match (src_contiguous, dst_high_fits_src) {
|
||||
(true, true) => {
|
||||
// 1.
|
||||
// other [. . . o o o . . . . . .]
|
||||
// [] [1 1 1]
|
||||
//
|
||||
// self [. o o o o o . . . . . .]
|
||||
// [.] [1 1 1 . . .]
|
||||
|
||||
unsafe {
|
||||
copy_whole_slice(src_high, dst_high);
|
||||
}
|
||||
original_head + src_total
|
||||
}
|
||||
(true, false) => {
|
||||
// 2.
|
||||
// other [. . . o o o o o . . . .]
|
||||
// [] [1 1 2 2 2]
|
||||
//
|
||||
// self [. . . . . . . o o o . .]
|
||||
// [2 2 2 . . . .] [1 1]
|
||||
|
||||
let (src_1, src_2) = src_high.split_at(dst_high.len());
|
||||
unsafe {
|
||||
copy_whole_slice(src_1, dst_high);
|
||||
copy_whole_slice(src_2, dst_low);
|
||||
}
|
||||
src_total - dst_high.len()
|
||||
}
|
||||
(false, true) => {
|
||||
// 3.
|
||||
// other [o o . . . . . . . o o o]
|
||||
// [2 2] [1 1 1]
|
||||
//
|
||||
// self [. o o . . . . . . . . .]
|
||||
// [.] [1 1 1 2 2 . . . .]
|
||||
|
||||
let (dst_1, dst_2) = dst_high.split_at_mut(src_high.len());
|
||||
unsafe {
|
||||
copy_whole_slice(src_high, dst_1);
|
||||
copy_whole_slice(src_low, dst_2);
|
||||
}
|
||||
original_head + src_total
|
||||
}
|
||||
(false, false) => {
|
||||
if src_high.len() < dst_high.len() {
|
||||
// 4.
|
||||
// other [o o o . . . . . . o o o]
|
||||
// [2 3 3] [1 1 1]
|
||||
//
|
||||
// self [. . . . . . o o . . . .]
|
||||
// [3 3 . . . .] [1 1 1 2]
|
||||
|
||||
let (dst_1, dst_2) = dst_high.split_at_mut(src_high.len());
|
||||
let (src_2, src_3) = src_low.split_at(dst_2.len());
|
||||
unsafe {
|
||||
copy_whole_slice(src_high, dst_1);
|
||||
copy_whole_slice(src_2, dst_2);
|
||||
copy_whole_slice(src_3, dst_low);
|
||||
}
|
||||
src_3.len()
|
||||
} else if src_high.len() > dst_high.len() {
|
||||
// 5.
|
||||
// other [o o o . . . . . o o o o]
|
||||
// [3 3 3] [1 1 2 2]
|
||||
//
|
||||
// self [. . . . . . o o o o . .]
|
||||
// [2 2 3 3 3 .] [1 1]
|
||||
|
||||
let (src_1, src_2) = src_high.split_at(dst_high.len());
|
||||
let (dst_2, dst_3) = dst_low.split_at_mut(src_2.len());
|
||||
unsafe {
|
||||
copy_whole_slice(src_1, dst_high);
|
||||
copy_whole_slice(src_2, dst_2);
|
||||
copy_whole_slice(src_low, dst_3);
|
||||
}
|
||||
dst_2.len() + src_low.len()
|
||||
} else {
|
||||
// 6.
|
||||
// other [o o . . . . . . . o o o]
|
||||
// [2 2] [1 1 1]
|
||||
//
|
||||
// self [. . . . . . . o o . . .]
|
||||
// [2 2 . . . . .] [1 1 1]
|
||||
|
||||
unsafe {
|
||||
copy_whole_slice(src_high, dst_high);
|
||||
copy_whole_slice(src_low, dst_low);
|
||||
}
|
||||
src_low.len()
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Some values now exist in both `other` and `self` but are made inaccessible in `other`.
|
||||
other.tail = other.head;
|
||||
}
|
||||
|
||||
/// Retains only the elements specified by the predicate.
|
||||
|
@ -928,6 +928,107 @@ fn test_append() {
|
||||
assert_eq!(a.iter().cloned().collect::<Vec<_>>(), []);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_append_permutations() {
|
||||
fn construct_vec_deque(
|
||||
push_back: usize,
|
||||
pop_back: usize,
|
||||
push_front: usize,
|
||||
pop_front: usize,
|
||||
) -> VecDeque<usize> {
|
||||
let mut out = VecDeque::new();
|
||||
for a in 0..push_back {
|
||||
out.push_back(a);
|
||||
}
|
||||
for b in 0..push_front {
|
||||
out.push_front(push_back + b);
|
||||
}
|
||||
for _ in 0..pop_back {
|
||||
out.pop_back();
|
||||
}
|
||||
for _ in 0..pop_front {
|
||||
out.pop_front();
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
const MAX: usize = 5;
|
||||
|
||||
// Many different permutations of both the `VecDeque` getting appended to
|
||||
// and the one getting appended are generated to check `append`.
|
||||
// This ensures all 6 code paths of `append` are tested.
|
||||
for src_push_back in 0..MAX {
|
||||
for src_push_front in 0..MAX {
|
||||
// doesn't pop more values than are pushed
|
||||
for src_pop_back in 0..(src_push_back + src_push_front) {
|
||||
for src_pop_front in 0..(src_push_back + src_push_front - src_pop_back) {
|
||||
|
||||
let src = construct_vec_deque(
|
||||
src_push_back,
|
||||
src_pop_back,
|
||||
src_push_front,
|
||||
src_pop_front,
|
||||
);
|
||||
|
||||
for dst_push_back in 0..MAX {
|
||||
for dst_push_front in 0..MAX {
|
||||
for dst_pop_back in 0..(dst_push_back + dst_push_front) {
|
||||
for dst_pop_front
|
||||
in 0..(dst_push_back + dst_push_front - dst_pop_back)
|
||||
{
|
||||
let mut dst = construct_vec_deque(
|
||||
dst_push_back,
|
||||
dst_pop_back,
|
||||
dst_push_front,
|
||||
dst_pop_front,
|
||||
);
|
||||
let mut src = src.clone();
|
||||
|
||||
// Assert that appending `src` to `dst` gives the same order
|
||||
// of values as iterating over both in sequence.
|
||||
let correct = dst
|
||||
.iter()
|
||||
.chain(src.iter())
|
||||
.cloned()
|
||||
.collect::<Vec<usize>>();
|
||||
dst.append(&mut src);
|
||||
assert_eq!(dst, correct);
|
||||
assert!(src.is_empty());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct DropCounter<'a> {
|
||||
count: &'a mut u32,
|
||||
}
|
||||
|
||||
impl<'a> Drop for DropCounter<'a> {
|
||||
fn drop(&mut self) {
|
||||
*self.count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_append_double_drop() {
|
||||
let (mut count_a, mut count_b) = (0, 0);
|
||||
{
|
||||
let mut a = VecDeque::new();
|
||||
let mut b = VecDeque::new();
|
||||
a.push_back(DropCounter { count: &mut count_a });
|
||||
b.push_back(DropCounter { count: &mut count_b });
|
||||
|
||||
a.append(&mut b);
|
||||
}
|
||||
assert_eq!(count_a, 1);
|
||||
assert_eq!(count_b, 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_retain() {
|
||||
let mut buf = VecDeque::new();
|
||||
|
Loading…
Reference in New Issue
Block a user