Auto merge of #52553 - Pazzaz:vecdeque-append, r=SimonSapin

Non-naive implementation of `VecDeque.append`

Replaces the old, simple implementation with a more manual (and **unsafe** 😱) one. I've added 1 more test and verified that it covers all 6 code paths in the function.

This new implementation was about 60% faster than the old naive one when I tried benchmarking it.
This commit is contained in:
bors 2018-08-18 08:56:12 +00:00
commit d5b6b95aef
4 changed files with 313 additions and 2 deletions

View File

@ -23,3 +23,8 @@ path = "../liballoc/tests/lib.rs"
[[bench]]
name = "collectionsbenches"
path = "../liballoc/benches/lib.rs"
[[bench]]
name = "vec_deque_append_bench"
path = "../liballoc/benches/vec_deque_append.rs"
harness = false

View File

@ -0,0 +1,48 @@
// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(duration_as_u128)]
use std::{collections::VecDeque, time::Instant};
const VECDEQUE_LEN: i32 = 100000;
const WARMUP_N: usize = 100;
const BENCH_N: usize = 1000;
/// Stand-alone benchmark for `VecDeque::append`.
///
/// Appends one `VECDEQUE_LEN`-element deque to another `BENCH_N` times (after
/// `WARMUP_N` untimed warm-up rounds) and prints the median duration of a
/// single `append` call in nanoseconds. Cloning the inputs happens outside
/// the timed region.
fn main() {
    let a: VecDeque<i32> = (0..VECDEQUE_LEN).collect();
    let b: VecDeque<i32> = (0..VECDEQUE_LEN).collect();

    // Untimed warm-up rounds.
    for _ in 0..WARMUP_N {
        let (mut front, mut back) = (a.clone(), b.clone());
        front.append(&mut back);
    }

    // One sample per round; only the `append` call sits between the two
    // `Instant::now()` readings.
    let mut samples: Vec<_> = (0..BENCH_N)
        .map(|_| {
            let mut front = a.clone();
            let mut back = b.clone();
            let start = Instant::now();
            front.append(&mut back);
            let end = Instant::now();
            end.duration_since(start)
        })
        .collect();

    samples.sort();

    // The median below averages the two middle samples, which is only the
    // right formula for an even number of samples.
    assert!(BENCH_N % 2 == 0);
    let upper_mid = samples.len() / 2;
    let median = (samples[upper_mid - 1] + samples[upper_mid]) / 2;

    println!(
        "\ncustom-bench vec_deque_append {:?} ns/iter\n",
        median.as_nanos()
    );
}

View File

@ -202,6 +202,23 @@ unsafe fn copy_nonoverlapping(&self, dst: usize, src: usize, len: usize) {
len);
}
/// Returns the two parts of the buffer that are not used by the `VecDeque`.
///
/// When the deque is empty (`head == tail`) the whole buffer is returned,
/// split at `head`, with the part starting at `head` first.
///
/// # Safety
///
/// NOTE(review): presumably `unsafe` because the returned slices cover storage
/// that does not hold live elements, so they should only be written to, never
/// read from -- confirm against the buffer's initialization guarantees.
#[inline]
unsafe fn unused_as_mut_slices<'a>(&'a mut self) -> (&'a mut [T], &'a mut [T]) {
    let (head, tail) = (self.head, self.tail);
    let buf = self.buffer_as_mut_slice();
    if head == tail {
        // Swapping the arguments of `ring_slices` doesn't help when
        // `head == tail`, so split by hand: everything from `head` to the end
        // of the buffer comes first, followed by everything before `head`.
        let (front_part, back_part) = buf.split_at_mut(head);
        (back_part, front_part)
    } else {
        // New elements are written at `head`, so the part of the ring running
        // from `head` round to `tail` is the unused one. Calling `ring_slices`
        // with `tail` and `head` swapped returns exactly that part.
        RingSlices::ring_slices(buf, tail, head)
    }
}
/// Copies a potentially wrapping block of memory len long from src to dest.
/// (abs(dst - src) + len) must be no larger than cap() (There must be at
/// most one continuous overlapping region between src and dest).
@ -1834,8 +1851,148 @@ pub fn split_off(&mut self, at: usize) -> Self {
#[inline]
#[stable(feature = "append", since = "1.4.0")]
pub fn append(&mut self, other: &mut Self) {
    // Moves every element of `other` to the back of `self` using a handful of
    // bulk slice copies instead of one push per element, then marks `other`
    // as empty.

    // Copies all values from `src_slice` to the start of `dst_slice`.
    //
    // Unsafe because the values are duplicated bitwise whether or not `T` is
    // `Copy`: the caller has to make sure that only one of the two copies is
    // ever used (and dropped) afterwards. Panics if `dst_slice` is shorter
    // than `src_slice`.
    unsafe fn copy_whole_slice<T>(src_slice: &[T], dst_slice: &mut [T]) {
        let len = src_slice.len();
        ptr::copy_nonoverlapping(src_slice.as_ptr(), dst_slice[..len].as_mut_ptr(), len);
    }

    // Maps the one-past-the-end index `cap` back to the start of the buffer.
    //
    // `dst_high` can run right up to the end of the buffer. When the appended
    // values fill it exactly, `head + src_total` equals `cap`, which is not a
    // valid index: a later `push_back` would write past the allocation.
    fn wrap_head(index: usize, cap: usize) -> usize {
        debug_assert!(index <= cap);
        if index == cap { 0 } else { index }
    }

    let src_total = other.len();

    // Guarantees there is space in `self` for `other`.
    self.reserve(src_total);

    // Read only after `reserve`, which may have changed the capacity.
    let cap = self.cap();

    self.head = {
        let original_head = self.head;

        // The goal is to copy all values from `other` into `self`. To avoid any
        // mismatch, all valid values in `other` are retrieved...
        let (src_high, src_low) = other.as_slices();
        // and unoccupied parts of self are retrieved.
        //
        // SAFETY: the returned slices are only ever used as copy destinations
        // below; nothing is read out of them.
        let (dst_high, dst_low) = unsafe { self.unused_as_mut_slices() };

        // Then all that is needed is to copy all values from
        // src (src_high and src_low) to dst (dst_high and dst_low).
        //
        // other [o o o . . . . . o o o o]
        //       [5 6 7]         [1 2 3 4]
        //       src_low         src_high
        //
        // self  [. . . . . . o o o o . .]
        //       [3 4 5 6 7 .]       [1 2]
        //       dst_low             dst_high
        //
        // Values are not copied one by one but as slices in `copy_whole_slice`.
        // What slices are used depends on various properties of src and dst.
        // There are 6 cases in total:
        //     1. `src` is contiguous and fits in dst_high
        //     2. `src` is contiguous and does not fit in dst_high
        //     3. `src` is discontiguous and fits in dst_high
        //     4. `src` is discontiguous and does not fit in dst_high
        //        + src_high is smaller than dst_high
        //     5. `src` is discontiguous and does not fit in dst_high
        //        + dst_high is smaller than src_high
        //     6. `src` is discontiguous and does not fit in dst_high
        //        + dst_high is the same size as src_high
        //
        // SAFETY (all `copy_whole_slice` calls below): the sources borrow
        // `other` and the destinations borrow `self`, two distinct deques, so
        // the copied ranges cannot overlap. The values left behind in `other`
        // are made inaccessible at the end of this function, so every value
        // keeps exactly one owner.
        let src_contiguous = src_low.is_empty();
        let dst_high_fits_src = dst_high.len() >= src_total;
        match (src_contiguous, dst_high_fits_src) {
            (true, true) => {
                // 1.
                // other [. . . o o o . . . . . .]
                //       []    [1 1 1]
                //
                // self  [. o o o o o . . . . . .]
                //       [.]         [1 1 1 . . .]
                unsafe {
                    copy_whole_slice(src_high, dst_high);
                }
                // Everything landed in `dst_high`; wrap in case it was
                // filled up to the end of the buffer.
                wrap_head(original_head + src_total, cap)
            }
            (true, false) => {
                // 2.
                // other [. . . o o o o o . . . .]
                //       []    [1 1 2 2 2]
                //
                // self  [. . . . . . . o o o . .]
                //       [2 2 2 . . . .]     [1 1]
                let (src_1, src_2) = src_high.split_at(dst_high.len());
                unsafe {
                    copy_whole_slice(src_1, dst_high);
                    copy_whole_slice(src_2, dst_low);
                }
                // The new head is the number of values that spilled over
                // into `dst_low`.
                src_total - dst_high.len()
            }
            (false, true) => {
                // 3.
                // other [o o . . . . . . . o o o]
                //       [2 2]             [1 1 1]
                //
                // self  [. o o . . . . . . . . .]
                //       [.]   [1 1 1 2 2 . . . .]
                let (dst_1, dst_2) = dst_high.split_at_mut(src_high.len());
                unsafe {
                    copy_whole_slice(src_high, dst_1);
                    copy_whole_slice(src_low, dst_2);
                }
                // Same as case 1: `dst_high` may have been filled exactly.
                wrap_head(original_head + src_total, cap)
            }
            (false, false) => {
                if src_high.len() < dst_high.len() {
                    // 4.
                    // other [o o o . . . . . . o o o]
                    //       [2 3 3]           [1 1 1]
                    //
                    // self  [. . . . . . o o . . . .]
                    //       [3 3 . . . .]   [1 1 1 2]
                    let (dst_1, dst_2) = dst_high.split_at_mut(src_high.len());
                    let (src_2, src_3) = src_low.split_at(dst_2.len());
                    unsafe {
                        copy_whole_slice(src_high, dst_1);
                        copy_whole_slice(src_2, dst_2);
                        copy_whole_slice(src_3, dst_low);
                    }
                    src_3.len()
                } else if src_high.len() > dst_high.len() {
                    // 5.
                    // other [o o o . . . . . o o o o]
                    //       [3 3 3]         [1 1 2 2]
                    //
                    // self  [. . . . . . o o o o . .]
                    //       [2 2 3 3 3 .]       [1 1]
                    let (src_1, src_2) = src_high.split_at(dst_high.len());
                    let (dst_2, dst_3) = dst_low.split_at_mut(src_2.len());
                    unsafe {
                        copy_whole_slice(src_1, dst_high);
                        copy_whole_slice(src_2, dst_2);
                        copy_whole_slice(src_low, dst_3);
                    }
                    dst_2.len() + src_low.len()
                } else {
                    // 6.
                    // other [o o . . . . . . . o o o]
                    //       [2 2]             [1 1 1]
                    //
                    // self  [. . . . . . . o o . . .]
                    //       [2 2 . . . . .]   [1 1 1]
                    unsafe {
                        copy_whole_slice(src_high, dst_high);
                        copy_whole_slice(src_low, dst_low);
                    }
                    src_low.len()
                }
            }
        }
    };

    // Some values now exist in both `other` and `self` but are made inaccessible in `other`.
    other.tail = other.head;
}
/// Retains only the elements specified by the predicate.

View File

@ -928,6 +928,107 @@ fn test_append() {
assert_eq!(a.iter().cloned().collect::<Vec<_>>(), []);
}
/// Appends deques of many different internal layouts to each other and checks
/// both the resulting contents and that both deques remain usable afterwards.
#[test]
fn test_append_permutations() {
    // Builds a deque by pushing `push_back` values at the back, then
    // `push_front` values at the front, then popping `pop_back` values from
    // the back and `pop_front` values from the front. Varying the four counts
    // varies where the contents sit inside the ring buffer.
    fn construct_vec_deque(
        push_back: usize,
        pop_back: usize,
        push_front: usize,
        pop_front: usize,
    ) -> VecDeque<usize> {
        let mut out = VecDeque::new();
        for a in 0..push_back {
            out.push_back(a);
        }
        for b in 0..push_front {
            out.push_front(push_back + b);
        }
        for _ in 0..pop_back {
            out.pop_back();
        }
        for _ in 0..pop_front {
            out.pop_front();
        }
        out
    }

    const MAX: usize = 5;
    // Larger than any value `construct_vec_deque` can produce (those are
    // all below `2 * MAX`).
    const SENTINEL: usize = 999;

    // Every `(push_back, pop_back, push_front, pop_front)` combination that
    // never pops more values than were pushed. The pop bounds are inclusive
    // so that empty and fully drained deques are generated too.
    fn layouts() -> Vec<(usize, usize, usize, usize)> {
        let mut out = Vec::new();
        for push_back in 0..MAX {
            for push_front in 0..MAX {
                let pushed = push_back + push_front;
                for pop_back in 0..(pushed + 1) {
                    for pop_front in 0..(pushed - pop_back + 1) {
                        out.push((push_back, pop_back, push_front, pop_front));
                    }
                }
            }
        }
        out
    }

    let layouts = layouts();

    for &(src_push_back, src_pop_back, src_push_front, src_pop_front) in &layouts {
        for &(dst_push_back, dst_pop_back, dst_push_front, dst_pop_front) in &layouts {
            // Both deques are rebuilt from their parameters for every pair.
            // Cloning a prototype instead is not guaranteed to reproduce its
            // internal layout, which is the very thing under test.
            let mut src = construct_vec_deque(
                src_push_back,
                src_pop_back,
                src_push_front,
                src_pop_front,
            );
            let mut dst = construct_vec_deque(
                dst_push_back,
                dst_pop_back,
                dst_push_front,
                dst_pop_front,
            );

            // Assert that appending `src` to `dst` gives the same order
            // of values as iterating over both in sequence.
            let mut correct = dst
                .iter()
                .chain(src.iter())
                .cloned()
                .collect::<Vec<usize>>();
            dst.append(&mut src);
            assert_eq!(dst, correct);
            assert!(src.is_empty());

            // Keep using both deques: bookkeeping left in a bad state by
            // `append` can still yield the right contents when read back
            // and only shows up on the next modification.
            dst.push_back(SENTINEL);
            correct.push(SENTINEL);
            assert_eq!(dst, correct);

            src.push_back(SENTINEL);
            assert_eq!(src.len(), 1);
            assert_eq!(src.front(), Some(&SENTINEL));
        }
    }
}
/// Test helper that records its own destruction: dropping a `DropCounter`
/// adds one to the counter it borrows.
struct DropCounter<'counter> {
    // Incremented once each time a value of this type is dropped.
    count: &'counter mut u32,
}

impl<'counter> Drop for DropCounter<'counter> {
    /// Bumps the borrowed counter by one.
    fn drop(&mut self) {
        *self.count += 1;
    }
}
/// Checks that `append` transfers ownership of the moved values: every
/// element must be dropped exactly once, whichever deque it started in.
#[test]
fn test_append_double_drop() {
    let mut drops_in_dst = 0;
    let mut drops_in_src = 0;
    {
        let mut dst = VecDeque::new();
        dst.push_back(DropCounter { count: &mut drops_in_dst });

        let mut src = VecDeque::new();
        src.push_back(DropCounter { count: &mut drops_in_src });

        dst.append(&mut src);
        // Both deques go out of scope here; a value still reachable through
        // `src` after the append would be dropped a second time.
    }
    assert_eq!(drops_in_dst, 1);
    assert_eq!(drops_in_src, 1);
}
#[test]
fn test_retain() {
let mut buf = VecDeque::new();