Auto merge of #36743 - SimonSapin:dedup-by, r=alexcrichton
Add Vec::dedup_by and Vec::dedup_by_key
This commit is contained in:
commit
17af6b94b2
@ -781,6 +781,130 @@ impl<T> Vec<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Removes consecutive elements in the vector that resolve to the same key.
|
||||||
|
///
|
||||||
|
/// If the vector is sorted, this removes all duplicates.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// #![feature(dedup_by)]
|
||||||
|
///
|
||||||
|
/// let mut vec = vec![10, 20, 21, 30, 20];
|
||||||
|
///
|
||||||
|
/// vec.dedup_by_key(|i| *i / 10);
|
||||||
|
///
|
||||||
|
/// assert_eq!(vec, [10, 20, 30, 20]);
|
||||||
|
/// ```
|
||||||
|
#[unstable(feature = "dedup_by", reason = "recently added", issue = "37087")]
|
||||||
|
#[inline]
|
||||||
|
pub fn dedup_by_key<F, K>(&mut self, mut key: F) where F: FnMut(&mut T) -> K, K: PartialEq {
|
||||||
|
self.dedup_by(|a, b| key(a) == key(b))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Removes consecutive elements in the vector that resolve to the same key.
|
||||||
|
///
|
||||||
|
/// If the vector is sorted, this removes all duplicates.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// #![feature(dedup_by)]
|
||||||
|
/// use std::ascii::AsciiExt;
|
||||||
|
///
|
||||||
|
/// let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"];
|
||||||
|
///
|
||||||
|
/// vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b));
|
||||||
|
///
|
||||||
|
/// assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
|
||||||
|
/// ```
|
||||||
|
#[unstable(feature = "dedup_by", reason = "recently added", issue = "37087")]
|
||||||
|
pub fn dedup_by<F>(&mut self, mut same_bucket: F) where F: FnMut(&mut T, &mut T) -> bool {
|
||||||
|
unsafe {
|
||||||
|
// Although we have a mutable reference to `self`, we cannot make
|
||||||
|
// *arbitrary* changes. The `PartialEq` comparisons could panic, so we
|
||||||
|
// must ensure that the vector is in a valid state at all time.
|
||||||
|
//
|
||||||
|
// The way that we handle this is by using swaps; we iterate
|
||||||
|
// over all the elements, swapping as we go so that at the end
|
||||||
|
// the elements we wish to keep are in the front, and those we
|
||||||
|
// wish to reject are at the back. We can then truncate the
|
||||||
|
// vector. This operation is still O(n).
|
||||||
|
//
|
||||||
|
// Example: We start in this state, where `r` represents "next
|
||||||
|
// read" and `w` represents "next_write`.
|
||||||
|
//
|
||||||
|
// r
|
||||||
|
// +---+---+---+---+---+---+
|
||||||
|
// | 0 | 1 | 1 | 2 | 3 | 3 |
|
||||||
|
// +---+---+---+---+---+---+
|
||||||
|
// w
|
||||||
|
//
|
||||||
|
// Comparing self[r] against self[w-1], this is not a duplicate, so
|
||||||
|
// we swap self[r] and self[w] (no effect as r==w) and then increment both
|
||||||
|
// r and w, leaving us with:
|
||||||
|
//
|
||||||
|
// r
|
||||||
|
// +---+---+---+---+---+---+
|
||||||
|
// | 0 | 1 | 1 | 2 | 3 | 3 |
|
||||||
|
// +---+---+---+---+---+---+
|
||||||
|
// w
|
||||||
|
//
|
||||||
|
// Comparing self[r] against self[w-1], this value is a duplicate,
|
||||||
|
// so we increment `r` but leave everything else unchanged:
|
||||||
|
//
|
||||||
|
// r
|
||||||
|
// +---+---+---+---+---+---+
|
||||||
|
// | 0 | 1 | 1 | 2 | 3 | 3 |
|
||||||
|
// +---+---+---+---+---+---+
|
||||||
|
// w
|
||||||
|
//
|
||||||
|
// Comparing self[r] against self[w-1], this is not a duplicate,
|
||||||
|
// so swap self[r] and self[w] and advance r and w:
|
||||||
|
//
|
||||||
|
// r
|
||||||
|
// +---+---+---+---+---+---+
|
||||||
|
// | 0 | 1 | 2 | 1 | 3 | 3 |
|
||||||
|
// +---+---+---+---+---+---+
|
||||||
|
// w
|
||||||
|
//
|
||||||
|
// Not a duplicate, repeat:
|
||||||
|
//
|
||||||
|
// r
|
||||||
|
// +---+---+---+---+---+---+
|
||||||
|
// | 0 | 1 | 2 | 3 | 1 | 3 |
|
||||||
|
// +---+---+---+---+---+---+
|
||||||
|
// w
|
||||||
|
//
|
||||||
|
// Duplicate, advance r. End of vec. Truncate to w.
|
||||||
|
|
||||||
|
let ln = self.len();
|
||||||
|
if ln <= 1 {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Avoid bounds checks by using raw pointers.
|
||||||
|
let p = self.as_mut_ptr();
|
||||||
|
let mut r: usize = 1;
|
||||||
|
let mut w: usize = 1;
|
||||||
|
|
||||||
|
while r < ln {
|
||||||
|
let p_r = p.offset(r as isize);
|
||||||
|
let p_wm1 = p.offset((w - 1) as isize);
|
||||||
|
if !same_bucket(&mut *p_r, &mut *p_wm1) {
|
||||||
|
if r != w {
|
||||||
|
let p_w = p_wm1.offset(1);
|
||||||
|
mem::swap(&mut *p_r, &mut *p_w);
|
||||||
|
}
|
||||||
|
w += 1;
|
||||||
|
}
|
||||||
|
r += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.truncate(w);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Appends an element to the back of a collection.
|
/// Appends an element to the back of a collection.
|
||||||
///
|
///
|
||||||
/// # Panics
|
/// # Panics
|
||||||
@ -1155,90 +1279,9 @@ impl<T: PartialEq> Vec<T> {
|
|||||||
/// assert_eq!(vec, [1, 2, 3, 2]);
|
/// assert_eq!(vec, [1, 2, 3, 2]);
|
||||||
/// ```
|
/// ```
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
|
#[inline]
|
||||||
pub fn dedup(&mut self) {
|
pub fn dedup(&mut self) {
|
||||||
unsafe {
|
self.dedup_by(|a, b| a == b)
|
||||||
// Although we have a mutable reference to `self`, we cannot make
|
|
||||||
// *arbitrary* changes. The `PartialEq` comparisons could panic, so we
|
|
||||||
// must ensure that the vector is in a valid state at all time.
|
|
||||||
//
|
|
||||||
// The way that we handle this is by using swaps; we iterate
|
|
||||||
// over all the elements, swapping as we go so that at the end
|
|
||||||
// the elements we wish to keep are in the front, and those we
|
|
||||||
// wish to reject are at the back. We can then truncate the
|
|
||||||
// vector. This operation is still O(n).
|
|
||||||
//
|
|
||||||
// Example: We start in this state, where `r` represents "next
|
|
||||||
// read" and `w` represents "next_write`.
|
|
||||||
//
|
|
||||||
// r
|
|
||||||
// +---+---+---+---+---+---+
|
|
||||||
// | 0 | 1 | 1 | 2 | 3 | 3 |
|
|
||||||
// +---+---+---+---+---+---+
|
|
||||||
// w
|
|
||||||
//
|
|
||||||
// Comparing self[r] against self[w-1], this is not a duplicate, so
|
|
||||||
// we swap self[r] and self[w] (no effect as r==w) and then increment both
|
|
||||||
// r and w, leaving us with:
|
|
||||||
//
|
|
||||||
// r
|
|
||||||
// +---+---+---+---+---+---+
|
|
||||||
// | 0 | 1 | 1 | 2 | 3 | 3 |
|
|
||||||
// +---+---+---+---+---+---+
|
|
||||||
// w
|
|
||||||
//
|
|
||||||
// Comparing self[r] against self[w-1], this value is a duplicate,
|
|
||||||
// so we increment `r` but leave everything else unchanged:
|
|
||||||
//
|
|
||||||
// r
|
|
||||||
// +---+---+---+---+---+---+
|
|
||||||
// | 0 | 1 | 1 | 2 | 3 | 3 |
|
|
||||||
// +---+---+---+---+---+---+
|
|
||||||
// w
|
|
||||||
//
|
|
||||||
// Comparing self[r] against self[w-1], this is not a duplicate,
|
|
||||||
// so swap self[r] and self[w] and advance r and w:
|
|
||||||
//
|
|
||||||
// r
|
|
||||||
// +---+---+---+---+---+---+
|
|
||||||
// | 0 | 1 | 2 | 1 | 3 | 3 |
|
|
||||||
// +---+---+---+---+---+---+
|
|
||||||
// w
|
|
||||||
//
|
|
||||||
// Not a duplicate, repeat:
|
|
||||||
//
|
|
||||||
// r
|
|
||||||
// +---+---+---+---+---+---+
|
|
||||||
// | 0 | 1 | 2 | 3 | 1 | 3 |
|
|
||||||
// +---+---+---+---+---+---+
|
|
||||||
// w
|
|
||||||
//
|
|
||||||
// Duplicate, advance r. End of vec. Truncate to w.
|
|
||||||
|
|
||||||
let ln = self.len();
|
|
||||||
if ln <= 1 {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Avoid bounds checks by using raw pointers.
|
|
||||||
let p = self.as_mut_ptr();
|
|
||||||
let mut r: usize = 1;
|
|
||||||
let mut w: usize = 1;
|
|
||||||
|
|
||||||
while r < ln {
|
|
||||||
let p_r = p.offset(r as isize);
|
|
||||||
let p_wm1 = p.offset((w - 1) as isize);
|
|
||||||
if *p_r != *p_wm1 {
|
|
||||||
if r != w {
|
|
||||||
let p_w = p_wm1.offset(1);
|
|
||||||
mem::swap(&mut *p_r, &mut *p_w);
|
|
||||||
}
|
|
||||||
w += 1;
|
|
||||||
}
|
|
||||||
r += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.truncate(w);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#![feature(collections)]
|
#![feature(collections)]
|
||||||
#![feature(collections_bound)]
|
#![feature(collections_bound)]
|
||||||
#![feature(const_fn)]
|
#![feature(const_fn)]
|
||||||
|
#![feature(dedup_by)]
|
||||||
#![feature(enumset)]
|
#![feature(enumset)]
|
||||||
#![feature(pattern)]
|
#![feature(pattern)]
|
||||||
#![feature(rand)]
|
#![feature(rand)]
|
||||||
|
@ -315,47 +315,6 @@ fn test_clear() {
|
|||||||
// If the unsafe block didn't drop things properly, we blow up here.
|
// If the unsafe block didn't drop things properly, we blow up here.
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_dedup() {
|
|
||||||
fn case(a: Vec<i32>, b: Vec<i32>) {
|
|
||||||
let mut v = a;
|
|
||||||
v.dedup();
|
|
||||||
assert_eq!(v, b);
|
|
||||||
}
|
|
||||||
case(vec![], vec![]);
|
|
||||||
case(vec![1], vec![1]);
|
|
||||||
case(vec![1, 1], vec![1]);
|
|
||||||
case(vec![1, 2, 3], vec![1, 2, 3]);
|
|
||||||
case(vec![1, 1, 2, 3], vec![1, 2, 3]);
|
|
||||||
case(vec![1, 2, 2, 3], vec![1, 2, 3]);
|
|
||||||
case(vec![1, 2, 3, 3], vec![1, 2, 3]);
|
|
||||||
case(vec![1, 1, 2, 2, 2, 3, 3], vec![1, 2, 3]);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_dedup_unique() {
|
|
||||||
let mut v0: Vec<Box<_>> = vec![box 1, box 1, box 2, box 3];
|
|
||||||
v0.dedup();
|
|
||||||
let mut v1: Vec<Box<_>> = vec![box 1, box 2, box 2, box 3];
|
|
||||||
v1.dedup();
|
|
||||||
let mut v2: Vec<Box<_>> = vec![box 1, box 2, box 3, box 3];
|
|
||||||
v2.dedup();
|
|
||||||
// If the boxed pointers were leaked or otherwise misused, valgrind
|
|
||||||
// and/or rt should raise errors.
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_dedup_shared() {
|
|
||||||
let mut v0: Vec<Box<_>> = vec![box 1, box 1, box 2, box 3];
|
|
||||||
v0.dedup();
|
|
||||||
let mut v1: Vec<Box<_>> = vec![box 1, box 2, box 2, box 3];
|
|
||||||
v1.dedup();
|
|
||||||
let mut v2: Vec<Box<_>> = vec![box 1, box 2, box 3, box 3];
|
|
||||||
v2.dedup();
|
|
||||||
// If the pointers were leaked or otherwise misused, valgrind and/or
|
|
||||||
// rt should raise errors.
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_retain() {
|
fn test_retain() {
|
||||||
let mut v = vec![1, 2, 3, 4, 5];
|
let mut v = vec![1, 2, 3, 4, 5];
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
// option. This file may not be copied, modified, or distributed
|
// option. This file may not be copied, modified, or distributed
|
||||||
// except according to those terms.
|
// except according to those terms.
|
||||||
|
|
||||||
|
use std::ascii::AsciiExt;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::iter::{FromIterator, repeat};
|
use std::iter::{FromIterator, repeat};
|
||||||
use std::mem::size_of;
|
use std::mem::size_of;
|
||||||
@ -213,6 +214,60 @@ fn test_retain() {
|
|||||||
assert_eq!(vec, [2, 4]);
|
assert_eq!(vec, [2, 4]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_dedup() {
|
||||||
|
fn case(a: Vec<i32>, b: Vec<i32>) {
|
||||||
|
let mut v = a;
|
||||||
|
v.dedup();
|
||||||
|
assert_eq!(v, b);
|
||||||
|
}
|
||||||
|
case(vec![], vec![]);
|
||||||
|
case(vec![1], vec![1]);
|
||||||
|
case(vec![1, 1], vec![1]);
|
||||||
|
case(vec![1, 2, 3], vec![1, 2, 3]);
|
||||||
|
case(vec![1, 1, 2, 3], vec![1, 2, 3]);
|
||||||
|
case(vec![1, 2, 2, 3], vec![1, 2, 3]);
|
||||||
|
case(vec![1, 2, 3, 3], vec![1, 2, 3]);
|
||||||
|
case(vec![1, 1, 2, 2, 2, 3, 3], vec![1, 2, 3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_dedup_by_key() {
|
||||||
|
fn case(a: Vec<i32>, b: Vec<i32>) {
|
||||||
|
let mut v = a;
|
||||||
|
v.dedup_by_key(|i| *i / 10);
|
||||||
|
assert_eq!(v, b);
|
||||||
|
}
|
||||||
|
case(vec![], vec![]);
|
||||||
|
case(vec![10], vec![10]);
|
||||||
|
case(vec![10, 11], vec![10]);
|
||||||
|
case(vec![10, 20, 30], vec![10, 20, 30]);
|
||||||
|
case(vec![10, 11, 20, 30], vec![10, 20, 30]);
|
||||||
|
case(vec![10, 20, 21, 30], vec![10, 20, 30]);
|
||||||
|
case(vec![10, 20, 30, 31], vec![10, 20, 30]);
|
||||||
|
case(vec![10, 11, 20, 21, 22, 30, 31], vec![10, 20, 30]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_dedup_by() {
|
||||||
|
let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"];
|
||||||
|
vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b));
|
||||||
|
|
||||||
|
assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_dedup_unique() {
|
||||||
|
let mut v0: Vec<Box<_>> = vec![box 1, box 1, box 2, box 3];
|
||||||
|
v0.dedup();
|
||||||
|
let mut v1: Vec<Box<_>> = vec![box 1, box 2, box 2, box 3];
|
||||||
|
v1.dedup();
|
||||||
|
let mut v2: Vec<Box<_>> = vec![box 1, box 2, box 3, box 3];
|
||||||
|
v2.dedup();
|
||||||
|
// If the boxed pointers were leaked or otherwise misused, valgrind
|
||||||
|
// and/or rt should raise errors.
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn zero_sized_values() {
|
fn zero_sized_values() {
|
||||||
let mut v = Vec::new();
|
let mut v = Vec::new();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user