From d57f25907bc4247b4d98efce5ab6948c35baa12d Mon Sep 17 00:00:00 2001 From: Clark Gaebel Date: Tue, 16 Dec 2014 17:45:03 -0500 Subject: [PATCH] [collections] Adds `drain`: a way to sneak out the elements while clearing. It is useful to move all the elements out of some collections without deallocating the underlying buffer. It came up in IRC, and this patch implements it as `drain`. This has been discussed as part of RFC 509. r? @Gankro cc: @frankmcsherry --- src/libcollections/binary_heap.rs | 41 +++++++- src/libcollections/ring_buf.rs | 130 ++++++++++++++++++++++- src/libcollections/vec.rs | 150 ++++++++++++++++++++++++++- src/libstd/collections/hash/map.rs | 61 +++++++++-- src/libstd/collections/hash/set.rs | 57 +++++++++- src/libstd/collections/hash/table.rs | 49 +++++++++ 6 files changed, 470 insertions(+), 18 deletions(-) diff --git a/src/libcollections/binary_heap.rs b/src/libcollections/binary_heap.rs index 94211592698..f362abcc4e5 100644 --- a/src/libcollections/binary_heap.rs +++ b/src/libcollections/binary_heap.rs @@ -553,9 +553,18 @@ impl BinaryHeap { #[unstable = "matches collection reform specification, waiting for dust to settle"] pub fn is_empty(&self) -> bool { self.len() == 0 } + /// Clears the queue, returning an iterator over the removed elements. + #[inline] + #[unstable = "matches collection reform specification, waiting for dust to settle"] + pub fn drain<'a>(&'a mut self) -> Drain<'a, T> { + Drain { + iter: self.data.drain(), + } + } + /// Drops all items from the queue. #[unstable = "matches collection reform specification, waiting for dust to settle"] - pub fn clear(&mut self) { self.data.truncate(0) } + pub fn clear(&mut self) { self.drain(); } } /// `BinaryHeap` iterator. @@ -598,6 +607,26 @@ impl DoubleEndedIterator for MoveItems { impl ExactSizeIterator for MoveItems {} +/// An iterator that drains a `BinaryHeap`. +pub struct Drain<'a, T: 'a> { + iter: vec::Drain<'a, T>, +} + +impl<'a, T: 'a> Iterator for Drain<'a, T> { + #[inline] + fn next(&mut self) -> Option { self.iter.next() } + + #[inline] + fn size_hint(&self) -> (uint, Option) { self.iter.size_hint() } +} + +impl<'a, T: 'a> DoubleEndedIterator for Drain<'a, T> { + #[inline] + fn next_back(&mut self) -> Option { self.iter.next_back() } +} + +impl<'a, T: 'a> ExactSizeIterator for Drain<'a, T> {} + impl FromIterator for BinaryHeap { fn from_iter>(iter: Iter) -> BinaryHeap { let vec: Vec = iter.collect(); @@ -822,4 +851,14 @@ mod tests { assert_eq!(q.pop().unwrap(), x); } } + + #[test] + fn test_drain() { + let mut q: BinaryHeap<_> = + [9u, 8, 7, 6, 5, 4, 3, 2, 1].iter().cloned().collect(); + + assert_eq!(q.drain().take(5).count(), 5); + + assert!(q.is_empty()); + } } diff --git a/src/libcollections/ring_buf.rs b/src/libcollections/ring_buf.rs index cdb92d302e9..a282d4ea8e2 100644 --- a/src/libcollections/ring_buf.rs +++ b/src/libcollections/ring_buf.rs @@ -443,6 +443,27 @@ impl RingBuf { #[unstable = "matches collection reform specification, waiting for dust to settle"] pub fn is_empty(&self) -> bool { self.len() == 0 } + /// Creates a draining iterator that clears the `RingBuf` and iterates over + /// the removed items from start to end. + /// + /// # Examples + /// + /// ``` + /// use std::collections::RingBuf; + /// + /// let mut v = RingBuf::new(); + /// v.push_back(1i); + /// assert_eq!(v.drain().next(), Some(1)); + /// assert!(v.is_empty()); + /// ``` + #[inline] + #[unstable = "matches collection reform specification, waiting for dust to settle"] + pub fn drain<'a>(&'a mut self) -> Drain<'a, T> { + Drain { + inner: self, + } + } + /// Clears the buffer, removing all values. /// /// # Examples @@ -456,10 +477,9 @@ impl RingBuf { /// assert!(v.is_empty()); /// ``` #[unstable = "matches collection reform specification, waiting for dust to settle"] + #[inline] pub fn clear(&mut self) { - while self.pop_front().is_some() {} - self.head = 0; - self.tail = 0; + self.drain(); } /// Provides a reference to the front element, or `None` if the sequence is @@ -1177,9 +1197,44 @@ impl DoubleEndedIterator for MoveItems { } } - impl ExactSizeIterator for MoveItems {} +/// A draining RingBuf iterator +pub struct Drain<'a, T: 'a> { + inner: &'a mut RingBuf, +} + +#[unsafe_destructor] +impl<'a, T: 'a> Drop for Drain<'a, T> { + fn drop(&mut self) { + for _ in *self {} + self.inner.head = 0; + self.inner.tail = 0; + } +} + +impl<'a, T: 'a> Iterator for Drain<'a, T> { + #[inline] + fn next(&mut self) -> Option { + self.inner.pop_front() + } + + #[inline] + fn size_hint(&self) -> (uint, Option) { + let len = self.inner.len(); + (len, Some(len)) + } +} + +impl<'a, T: 'a> DoubleEndedIterator for Drain<'a, T> { + #[inline] + fn next_back(&mut self) -> Option { + self.inner.pop_back() + } +} + +impl<'a, T: 'a> ExactSizeIterator for Drain<'a, T> {} + impl PartialEq for RingBuf { fn eq(&self, other: &RingBuf) -> bool { self.len() == other.len() && @@ -1789,6 +1844,73 @@ mod tests { } } + #[test] + fn test_drain() { + + // Empty iter + { + let mut d: RingBuf = RingBuf::new(); + + { + let mut iter = d.drain(); + + assert_eq!(iter.size_hint(), (0, Some(0))); + assert_eq!(iter.next(), None); + assert_eq!(iter.size_hint(), (0, Some(0))); + } + + assert!(d.is_empty()); + } + + // simple iter + { + let mut d = RingBuf::new(); + for i in range(0i, 5) { + d.push_back(i); + } + + assert_eq!(d.drain().collect::>(), [0, 1, 2, 3, 4]); + assert!(d.is_empty()); + } + + // wrapped iter + { + let mut d = RingBuf::new(); + for i in range(0i, 5) { + d.push_back(i); + } + for i in range(6, 9) { + d.push_front(i); + } + + assert_eq!(d.drain().collect::>(), [8,7,6,0,1,2,3,4]); + assert!(d.is_empty()); + } + + // partially used + { + let mut d = RingBuf::new(); + for i in range(0i, 5) { + d.push_back(i); + } + for i in range(6, 9) { + d.push_front(i); + } + + { + let mut it = d.drain(); + assert_eq!(it.size_hint(), (8, Some(8))); + assert_eq!(it.next(), Some(8)); + assert_eq!(it.size_hint(), (7, Some(7))); + assert_eq!(it.next_back(), Some(4)); + assert_eq!(it.size_hint(), (6, Some(6))); + assert_eq!(it.next(), Some(7)); + assert_eq!(it.size_hint(), (5, Some(5))); + } + assert!(d.is_empty()); + } + } + #[test] fn test_from_iter() { use std::iter; diff --git a/src/libcollections/vec.rs b/src/libcollections/vec.rs index 94e6103f05f..41f3eb81ff7 100644 --- a/src/libcollections/vec.rs +++ b/src/libcollections/vec.rs @@ -1128,6 +1128,38 @@ impl Vec { } } + /// Creates a draining iterator that clears the `Vec` and iterates over + /// the removed items from start to end. + /// + /// # Examples + /// + /// ``` + /// let mut v = vec!["a".to_string(), "b".to_string()]; + /// for s in v.drain() { + /// // s has type String, not &String + /// println!("{}", s); + /// } + /// assert!(v.is_empty()); + /// ``` + #[inline] + #[unstable = "matches collection reform specification, waiting for dust to settle"] + pub fn drain<'a>(&'a mut self) -> Drain<'a, T> { + unsafe { + let begin = self.ptr as *const T; + let end = if mem::size_of::() == 0 { + (self.ptr as uint + self.len()) as *const T + } else { + self.ptr.offset(self.len() as int) as *const T + }; + self.set_len(0); + Drain { + ptr: begin, + end: end, + marker: ContravariantLifetime, + } + } + } + /// Clears the vector, removing all values. /// /// # Examples @@ -1384,8 +1416,9 @@ pub struct MoveItems { } impl MoveItems { - #[inline] /// Drops all items that have not yet been moved and returns the empty vector. + #[inline] + #[unstable] pub fn into_inner(mut self) -> Vec { unsafe { for _x in self { } @@ -1395,8 +1428,8 @@ impl MoveItems { } } - /// Deprecated, use into_inner() instead - #[deprecated = "renamed to into_inner()"] + /// Deprecated, use .into_inner() instead + #[deprecated = "use .into_inner() instead"] pub fn unwrap(self) -> Vec { self.into_inner() } } @@ -1472,6 +1505,84 @@ impl Drop for MoveItems { } } +/// An iterator that drains a vector. +#[unsafe_no_drop_flag] +pub struct Drain<'a, T> { + ptr: *const T, + end: *const T, + marker: ContravariantLifetime<'a>, +} + +impl<'a, T> Iterator for Drain<'a, T> { + #[inline] + fn next(&mut self) -> Option { + unsafe { + if self.ptr == self.end { + None + } else { + if mem::size_of::() == 0 { + // purposefully don't use 'ptr.offset' because for + // vectors with 0-size elements this would return the + // same pointer. + self.ptr = mem::transmute(self.ptr as uint + 1); + + // Use a non-null pointer value + Some(ptr::read(mem::transmute(1u))) + } else { + let old = self.ptr; + self.ptr = self.ptr.offset(1); + + Some(ptr::read(old)) + } + } + } + } + + #[inline] + fn size_hint(&self) -> (uint, Option) { + let diff = (self.end as uint) - (self.ptr as uint); + let size = mem::size_of::(); + let exact = diff / (if size == 0 {1} else {size}); + (exact, Some(exact)) + } +} + +impl<'a, T> DoubleEndedIterator for Drain<'a, T> { + #[inline] + fn next_back(&mut self) -> Option { + unsafe { + if self.end == self.ptr { + None + } else { + if mem::size_of::() == 0 { + // See above for why 'ptr.offset' isn't used + self.end = mem::transmute(self.end as uint - 1); + + // Use a non-null pointer value + Some(ptr::read(mem::transmute(1u))) + } else { + self.end = self.end.offset(-1); + + Some(ptr::read(self.end)) + } + } + } + } +} + +impl<'a, T> ExactSizeIterator for Drain<'a, T> {} + +#[unsafe_destructor] +impl<'a, T> Drop for Drain<'a, T> { + fn drop(&mut self) { + // self.ptr == self.end == null if drop has already been called, + // so we can use #[unsafe_no_drop_flag]. + + // destroy the remaining elements + for _x in *self {} + } +} + /// Converts an iterator of pairs into a pair of vectors. /// /// Returns a tuple containing two vectors where the i-th element of the first vector contains the @@ -2280,6 +2391,39 @@ mod tests { assert!(vec2 == vec![(), (), ()]); } + #[test] + fn test_drain_items() { + let mut vec = vec![1, 2, 3]; + let mut vec2: Vec = vec![]; + for i in vec.drain() { + vec2.push(i); + } + assert_eq!(vec, []); + assert_eq!(vec2, [ 1, 2, 3 ]); + } + + #[test] + fn test_drain_items_reverse() { + let mut vec = vec![1, 2, 3]; + let mut vec2: Vec = vec![]; + for i in vec.drain().rev() { + vec2.push(i); + } + assert_eq!(vec, []); + assert_eq!(vec2, [ 3, 2, 1 ]); + } + + #[test] + fn test_drain_items_zero_sized() { + let mut vec = vec![(), (), ()]; + let mut vec2: Vec<()> = vec![]; + for i in vec.drain() { + vec2.push(i); + } + assert_eq!(vec, []); + assert_eq!(vec2, [(), (), ()]); + } + #[test] fn test_into_boxed_slice() { let xs = vec![1u, 2, 3]; diff --git a/src/libstd/collections/hash/map.rs b/src/libstd/collections/hash/map.rs index 04dd5afdfa2..2020897fb3f 100644 --- a/src/libstd/collections/hash/map.rs +++ b/src/libstd/collections/hash/map.rs @@ -982,6 +982,35 @@ impl, V, S, H: Hasher> HashMap { #[unstable = "matches collection reform specification, waiting for dust to settle"] pub fn is_empty(&self) -> bool { self.len() == 0 } + /// Clears the map, returning all key-value pairs as an iterator. Keeps the + /// allocated memory for reuse. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut a = HashMap::new(); + /// a.insert(1u, "a"); + /// a.insert(2u, "b"); + /// + /// for (k, v) in a.drain().take(1) { + /// assert!(k == 1 || k == 2); + /// assert!(v == "a" || v == "b"); + /// } + /// + /// assert!(a.is_empty()); + /// ``` + #[inline] + #[unstable = "matches collection reform specification, waiting for dust to settle"] + pub fn drain(&mut self) -> Drain { + fn last_two((_, b, c): (A, B, C)) -> (B, C) { (b, c) } + + Drain { + inner: self.table.drain().map(last_two), + } + } + /// Clears the map, removing all key-value pairs. Keeps the allocated memory /// for reuse. /// @@ -996,16 +1025,9 @@ impl, V, S, H: Hasher> HashMap { /// assert!(a.is_empty()); /// ``` #[unstable = "matches collection reform specification, waiting for dust to settle"] + #[inline] pub fn clear(&mut self) { - let cap = self.table.capacity(); - let mut buckets = Bucket::first(&mut self.table); - - while buckets.index() != cap { - buckets = match buckets.peek() { - Empty(b) => b.next(), - Full(full) => full.take().0.next(), - }; - } + self.drain(); } /// Deprecated: Renamed to `get`. @@ -1306,6 +1328,16 @@ pub struct Values<'a, K: 'a, V: 'a> { inner: Map<(&'a K, &'a V), &'a V, Entries<'a, K, V>, fn((&'a K, &'a V)) -> &'a V> } +/// HashMap drain iterator +pub struct Drain<'a, K: 'a, V: 'a> { + inner: iter::Map< + (SafeHash, K, V), + (K, V), + table::Drain<'a, K, V>, + fn((SafeHash, K, V)) -> (K, V), + > +} + /// A view into a single occupied location in a HashMap pub struct OccupiedEntry<'a, K:'a, V:'a> { elem: FullBucket>, @@ -1360,6 +1392,17 @@ impl<'a, K, V> Iterator<&'a V> for Values<'a, K, V> { #[inline] fn size_hint(&self) -> (uint, Option) { self.inner.size_hint() } } +impl<'a, K: 'a, V: 'a> Iterator<(K, V)> for Drain<'a, K, V> { + #[inline] + fn next(&mut self) -> Option<(K, V)> { + self.inner.next() + } + #[inline] + fn size_hint(&self) -> (uint, Option) { + self.inner.size_hint() + } +} + impl<'a, K, V> OccupiedEntry<'a, K, V> { /// Gets a reference to the value in the entry pub fn get(&self) -> &V { diff --git a/src/libstd/collections/hash/set.rs b/src/libstd/collections/hash/set.rs index 67c0f887832..d75602a2716 100644 --- a/src/libstd/collections/hash/set.rs +++ b/src/libstd/collections/hash/set.rs @@ -21,7 +21,7 @@ use iter::{Iterator, IteratorExt, FromIterator, Map, FilterMap, Chain, Repeat, Z use option::Option::{Some, None, mod}; use result::Result::{Ok, Err}; -use super::map::{HashMap, MoveEntries, Keys, INITIAL_CAPACITY}; +use super::map::{mod, HashMap, MoveEntries, Keys, INITIAL_CAPACITY}; // FIXME(conventions): implement BitOr, BitAnd, BitXor, and Sub @@ -420,6 +420,14 @@ impl, S, H: Hasher> HashSet { #[unstable = "matches collection reform specification, waiting for dust to settle"] pub fn is_empty(&self) -> bool { self.map.len() == 0 } + /// Clears the set, returning all elements in an iterator. + #[inline] + #[unstable = "matches collection reform specification, waiting for dust to settle"] + pub fn drain(&mut self) -> Drain { + fn first((a, _): (A, B)) -> A { a } + Drain { iter: self.map.drain().map(first) } + } + /// Clears the set, removing all values. /// /// # Example @@ -626,6 +634,11 @@ pub struct SetMoveItems { iter: Map<(K, ()), K, MoveEntries, fn((K, ())) -> K> } +/// HashSet drain iterator +pub struct Drain<'a, K: 'a> { + iter: Map<(K, ()), K, map::Drain<'a, K, ()>, fn((K, ())) -> K>, +} + // `Repeat` is used to feed the filter closure an explicit capture // of a reference to the other set /// Set operations iterator, used directly for intersection and difference @@ -658,6 +671,11 @@ impl Iterator for SetMoveItems { fn size_hint(&self) -> (uint, Option) { self.iter.size_hint() } } +impl<'a, K: 'a> Iterator for Drain<'a, K> { + fn next(&mut self) -> Option { self.iter.next() } + fn size_hint(&self) -> (uint, Option) { self.iter.size_hint() } +} + impl<'a, T, H> Iterator<&'a T> for SetAlgebraItems<'a, T, H> { fn next(&mut self) -> Option<&'a T> { self.iter.next() } fn size_hint(&self) -> (uint, Option) { self.iter.size_hint() } @@ -914,4 +932,41 @@ mod test_set { assert!(set_str == "{1, 2}" || set_str == "{2, 1}"); assert_eq!(format!("{}", empty), "{}"); } + + #[test] + fn test_trivial_drain() { + let mut s = HashSet::::new(); + for _ in s.drain() {} + assert!(s.is_empty()); + drop(s); + + let mut s = HashSet::::new(); + drop(s.drain()); + assert!(s.is_empty()); + } + + #[test] + fn test_drain() { + let mut s: HashSet = range(1, 100).collect(); + + // try this a bunch of times to make sure we don't screw up internal state. + for _ in range(0i, 20) { + assert_eq!(s.len(), 99); + + { + let mut last_i = 0; + let mut d = s.drain(); + for (i, x) in d.by_ref().take(50).enumerate() { + last_i = i; + assert!(x != 0); + } + assert_eq!(last_i, 49); + } + + for _ in s.iter() { panic!("s should be empty!"); } + + // reset to try again. + s.extend(range(1, 100)); + } + } } diff --git a/src/libstd/collections/hash/table.rs b/src/libstd/collections/hash/table.rs index da06387e9a5..115edcabca1 100644 --- a/src/libstd/collections/hash/table.rs +++ b/src/libstd/collections/hash/table.rs @@ -684,6 +684,19 @@ impl RawTable { } } + pub fn drain(&mut self) -> Drain { + let RawBuckets { raw, hashes_end, .. } = self.raw_buckets(); + // Replace the marker regardless of lifetime bounds on parameters. + Drain { + iter: RawBuckets { + raw: raw, + hashes_end: hashes_end, + marker: marker::ContravariantLifetime::<'static>, + }, + table: self, + } + } + /// Returns an iterator that copies out each entry. Used while the table /// is being dropped. unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets { @@ -774,6 +787,12 @@ pub struct MoveEntries { iter: RawBuckets<'static, K, V> } +/// Iterator over the entries in a table, clearing the table. +pub struct Drain<'a, K: 'a, V: 'a> { + table: &'a mut RawTable, + iter: RawBuckets<'static, K, V>, +} + impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> { fn next(&mut self) -> Option<(&'a K, &'a V)> { self.iter.next().map(|bucket| { @@ -828,6 +847,36 @@ impl Iterator<(SafeHash, K, V)> for MoveEntries { } } +impl<'a, K: 'a, V: 'a> Iterator<(SafeHash, K, V)> for Drain<'a, K, V> { + #[inline] + fn next(&mut self) -> Option<(SafeHash, K, V)> { + self.iter.next().map(|bucket| { + self.table.size -= 1; + unsafe { + ( + SafeHash { + hash: ptr::replace(bucket.hash, EMPTY_BUCKET), + }, + ptr::read(bucket.key as *const K), + ptr::read(bucket.val as *const V) + ) + } + }) + } + + fn size_hint(&self) -> (uint, Option) { + let size = self.table.size(); + (size, Some(size)) + } +} + +#[unsafe_destructor] +impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> { + fn drop(&mut self) { + for _ in *self {} + } +} + impl Clone for RawTable { fn clone(&self) -> RawTable { unsafe {