Auto merge of #32466 - jooert:btree_append, r=apasel422
Implement `append` for b-trees. I have finally found time to revive #26227, this time only with an `append` implementation. The algorithm implemented here is linear in the size of the two b-trees. It firsts creates a `MergeIter` from the two b-trees and then builds a new b-tree by pushing key-value pairs from the `MergeIter` into nodes at the right heights. Three functions for stealing have been added to the implementation of `Handle` as well as a getter for the height of a `NodeRef`. The docs have been updated with performance information about `BTreeMap::append` and the remark about B has been removed now that it is the same for all instances of `BTreeMap`. cc @gereeter @Gankro @apasel422
This commit is contained in:
commit
66ff163081
@ -11,7 +11,7 @@
|
||||
use core::cmp::Ordering;
|
||||
use core::fmt::Debug;
|
||||
use core::hash::{Hash, Hasher};
|
||||
use core::iter::FromIterator;
|
||||
use core::iter::{FromIterator, Peekable};
|
||||
use core::marker::PhantomData;
|
||||
use core::ops::Index;
|
||||
use core::{fmt, intrinsics, mem, ptr};
|
||||
@ -348,6 +348,12 @@ pub struct OccupiedEntry<'a, K: 'a, V: 'a> {
|
||||
_marker: PhantomData<&'a mut (K, V)>,
|
||||
}
|
||||
|
||||
// An iterator for merging two sorted sequences into one
|
||||
struct MergeIter<K, V, I: Iterator<Item=(K, V)>> {
|
||||
left: Peekable<I>,
|
||||
right: Peekable<I>,
|
||||
}
|
||||
|
||||
impl<K: Ord, V> BTreeMap<K, V> {
|
||||
/// Makes a new empty BTreeMap with a reasonable choice for B.
|
||||
///
|
||||
@ -535,6 +541,62 @@ impl<K: Ord, V> BTreeMap<K, V> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Moves all elements from `other` into `Self`, leaving `other` empty.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// #![feature(btree_append)]
|
||||
/// use std::collections::BTreeMap;
|
||||
///
|
||||
/// let mut a = BTreeMap::new();
|
||||
/// a.insert(1, "a");
|
||||
/// a.insert(2, "b");
|
||||
/// a.insert(3, "c");
|
||||
///
|
||||
/// let mut b = BTreeMap::new();
|
||||
/// b.insert(3, "d");
|
||||
/// b.insert(4, "e");
|
||||
/// b.insert(5, "f");
|
||||
///
|
||||
/// a.append(&mut b);
|
||||
///
|
||||
/// assert_eq!(a.len(), 5);
|
||||
/// assert_eq!(b.len(), 0);
|
||||
///
|
||||
/// assert_eq!(a[&1], "a");
|
||||
/// assert_eq!(a[&2], "b");
|
||||
/// assert_eq!(a[&3], "d");
|
||||
/// assert_eq!(a[&4], "e");
|
||||
/// assert_eq!(a[&5], "f");
|
||||
/// ```
|
||||
#[unstable(feature = "btree_append", reason = "recently added as part of collections reform 2",
|
||||
issue = "19986")]
|
||||
pub fn append(&mut self, other: &mut Self) {
|
||||
// Do we have to append anything at all?
|
||||
if other.len() == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
// We can just swap `self` and `other` if `self` is empty.
|
||||
if self.len() == 0 {
|
||||
mem::swap(self, other);
|
||||
return;
|
||||
}
|
||||
|
||||
// First, we merge `self` and `other` into a sorted sequence in linear time.
|
||||
let self_iter = mem::replace(self, BTreeMap::new()).into_iter();
|
||||
let other_iter = mem::replace(other, BTreeMap::new()).into_iter();
|
||||
let iter = MergeIter {
|
||||
left: self_iter.peekable(),
|
||||
right: other_iter.peekable(),
|
||||
};
|
||||
|
||||
// Second, we build a tree from the sorted sequence in linear time.
|
||||
self.from_sorted_iter(iter);
|
||||
self.fix_right_edge();
|
||||
}
|
||||
|
||||
/// Constructs a double-ended iterator over a sub-range of elements in the map, starting
|
||||
/// at min, and ending at max. If min is `Unbounded`, then it will be treated as "negative
|
||||
/// infinity", and if max is `Unbounded`, then it will be treated as "positive infinity".
|
||||
@ -724,6 +786,76 @@ impl<K: Ord, V> BTreeMap<K, V> {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn from_sorted_iter<I: Iterator<Item=(K, V)>>(&mut self, iter: I) {
|
||||
let mut cur_node = last_leaf_edge(self.root.as_mut()).into_node();
|
||||
// Iterate through all key-value pairs, pushing them into nodes at the right level.
|
||||
for (key, value) in iter {
|
||||
// Try to push key-value pair into the current leaf node.
|
||||
if cur_node.len() < node::CAPACITY {
|
||||
cur_node.push(key, value);
|
||||
} else {
|
||||
// No space left, go up and push there.
|
||||
let mut open_node;
|
||||
let mut test_node = cur_node.forget_type();
|
||||
loop {
|
||||
match test_node.ascend() {
|
||||
Ok(parent) => {
|
||||
let parent = parent.into_node();
|
||||
if parent.len() < node::CAPACITY {
|
||||
// Found a node with space left, push here.
|
||||
open_node = parent;
|
||||
break;
|
||||
} else {
|
||||
// Go up again.
|
||||
test_node = parent.forget_type();
|
||||
}
|
||||
},
|
||||
Err(node) => {
|
||||
// We are at the top, create a new root node and push there.
|
||||
open_node = node.into_root_mut().push_level();
|
||||
break;
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Push key-value pair and new right subtree.
|
||||
let tree_height = open_node.height() - 1;
|
||||
let mut right_tree = node::Root::new_leaf();
|
||||
for _ in 0..tree_height {
|
||||
right_tree.push_level();
|
||||
}
|
||||
open_node.push(key, value, right_tree);
|
||||
|
||||
// Go down to the right-most leaf again.
|
||||
cur_node = last_leaf_edge(open_node.forget_type()).into_node();
|
||||
}
|
||||
|
||||
self.length += 1;
|
||||
}
|
||||
}
|
||||
|
||||
fn fix_right_edge(&mut self) {
|
||||
// Handle underfull nodes, start from the top.
|
||||
let mut cur_node = self.root.as_mut();
|
||||
while let Internal(internal) = cur_node.force() {
|
||||
// Check if right-most child is underfull.
|
||||
let mut last_edge = internal.last_edge();
|
||||
let right_child_len = last_edge.reborrow().descend().len();
|
||||
if right_child_len < node::CAPACITY / 2 {
|
||||
// We need to steal.
|
||||
let mut last_kv = match last_edge.left_kv() {
|
||||
Ok(left) => left,
|
||||
Err(_) => unreachable!(),
|
||||
};
|
||||
last_kv.bulk_steal_left(node::CAPACITY/2 - right_child_len);
|
||||
last_edge = last_kv.right_edge();
|
||||
}
|
||||
|
||||
// Go further down.
|
||||
cur_node = last_edge.descend();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, K: 'a, V: 'a> IntoIterator for &'a BTreeMap<K, V> {
|
||||
@ -1690,32 +1822,41 @@ fn handle_underfull_node<'a, K, V>(node: NodeRef<marker::Mut<'a>,
|
||||
};
|
||||
|
||||
if handle.can_merge() {
|
||||
return Merged(handle.merge().into_node());
|
||||
Merged(handle.merge().into_node())
|
||||
} else {
|
||||
unsafe {
|
||||
let (k, v, edge) = if is_left {
|
||||
handle.reborrow_mut().left_edge().descend().pop()
|
||||
} else {
|
||||
handle.reborrow_mut().right_edge().descend().pop_front()
|
||||
};
|
||||
|
||||
let k = mem::replace(handle.reborrow_mut().into_kv_mut().0, k);
|
||||
let v = mem::replace(handle.reborrow_mut().into_kv_mut().1, v);
|
||||
|
||||
// FIXME: reuse cur_node?
|
||||
if is_left {
|
||||
match handle.reborrow_mut().right_edge().descend().force() {
|
||||
Leaf(mut leaf) => leaf.push_front(k, v),
|
||||
Internal(mut internal) => internal.push_front(k, v, edge.unwrap())
|
||||
}
|
||||
} else {
|
||||
match handle.reborrow_mut().left_edge().descend().force() {
|
||||
Leaf(mut leaf) => leaf.push(k, v),
|
||||
Internal(mut internal) => internal.push(k, v, edge.unwrap())
|
||||
}
|
||||
}
|
||||
if is_left {
|
||||
handle.steal_left();
|
||||
} else {
|
||||
handle.steal_right();
|
||||
}
|
||||
Stole(handle.into_node())
|
||||
}
|
||||
}
|
||||
|
||||
impl<K: Ord, V, I: Iterator<Item=(K, V)>> Iterator for MergeIter<K, V, I> {
|
||||
type Item = (K, V);
|
||||
|
||||
fn next(&mut self) -> Option<(K, V)> {
|
||||
let res = match (self.left.peek(), self.right.peek()) {
|
||||
(Some(&(ref left_key, _)), Some(&(ref right_key, _))) => left_key.cmp(right_key),
|
||||
(Some(_), None) => Ordering::Less,
|
||||
(None, Some(_)) => Ordering::Greater,
|
||||
(None, None) => return None,
|
||||
};
|
||||
|
||||
// Check which elements comes first and only advance the corresponding iterator.
|
||||
// If two keys are equal, take the value from `right`.
|
||||
match res {
|
||||
Ordering::Less => {
|
||||
self.left.next()
|
||||
},
|
||||
Ordering::Greater => {
|
||||
self.right.next()
|
||||
},
|
||||
Ordering::Equal => {
|
||||
self.left.next();
|
||||
self.right.next()
|
||||
},
|
||||
}
|
||||
|
||||
return Stole(handle.into_node());
|
||||
}
|
||||
}
|
||||
|
@ -328,6 +328,12 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
|
||||
self.as_leaf().len as usize
|
||||
}
|
||||
|
||||
/// Returns the height of this node in the whole tree. Zero height denotes the
|
||||
/// leaf level.
|
||||
pub fn height(&self) -> usize {
|
||||
self.height
|
||||
}
|
||||
|
||||
/// Removes any static information about whether this node is a `Leaf` or an
|
||||
/// `Internal` node.
|
||||
pub fn forget_type(self) -> NodeRef<BorrowType, K, V, marker::LeafOrInternal> {
|
||||
@ -1233,6 +1239,139 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::
|
||||
Handle::new_edge(self.node, self.idx)
|
||||
}
|
||||
}
|
||||
|
||||
/// This removes a key/value pair from the left child and replaces it with the key/value pair
|
||||
/// pointed to by this handle while pushing the old key/value pair of this handle into the right
|
||||
/// child.
|
||||
pub fn steal_left(&mut self) {
|
||||
unsafe {
|
||||
let (k, v, edge) = self.reborrow_mut().left_edge().descend().pop();
|
||||
|
||||
let k = mem::replace(self.reborrow_mut().into_kv_mut().0, k);
|
||||
let v = mem::replace(self.reborrow_mut().into_kv_mut().1, v);
|
||||
|
||||
match self.reborrow_mut().right_edge().descend().force() {
|
||||
ForceResult::Leaf(mut leaf) => leaf.push_front(k, v),
|
||||
ForceResult::Internal(mut internal) => internal.push_front(k, v, edge.unwrap())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This removes a key/value pair from the right child and replaces it with the key/value pair
|
||||
/// pointed to by this handle while pushing the old key/value pair of this handle into the left
|
||||
/// child.
|
||||
pub fn steal_right(&mut self) {
|
||||
unsafe {
|
||||
let (k, v, edge) = self.reborrow_mut().right_edge().descend().pop_front();
|
||||
|
||||
let k = mem::replace(self.reborrow_mut().into_kv_mut().0, k);
|
||||
let v = mem::replace(self.reborrow_mut().into_kv_mut().1, v);
|
||||
|
||||
match self.reborrow_mut().left_edge().descend().force() {
|
||||
ForceResult::Leaf(mut leaf) => leaf.push(k, v),
|
||||
ForceResult::Internal(mut internal) => internal.push(k, v, edge.unwrap())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This does stealing similar to `steal_left` but steals multiple elements at once.
|
||||
pub fn bulk_steal_left(&mut self, n: usize) {
|
||||
unsafe {
|
||||
// Get raw pointers to left child's keys, values and edges.
|
||||
let (left_len, left_k, left_v, left_e) = {
|
||||
let mut left = self.reborrow_mut().left_edge().descend();
|
||||
|
||||
(left.len(),
|
||||
left.keys_mut().as_mut_ptr(),
|
||||
left.vals_mut().as_mut_ptr(),
|
||||
match left.force() {
|
||||
ForceResult::Leaf(_) => None,
|
||||
ForceResult::Internal(mut i) => Some(i.as_internal_mut().edges.as_mut_ptr()),
|
||||
})
|
||||
};
|
||||
|
||||
// Get raw pointers to right child's keys, values and edges.
|
||||
let (right_len, right_k, right_v, right_e) = {
|
||||
let mut right = self.reborrow_mut().right_edge().descend();
|
||||
|
||||
(right.len(),
|
||||
right.keys_mut().as_mut_ptr(),
|
||||
right.vals_mut().as_mut_ptr(),
|
||||
match right.force() {
|
||||
ForceResult::Leaf(_) => None,
|
||||
ForceResult::Internal(mut i) => Some(i.as_internal_mut().edges.as_mut_ptr()),
|
||||
})
|
||||
};
|
||||
|
||||
// Get raw pointers to parent's key and value.
|
||||
let (parent_k, parent_v) = {
|
||||
let kv = self.reborrow_mut().into_kv_mut();
|
||||
(kv.0 as *mut K, kv.1 as *mut V)
|
||||
};
|
||||
|
||||
// Make sure that we may steal safely.
|
||||
debug_assert!(right_len + n <= CAPACITY);
|
||||
debug_assert!(left_len >= n);
|
||||
|
||||
// Make room for stolen elements in right child.
|
||||
ptr::copy(right_k,
|
||||
right_k.offset(n as isize),
|
||||
right_len);
|
||||
ptr::copy(right_v,
|
||||
right_v.offset(n as isize),
|
||||
right_len);
|
||||
if let Some(edges) = right_e {
|
||||
ptr::copy(edges,
|
||||
edges.offset(n as isize),
|
||||
right_len+1);
|
||||
}
|
||||
|
||||
// Move elements from the left child to the right one.
|
||||
let left_ind = (left_len - n) as isize;
|
||||
ptr::copy_nonoverlapping(left_k.offset(left_ind + 1),
|
||||
right_k,
|
||||
n - 1);
|
||||
ptr::copy_nonoverlapping(left_v.offset(left_ind + 1),
|
||||
right_v,
|
||||
n - 1);
|
||||
match (left_e, right_e) {
|
||||
(Some(left), Some(right)) => {
|
||||
ptr::copy_nonoverlapping(left.offset(left_ind + 1),
|
||||
right,
|
||||
n);
|
||||
},
|
||||
(Some(_), None) => unreachable!(),
|
||||
(None, Some(_)) => unreachable!(),
|
||||
(None, None) => {},
|
||||
}
|
||||
|
||||
// Copy parent key/value pair to right child.
|
||||
ptr::copy_nonoverlapping(parent_k,
|
||||
right_k.offset(n as isize - 1),
|
||||
1);
|
||||
ptr::copy_nonoverlapping(parent_v,
|
||||
right_v.offset(n as isize - 1),
|
||||
1);
|
||||
// Copy left-most stolen pair to parent.
|
||||
ptr::copy_nonoverlapping(left_k.offset(left_ind),
|
||||
parent_k,
|
||||
1);
|
||||
ptr::copy_nonoverlapping(left_v.offset(left_ind),
|
||||
parent_v,
|
||||
1);
|
||||
|
||||
// Fix lengths of left and right child and parent pointers in children of the right
|
||||
// child.
|
||||
self.reborrow_mut().left_edge().descend().as_leaf_mut().len -= n as u16;
|
||||
let mut right = self.reborrow_mut().right_edge().descend();
|
||||
right.as_leaf_mut().len += n as u16;
|
||||
if let ForceResult::Internal(mut node) = right.force() {
|
||||
for i in 0..(right_len+n+1) {
|
||||
Handle::new_edge(node.reborrow_mut(), i as usize).correct_parent_link();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<BorrowType, K, V, HandleType>
|
||||
|
@ -545,6 +545,41 @@ impl<T: Ord> BTreeSet<T> {
|
||||
{
|
||||
Recover::take(&mut self.map, value)
|
||||
}
|
||||
|
||||
/// Moves all elements from `other` into `Self`, leaving `other` empty.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// #![feature(btree_append)]
|
||||
/// use std::collections::BTreeSet;
|
||||
///
|
||||
/// let mut a = BTreeSet::new();
|
||||
/// a.insert(1);
|
||||
/// a.insert(2);
|
||||
/// a.insert(3);
|
||||
///
|
||||
/// let mut b = BTreeSet::new();
|
||||
/// b.insert(3);
|
||||
/// b.insert(4);
|
||||
/// b.insert(5);
|
||||
///
|
||||
/// a.append(&mut b);
|
||||
///
|
||||
/// assert_eq!(a.len(), 5);
|
||||
/// assert_eq!(b.len(), 0);
|
||||
///
|
||||
/// assert!(a.contains(&1));
|
||||
/// assert!(a.contains(&2));
|
||||
/// assert!(a.contains(&3));
|
||||
/// assert!(a.contains(&4));
|
||||
/// assert!(a.contains(&5));
|
||||
/// ```
|
||||
#[unstable(feature = "btree_append", reason = "recently added as part of collections reform 2",
|
||||
issue = "19986")]
|
||||
pub fn append(&mut self, other: &mut Self) {
|
||||
self.map.append(&mut other.map);
|
||||
}
|
||||
}
|
||||
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
|
@ -446,6 +446,58 @@ fn test_vacant_entry_key() {
|
||||
assert_eq!(a[key], value);
|
||||
}
|
||||
|
||||
macro_rules! create_append_test {
|
||||
($name:ident, $len:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let mut a = BTreeMap::new();
|
||||
for i in 0..8 {
|
||||
a.insert(i, i);
|
||||
}
|
||||
|
||||
let mut b = BTreeMap::new();
|
||||
for i in 5..$len {
|
||||
b.insert(i, 2*i);
|
||||
}
|
||||
|
||||
a.append(&mut b);
|
||||
|
||||
assert_eq!(a.len(), $len);
|
||||
assert_eq!(b.len(), 0);
|
||||
|
||||
for i in 0..$len {
|
||||
if i < 5 {
|
||||
assert_eq!(a[&i], i);
|
||||
} else {
|
||||
assert_eq!(a[&i], 2*i);
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(a.remove(&($len-1)), Some(2*($len-1)));
|
||||
assert_eq!(a.insert($len-1, 20), None);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// These are mostly for testing the algorithm that "fixes" the right edge after insertion.
|
||||
// Single node.
|
||||
create_append_test!(test_append_9, 9);
|
||||
// Two leafs that don't need fixing.
|
||||
create_append_test!(test_append_17, 17);
|
||||
// Two leafs where the second one ends up underfull and needs stealing at the end.
|
||||
create_append_test!(test_append_14, 14);
|
||||
// Two leafs where the second one ends up empty because the insertion finished at the root.
|
||||
create_append_test!(test_append_12, 12);
|
||||
// Three levels; insertion finished at the root.
|
||||
create_append_test!(test_append_144, 144);
|
||||
// Three levels; insertion finished at leaf while there is an empty node on the second level.
|
||||
create_append_test!(test_append_145, 145);
|
||||
// Tests for several randomly chosen sizes.
|
||||
create_append_test!(test_append_170, 170);
|
||||
create_append_test!(test_append_181, 181);
|
||||
create_append_test!(test_append_239, 239);
|
||||
create_append_test!(test_append_1700, 1700);
|
||||
|
||||
mod bench {
|
||||
use std::collections::BTreeMap;
|
||||
use std::__rand::{Rng, thread_rng};
|
||||
|
@ -265,3 +265,27 @@ fn test_variance() {
|
||||
fn into_iter<'new>(v: IntoIter<&'static str>) -> IntoIter<&'new str> { v }
|
||||
fn range<'a, 'new>(v: Range<'a, &'static str>) -> Range<'a, &'new str> { v }
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_append() {
|
||||
let mut a = BTreeSet::new();
|
||||
a.insert(1);
|
||||
a.insert(2);
|
||||
a.insert(3);
|
||||
|
||||
let mut b = BTreeSet::new();
|
||||
b.insert(3);
|
||||
b.insert(4);
|
||||
b.insert(5);
|
||||
|
||||
a.append(&mut b);
|
||||
|
||||
assert_eq!(a.len(), 5);
|
||||
assert_eq!(b.len(), 0);
|
||||
|
||||
assert_eq!(a.contains(&1), true);
|
||||
assert_eq!(a.contains(&2), true);
|
||||
assert_eq!(a.contains(&3), true);
|
||||
assert_eq!(a.contains(&4), true);
|
||||
assert_eq!(a.contains(&5), true);
|
||||
}
|
||||
|
@ -13,6 +13,7 @@
|
||||
#![feature(binary_heap_extras)]
|
||||
#![feature(binary_heap_append)]
|
||||
#![feature(box_syntax)]
|
||||
#![feature(btree_append)]
|
||||
#![feature(btree_range)]
|
||||
#![feature(collections)]
|
||||
#![feature(collections_bound)]
|
||||
|
@ -120,12 +120,10 @@
|
||||
//!
|
||||
//! For Sets, all operations have the cost of the equivalent Map operation.
|
||||
//!
|
||||
//! | | get | insert | remove | predecessor |
|
||||
//! |----------|-----------|----------|----------|-------------|
|
||||
//! | HashMap | O(1)~ | O(1)~* | O(1)~ | N/A |
|
||||
//! | BTreeMap | O(log n) | O(log n) | O(log n) | O(log n) |
|
||||
//!
|
||||
//! Note that BTreeMap's precise performance depends on the value of B.
|
||||
//! | | get | insert | remove | predecessor | append |
|
||||
//! |----------|-----------|----------|----------|-------------|--------|
|
||||
//! | HashMap | O(1)~ | O(1)~* | O(1)~ | N/A | N/A |
|
||||
//! | BTreeMap | O(log n) | O(log n) | O(log n) | O(log n) | O(n+m) |
|
||||
//!
|
||||
//! # Correct and Efficient Usage of Collections
|
||||
//!
|
||||
|
Loading…
x
Reference in New Issue
Block a user