From 7f754764d60e79752c7cca1f8289eedeb469cad0 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 14 Jan 2013 10:27:26 -0500 Subject: [PATCH] replace treemap with a balanced tree --- src/libstd/treemap.rs | 818 +++++++++++++++++++++++++++++++++++------- 1 file changed, 679 insertions(+), 139 deletions(-) diff --git a/src/libstd/treemap.rs b/src/libstd/treemap.rs index af3ab4b88eb..35a6be135c7 100644 --- a/src/libstd/treemap.rs +++ b/src/libstd/treemap.rs @@ -1,4 +1,4 @@ -// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -8,187 +8,727 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - * A key,value store that works on anything. - * - * This works using a binary search tree. In the first version, it's a - * very naive algorithm, but it will probably be updated to be a - * red-black tree or something else. - */ +//! An ordered map and set implemented as self-balancing binary search +//! trees. The only requirement for the types is that the key implements +//! `Ord`, and that the `lt` method provides a total ordering. + #[forbid(deprecated_mode)]; use core::cmp::{Eq, Ord}; use core::option::{Option, Some, None}; use core::prelude::*; -pub type TreeMap = @mut TreeEdge; +// This is implemented as an AA tree, which is a simplified variation of +// a red-black tree where where red (horizontal) nodes can only be added +// as a right child. The time complexity is the same, and re-balancing +// operations are more frequent but also cheaper. -type TreeEdge = Option<@TreeNode>; +// TODO: lazy iteration, for O(n) Eq and set operations instead of O(n*log(m)) -struct TreeNode { - key: K, - mut value: V, - mut left: TreeEdge, - mut right: TreeEdge +// TODO: implement Ord for TreeSet +// could be superset/subset-based or in-order lexicographic comparison... but +// there are methods for is_superset/is_subset so lexicographic is more useful + +// TODO: (possibly) implement the overloads Python does for sets: +// * union: | +// * intersection: & +// * difference: - +// * symmetric difference: ^ +// These would be convenient since the methods will work like `each` + +pub struct TreeMap { + priv root: Option<~TreeNode>, + priv length: uint } -/// Create a treemap -pub fn TreeMap() -> TreeMap { @mut None } - -/// Insert a value into the map -pub fn insert(m: &mut TreeEdge, k: K, v: V) { - match copy *m { - None => { - *m = Some(@TreeNode {key: k, - mut value: v, - mut left: None, - mut right: None}); - return; - } - Some(node) => { - if k == node.key { - node.value = v; - } else if k < node.key { - insert(&mut node.left, k, v); - } else { - insert(&mut node.right, k, v); +// FIXME: this is a naive O(n*log(m)) implementation, could be O(n) +impl TreeMap: Eq { + pure fn eq(&self, other: &TreeMap) -> bool { + if self.len() != other.len() { + return false } - } + for self.each |x, y| { + match other.find(x) { + Some(z) => if z != y { return false }, + None => return false + } + } + true + } + pure fn ne(&self, other: &TreeMap) -> bool { + !self.eq(other) + } +} + +impl TreeMap { + /// Create an empty TreeMap + static pure fn new() -> TreeMap { TreeMap{root: None, length: 0} } + + /// Return the number of elements in the map + pure fn len(&self) -> uint { self.length } + + /// Return true if the map contains no elements + pure fn is_empty(&self) -> bool { self.root.is_none() } + + /// Return true if the map contains some elements + pure fn is_not_empty(&self) -> bool { self.root.is_some() } + + /// Visit all key-value pairs in order + pure fn each(&self, f: fn(&K, &V) -> bool) { each(&self.root, f) } + + /// Visit all keys in order + pure fn each_key(&self, f: fn(&K) -> bool) { self.each(|k, _| f(k)) } + + /// Visit all values in order + pure fn each_value(&self, f: fn(&V) -> bool) { self.each(|_, v| f(v)) } + + /// Visit all key-value pairs in reverse order + pure fn each_reverse(&self, f: fn(&K, &V) -> bool) { + each_reverse(&self.root, f); + } + + /// Visit all keys in reverse order + pure fn each_key_reverse(&self, f: fn(&K) -> bool) { + self.each_reverse(|k, _| f(k)) + } + + /// Visit all values in reverse order + pure fn each_value_reverse(&self, f: fn(&V) -> bool) { + self.each_reverse(|_, v| f(v)) + } + + /// Return true if the map contains a value for the specified key + pure fn contains_key(&self, key: &K) -> bool { + self.find(key).is_some() + } + + /// Return the value corresponding to the key in the map + pure fn find(&self, key: &K) -> Option<&self/V> { + let mut current: &self/Option<~TreeNode> = &self.root; + loop { + match *current { + Some(ref r) => { + let r: &self/~TreeNode = r; // FIXME: #3148 + if *key < r.key { + current = &r.left; + } else if r.key < *key { + current = &r.right; + } else { + return Some(&r.value); + } + } + None => return None + } + } + } + + /// Insert a key-value pair into the map. An existing value for a + /// key is replaced by the new value. Return true if the key did + /// not already exist in the map. + fn insert(&mut self, key: K, value: V) -> bool { + let ret = insert(&mut self.root, key, value); + if ret { self.length += 1 } + ret + } + + /// Remove a key-value pair from the map. Return true if the key + /// was present in the map, otherwise false. + fn remove(&mut self, key: &K) -> bool { + let ret = remove(&mut self.root, key); + if ret { self.length -= 1 } + ret + } +} + +pub struct TreeSet { + priv map: TreeMap +} + +impl TreeSet: iter::BaseIter { + /// Visit all values in order + pure fn each(&self, f: fn(&T) -> bool) { self.map.each_key(f) } + pure fn size_hint(&self) -> Option { Some(self.len()) } +} + +impl TreeSet: Eq { + pure fn eq(&self, other: &TreeSet) -> bool { self.map == other.map } + pure fn ne(&self, other: &TreeSet) -> bool { self.map != other.map } +} + +impl TreeSet { + /// Create an empty TreeSet + static pure fn new() -> TreeSet { TreeSet{map: TreeMap::new()} } + + /// Return the number of elements in the set + pure fn len(&self) -> uint { self.map.len() } + + /// Return true if the set contains no elements + pure fn is_empty(&self) -> bool { self.map.is_empty() } + + /// Return true if the set contains some elements + pure fn is_not_empty(&self) -> bool { self.map.is_not_empty() } + + /// Visit all values in reverse order + pure fn each_reverse(&self, f: fn(&T) -> bool) { + self.map.each_key_reverse(f) + } + + /// Return true if the set contains a value + pure fn contains(&self, value: &T) -> bool { self.map.contains_key(value) } + + /// Add a value to the set. Return true if the value was not + /// already present in the set. + fn insert(&mut self, value: T) -> bool { self.map.insert(value, ()) } + + /// Remove a value from the set. Return true if the value was + /// present in the set. + fn remove(&mut self, value: &T) -> bool { self.map.remove(value) } + + /// Return true if the set has no elements in common with `other`. + /// This is equivalent to checking for an empty intersection. + pure fn is_disjoint(&self, other: &TreeSet) -> bool { + // FIXME: this is a naive O(n*log(m)) implementation, could be O(n) + !iter::any(self, |x| other.contains(x)) + } + + /// Check of the set is a subset of another + pure fn is_subset(&self, other: &TreeSet) -> bool { + // FIXME: this is a naive O(n*log(m)) implementation, could be O(n) + !iter::any(self, |x| !other.contains(x)) + } + + /// Check of the set is a superset of another + pure fn is_superset(&self, other: &TreeSet) -> bool { + other.is_subset(self) + } + + /// Visit the values (in-order) representing the difference + pure fn difference(&self, _other: &TreeSet, + _f: fn(&T) -> bool) { + fail ~"not yet implemented" // TODO + } + + /// Visit the values (in-order) representing the symmetric difference + pure fn symmetric_difference(&self, _other: &TreeSet, + _f: fn(&T) -> bool) { + fail ~"not yet implemented" // TODO + } + + /// Visit the values (in-order) representing the intersection + pure fn intersection(&self, other: &TreeSet, + f: fn(&T) -> bool) { + // FIXME: this is a naive O(n*log(m)) implementation, could be O(n) + for self.each |x| { + if other.contains(x) { + if !f(x) { break } + } + } + } + + /// Visit the values (in-order) representing the union + pure fn union(&self, _other: &TreeSet, _f: fn(&T) -> bool) -> TreeSet { + fail ~"not yet implemented" // TODO + } +} + +// Nodes keep track of their level in the tree, starting at 1 in the +// leaves and with a red child sharing the level of the parent. +struct TreeNode { + key: K, + value: V, + left: Option<~TreeNode>, + right: Option<~TreeNode>, + level: uint +} + +impl TreeNode { + #[inline(always)] + static pure fn new(key: K, value: V) -> TreeNode { + TreeNode{key: key, value: value, left: None, right: None, level: 1} + } +} + +pure fn each(node: &Option<~TreeNode>, + f: fn(&K, &V) -> bool) { + do node.map |x| { + each(&x.left, f); + if f(&x.key, &x.value) { each(&x.right, f) } }; } -/// Find a value based on the key -pub fn find(m: &const TreeEdge, k: K) - -> Option { - match copy *m { - None => None, +pure fn each_reverse(node: &Option<~TreeNode>, + f: fn(&K, &V) -> bool) { + do node.map |x| { + each_reverse(&x.right, f); + if f(&x.key, &x.value) { each_reverse(&x.left, f) } + }; +} - // FIXME (#2808): was that an optimization? - Some(node) => { - if k == node.key { - Some(node.value) - } else if k < node.key { - find(&const node.left, k) +// Remove left horizontal link by rotating right +fn skew(node: ~TreeNode) -> ~TreeNode { + if node.left.map_default(false, |x| x.level == node.level) { + let mut node = node; + let mut save = node.left.swap_unwrap(); + node.left <-> save.right; // save.right now None + save.right = Some(node); + save + } else { + node // nothing to do + } +} + +// Remove dual horizontal link by rotating left and increasing level of +// the parent +fn split(node: ~TreeNode) -> ~TreeNode { + if node.right.map_default(false, |x| x.right.map_default(false, |y| y.level == node.level)) { + let mut node = node; + let mut save = node.right.swap_unwrap(); + node.right <-> save.left; // save.left now None + save.left = Some(node); + save.level += 1; + save + } else { + node // nothing to do + } +} + +fn insert(node: &mut Option<~TreeNode>, key: K, + value: V) -> bool { + if node.is_none() { + *node = Some(~TreeNode::new(key, value)); + true + } else { + let mut save = node.swap_unwrap(); + if key < save.key { + let inserted = insert(&mut save.left, key, value); + *node = Some(split(skew(save))); // re-balance, if necessary + inserted + } else if save.key < key { + let inserted = insert(&mut save.right, key, value); + *node = Some(split(skew(save))); // re-balance, if necessary + inserted } else { - find(&const node.right, k) + save.key = key; + save.value = value; + *node = Some(save); + false } - } } } -/// Visit all pairs in the map in order. -pub fn traverse(m: &const TreeEdge, - f: fn((&K), (&V))) { - match copy *m { - None => (), - Some(node) => { - traverse(&const node.left, f); - // copy of value is req'd as f() requires an immutable ptr - f(&node.key, © node.value); - traverse(&const node.right, f); - } +fn remove(node: &mut Option<~TreeNode>, key: &K) -> bool { + fn heir_swap(node: &mut TreeNode, + child: &mut Option<~TreeNode>) { + // *could* be done without recursion, but it won't borrow check + do child.mutate |child| { + let mut child = child; + if child.right.is_some() { + heir_swap(node, &mut child.right); + } else { + node.key <-> child.key; + node.value <-> child.value; + } + child + } + } + + if node.is_none() { + return false // bottom of tree + } else { + let mut save = node.swap_unwrap(); + + let removed = if save.key < *key { + remove(&mut save.right, key) + } else if *key < save.key { + remove(&mut save.left, key) + } else { + if save.left.is_some() { + if save.right.is_some() { + let mut left = save.left.swap_unwrap(); + if left.right.is_some() { + heir_swap(save, &mut left.right); + save.left = Some(left); + remove(&mut save.left, key); + } else { + save.key <-> left.key; + save.value <-> left.value; + save.left = Some(left); + remove(&mut save.left, key); + } + } else { + let mut rm = save.left.swap_unwrap(); + save.key <-> rm.key; + save.value <-> rm.value; + save.level <-> rm.level; // FIXME: may not be needed + save.left <-> rm.left; // FIXME: may not be needed + save.right <-> rm.right; // FIXME: may not be needed + } + } else if save.right.is_some() { + let mut rm = save.right.swap_unwrap(); + save.key <-> rm.key; + save.value <-> rm.value; + save.level <-> rm.level; // FIXME: may not be needed + save.left <-> rm.left; // FIXME: may not be needed + save.right <-> rm.right; // FIXME: may not be needed + } else { + return true // leaf + } + true + }; + + let left_level = save.left.map_default(0, |x| x.level); + let right_level = save.right.map_default(0, |x| x.level); + + // re-balance, if necessary + if left_level < save.level - 1 || right_level < save.level - 1 { + save.level -= 1; + + if right_level > save.level { + do save.right.mutate |x| { + let mut x = x; x.level = save.level; x + } + } + + save = skew(save); + + do save.right.mutate |right| { + let mut right = skew(right); + right.right.mutate(skew); + right + } + save = split(save); + save.right.mutate(split); + } + + *node = Some(save); + removed } } -/// Compare two treemaps and return true iff -/// they contain same keys and values -pub fn equals(t1: &const TreeEdge, - t2: &const TreeEdge) - -> bool { - let mut v1 = ~[]; - let mut v2 = ~[]; - traverse(t1, |k,v| { v1.push((copy *k, copy *v)) }); - traverse(t2, |k,v| { v2.push((copy *k, copy *v)) }); - return v1 == v2; -} - - #[cfg(test)] -mod tests { - #[legacy_exports]; - - use treemap::*; - - use core::option::{None, Option, Some}; +mod test_treemap { + use super::*; use core::str; - #[test] - fn init_treemap() { let _m = TreeMap::(); } - - #[test] - fn insert_one() { let m = TreeMap(); insert(m, 1, 2); } - - #[test] - fn insert_two() { let m = TreeMap(); insert(m, 1, 2); insert(m, 3, 4); } - - #[test] - fn insert_find() { - let m = TreeMap(); - insert(m, 1, 2); - assert (find(m, 1) == Some(2)); - } - #[test] fn find_empty() { - let m = TreeMap::(); assert (find(m, 1) == None); + let m = TreeMap::new::(); assert m.find(&5) == None; } #[test] fn find_not_found() { - let m = TreeMap(); - insert(m, 1, 2); - assert (find(m, 2) == None); + let mut m = TreeMap::new(); + assert m.insert(1, 2); + assert m.insert(5, 3); + assert m.insert(9, 3); + assert m.find(&2) == None; } #[test] - fn traverse_in_order() { - let m = TreeMap(); - insert(m, 3, ()); - insert(m, 0, ()); - insert(m, 4, ()); - insert(m, 2, ()); - insert(m, 1, ()); - - let n = @mut 0; - fn t(n: @mut int, k: int, _v: ()) { - assert (*n == k); *n += 1; - } - traverse(m, |x,y| t(n, *x, *y)); - } - - #[test] - fn equality() { - let m1 = TreeMap(); - insert(m1, 3, ()); - insert(m1, 0, ()); - insert(m1, 4, ()); - insert(m1, 2, ()); - insert(m1, 1, ()); - let m2 = TreeMap(); - insert(m2, 2, ()); - insert(m2, 1, ()); - insert(m2, 3, ()); - insert(m2, 0, ()); - insert(m2, 4, ()); - - assert equals(m1, m2); - - let m3 = TreeMap(); - assert !equals(m1,m3); - + fn insert_replace() { + let mut m = TreeMap::new(); + assert m.insert(5, 2); + assert m.insert(2, 9); + assert !m.insert(2, 11); + assert m.find(&2).unwrap() == &11; } #[test] fn u8_map() { - let m = TreeMap(); + let mut m = TreeMap::new(); let k1 = str::to_bytes(~"foo"); let k2 = str::to_bytes(~"bar"); + let v1 = str::to_bytes(~"baz"); + let v2 = str::to_bytes(~"foobar"); - insert(m, k1, ~"foo"); - insert(m, k2, ~"bar"); + m.insert(k1, v1); + m.insert(k2, v2); - assert (find(m, k2) == Some(~"bar")); - assert (find(m, k1) == Some(~"foo")); + assert m.find(&k2) == Some(&v2); + assert m.find(&k1) == Some(&v1); + } + + fn check_equal(ctrl: &[(K, V)], map: &TreeMap) { + assert ctrl.is_empty() == map.is_empty(); + assert ctrl.is_not_empty() == map.is_not_empty(); + for ctrl.each |x| { + let &(k, v) = x; + assert map.find(&k).unwrap() == &v + } + for map.each |map_k, map_v| { + let mut found = false; + for ctrl.each |x| { + let &(ctrl_k, ctrl_v) = x; + if *map_k == ctrl_k { + assert *map_v == ctrl_v; + found = true; + break; + } + } + assert found; + } + } + + fn check_left(node: &Option<~TreeNode>, parent: &~TreeNode) { + match *node { + Some(ref r) => { + assert r.key < parent.key; + assert r.level == parent.level - 1; // left is black + check_left(&r.left, r); + check_right(&r.right, r, false); + } + None => assert parent.level == 1 // parent is leaf + } + } + + fn check_right(node: &Option<~TreeNode>, + parent: &~TreeNode, parent_red: bool) { + match *node { + Some(ref r) => { + assert r.key > parent.key; + let red = r.level == parent.level; + if parent_red { assert !red } // no dual horizontal links + assert red || r.level == parent.level - 1; // right is red or black + check_left(&r.left, r); + check_right(&r.right, r, red); + } + None => assert parent.level == 1 // parent is leaf + } + } + + fn check_structure(map: &TreeMap) { + match map.root { + Some(ref r) => { + check_left(&r.left, r); + check_right(&r.right, r, false); + } + None => () + } + } + + #[test] + fn test_rand_int() { + let mut map = TreeMap::new::(); + let mut ctrl = ~[]; + + check_equal(ctrl, &map); + assert map.find(&5).is_none(); + + let rng = rand::seeded_rng(&~[42]); + + for 3.times { + for 90.times { + let k = rng.gen_int(); + let v = rng.gen_int(); + if !ctrl.contains(&(k, v)) { + assert map.insert(k, v); + ctrl.push((k, v)); + check_structure(&map); + check_equal(ctrl, &map); + } + } + + for 30.times { + let r = rng.gen_uint_range(0, ctrl.len()); + let (key, _) = vec::remove(&mut ctrl, r); + assert map.remove(&key); + check_structure(&map); + check_equal(ctrl, &map); + } + } + } + + #[test] + fn test_len() { + let mut m = TreeMap::new(); + assert m.insert(3, 6); + assert m.len() == 1; + assert m.insert(0, 0); + assert m.len() == 2; + assert m.insert(4, 8); + assert m.len() == 3; + assert m.remove(&3); + assert m.len() == 2; + assert !m.remove(&5); + assert m.len() == 2; + assert m.insert(2, 4); + assert m.len() == 3; + assert m.insert(1, 2); + assert m.len() == 4; + } + + #[test] + fn test_each() { + let mut m = TreeMap::new(); + + assert m.insert(3, 6); + assert m.insert(0, 0); + assert m.insert(4, 8); + assert m.insert(2, 4); + assert m.insert(1, 2); + + let mut n = 0; + for m.each |k, v| { + assert *k == n; + assert *v == n * 2; + n += 1; + } + } + + #[test] + fn test_each_reverse() { + let mut m = TreeMap::new(); + + assert m.insert(3, 6); + assert m.insert(0, 0); + assert m.insert(4, 8); + assert m.insert(2, 4); + assert m.insert(1, 2); + + let mut n = 4; + for m.each_reverse |k, v| { + assert *k == n; + assert *v == n * 2; + n -= 1; + } + } + + #[test] + fn test_eq() { + let mut a = TreeMap::new(); + let mut b = TreeMap::new(); + + assert a == b; + assert a.insert(0, 5); + assert a != b; + assert b.insert(0, 4); + assert a != b; + assert a.insert(5, 19); + assert a != b; + assert !b.insert(0, 5); + assert a != b; + assert b.insert(5, 19); + assert a == b; + } +} + +#[cfg(test)] +mod test_set { + use super::*; + + #[test] + fn test_disjoint() { + let mut xs = TreeSet::new(); + let mut ys = TreeSet::new(); + assert xs.is_disjoint(&ys); + assert ys.is_disjoint(&xs); + assert xs.insert(5); + assert ys.insert(11); + assert xs.is_disjoint(&ys); + assert ys.is_disjoint(&xs); + assert xs.insert(7); + assert xs.insert(19); + assert xs.insert(4); + assert ys.insert(2); + assert ys.insert(-11); + assert xs.is_disjoint(&ys); + assert ys.is_disjoint(&xs); + assert ys.insert(7); + assert !xs.is_disjoint(&ys); + assert !ys.is_disjoint(&xs); + } + + #[test] + fn test_subset_and_superset() { + let mut a = TreeSet::new(); + assert a.insert(0); + assert a.insert(5); + assert a.insert(11); + assert a.insert(7); + + let mut b = TreeSet::new(); + assert b.insert(0); + assert b.insert(7); + assert b.insert(19); + assert b.insert(250); + assert b.insert(11); + assert b.insert(200); + + assert !a.is_subset(&b); + assert !a.is_superset(&b); + assert !b.is_subset(&a); + assert !b.is_superset(&a); + + assert b.insert(5); + + assert a.is_subset(&b); + assert !a.is_superset(&b); + assert !b.is_subset(&a); + assert b.is_superset(&a); + } + + #[test] + fn test_each() { + let mut m = TreeSet::new(); + + assert m.insert(3); + assert m.insert(0); + assert m.insert(4); + assert m.insert(2); + assert m.insert(1); + + let mut n = 0; + for m.each |x| { + assert *x == n; + n += 1 + } + } + + #[test] + fn test_each_reverse() { + let mut m = TreeSet::new(); + + assert m.insert(3); + assert m.insert(0); + assert m.insert(4); + assert m.insert(2); + assert m.insert(1); + + let mut n = 4; + for m.each_reverse |x| { + assert *x == n; + n -= 1 + } + } + + #[test] + fn test_intersection() { + let mut a = TreeSet::new(); + let mut b = TreeSet::new(); + + a.insert(11); + a.insert(1); + a.insert(3); + a.insert(77); + a.insert(103); + a.insert(5); + a.insert(-5); + + b.insert(2); + b.insert(11); + b.insert(77); + b.insert(-9); + b.insert(-42); + b.insert(5); + b.insert(3); + + let mut i = 0; + let expected = [3, 5, 11, 77]; + for a.intersection(&b) |x| { + assert *x == expected[i]; + i += 1 + } + assert i == expected.len(); } }