rust/src/libcore/send_map.rs

413 lines
13 KiB
Rust
Raw Normal View History

/*!
Sendable hash maps. Very much a work in progress.
*/
/**
* A function that returns a hash of a value
*
* The hash should concentrate entropy in the lower bits.
*/
2012-08-14 18:54:13 -05:00
type HashFn<K> = pure fn~(K) -> uint;
/// A function that compares two keys (presumably for equality) and
/// returns the verdict as a bool.
// NOTE(review): the `linear` module below does not use these aliases; it
// spells out its own fn types, which take keys by reference.
type EqFn<K> = pure fn~(K, K) -> bool;
/// Open addressing with linear probing.
mod linear {
2012-08-14 18:54:13 -05:00
export LinearMap, linear_map, linear_map_with_capacity, public_methods;
/// Bucket count meant for maps that are built without an explicit capacity.
const initial_capacity: uint = 32u; // 2^5
2012-08-14 18:54:13 -05:00
/// An occupied slot of the table: a key, its value, and the hash that the
/// entry was inserted under.
type Bucket<K,V> = {hash: uint, key: K, value: V};
/// The map itself: a single-variant enum wrapping the record that holds
/// the hashing callbacks, the bookkeeping counters and the bucket array.
enum LinearMap<K,V> {
    LinearMap_({
        // Computes the hash of a key.
        hashfn: pure fn~(x: &K) -> uint,
        // Decides whether two keys are the same key.
        eqfn: pure fn~(x: &K, y: &K) -> bool,
        // `insert` grows the table once `size` reaches this value.
        resize_at: uint,
        // Number of entries currently stored.
        size: uint,
        // One slot per bucket; `none` marks an empty slot.
        buckets: ~[option<Bucket<K,V>>]})
}
// FIXME(#3148) -- we could rewrite FoundEntry
// to carry an option<&Bucket<K,V>>, which would be nifty.
// However, that won't work until #3148 is fixed.
2012-08-14 18:54:13 -05:00
/// Outcome of probing the table for a key.
enum SearchResult {
    // The key is stored in the bucket at this index.
    FoundEntry(uint),
    // The key is absent; this is the first empty bucket on its probe path.
    FoundHole(uint),
    // Every bucket was probed and none was empty or held the key.
    TableFull
}
/// Returns the entry count at which a table with `capacity` buckets is
/// due to grow: three quarters of the capacity, truncated to a uint.
fn resize_at(capacity: uint) -> uint {
    let threshold = (capacity as float) * 3. / 4.;
    threshold as uint
}
fn linear_map<K,V>(
+hashfn: pure fn~(x: &K) -> uint,
2012-08-14 18:54:13 -05:00
+eqfn: pure fn~(x: &K, y: &K) -> bool) -> LinearMap<K,V> {
linear_map_with_capacity(hashfn, eqfn, 32)
}
fn linear_map_with_capacity<K,V>(
+hashfn: pure fn~(x: &K) -> uint,
+eqfn: pure fn~(x: &K, y: &K) -> bool,
2012-08-14 18:54:13 -05:00
initial_capacity: uint) -> LinearMap<K,V> {
2012-08-14 18:54:13 -05:00
LinearMap_({
hashfn: hashfn,
eqfn: eqfn,
resize_at: resize_at(initial_capacity),
size: 0,
buckets: vec::from_fn(initial_capacity, |_i| none)})
}
2012-08-22 23:01:30 -05:00
priv impl<K, V> LinearMap<K,V> {
#[inline(always)]
2012-08-22 23:01:30 -05:00
pure fn to_bucket(&const self,
h: uint) -> uint {
// FIXME(#3041) borrow a more sophisticated technique here from
// Gecko, for example borrowing from Knuth, as Eich so
// colorfully argues for here:
// https://bugzilla.mozilla.org/show_bug.cgi?id=743107#c22
h % self.buckets.len()
}
#[inline(always)]
2012-08-22 23:01:30 -05:00
pure fn next_bucket(&const self,
idx: uint,
len_buckets: uint) -> uint {
let n = (idx + 1) % len_buckets;
unsafe{ // argh. log not considered pure.
2012-08-22 19:24:52 -05:00
debug!("next_bucket(%?, %?) = %?", idx, len_buckets, n);
}
2012-08-01 19:30:05 -05:00
return n;
}
#[inline(always)]
2012-08-22 23:01:30 -05:00
pure fn bucket_sequence(&const self,
hash: uint,
op: fn(uint) -> bool) -> uint {
let start_idx = self.to_bucket(hash);
let len_buckets = self.buckets.len();
let mut idx = start_idx;
loop {
if !op(idx) {
2012-08-01 19:30:05 -05:00
return idx;
}
idx = self.next_bucket(idx, len_buckets);
if idx == start_idx {
2012-08-01 19:30:05 -05:00
return start_idx;
}
}
}
#[inline(always)]
2012-08-22 23:01:30 -05:00
pure fn bucket_for_key(&const self,
buckets: &[option<Bucket<K,V>>],
k: &K) -> SearchResult {
let hash = self.hashfn(k);
self.bucket_for_key_with_hash(buckets, hash, k)
}
#[inline(always)]
2012-08-22 23:01:30 -05:00
pure fn bucket_for_key_with_hash(&const self,
buckets: &[option<Bucket<K,V>>],
hash: uint,
k: &K) -> SearchResult {
let _ = for self.bucket_sequence(hash) |i| {
2012-08-06 14:34:08 -05:00
match buckets[i] {
2012-08-03 21:59:04 -05:00
some(bkt) => if bkt.hash == hash && self.eqfn(k, &bkt.key) {
2012-08-14 18:54:13 -05:00
return FoundEntry(i);
},
2012-08-14 18:54:13 -05:00
none => return FoundHole(i)
}
};
2012-08-14 18:54:13 -05:00
return TableFull;
}
/// Expands the capacity of the array and re-inserts each
/// of the existing buckets.
2012-08-22 23:01:30 -05:00
fn expand(&mut self) {
let old_capacity = self.buckets.len();
let new_capacity = old_capacity * 2;
self.resize_at = ((new_capacity as float) * 3.0 / 4.0) as uint;
let mut old_buckets = vec::from_fn(new_capacity, |_i| none);
self.buckets <-> old_buckets;
for uint::range(0, old_capacity) |i| {
let mut bucket = none;
bucket <-> old_buckets[i];
if bucket.is_some() {
self.insert_bucket(bucket);
}
}
}
2012-08-22 23:01:30 -05:00
/// Takes a bucket apart and re-inserts its entry under the hash that is
/// already stored in it, so the key is not hashed again.
/// `bucket` is unwrapped unconditionally, so callers must pass `some`.
/// The result of the insertion is discarded.
fn insert_bucket(&mut self, +bucket: option<Bucket<K,V>>) {
let {hash, key, value} <- option::unwrap(bucket);
let _ = self.insert_internal(hash, key, value);
}
/// Inserts the key value pair into the buckets.
/// Assumes that there will be a bucket.
/// True if there was no previous entry with that key
2012-08-22 23:01:30 -05:00
fn insert_internal(&mut self, hash: uint, +k: K, +v: V) -> bool {
match self.bucket_for_key_with_hash(self.buckets, hash, &k) {
2012-08-14 18:54:13 -05:00
TableFull => {fail ~"Internal logic error";}
FoundHole(idx) => {
2012-08-22 19:24:52 -05:00
debug!("insert fresh (%?->%?) at idx %?, hash %?",
k, v, idx, hash);
self.buckets[idx] = some({hash: hash, key: k, value: v});
self.size += 1;
2012-08-01 19:30:05 -05:00
return true;
}
2012-08-14 18:54:13 -05:00
FoundEntry(idx) => {
2012-08-22 19:24:52 -05:00
debug!("insert overwrite (%?->%?) at idx %?, hash %?",
k, v, idx, hash);
self.buckets[idx] = some({hash: hash, key: k, value: v});
2012-08-01 19:30:05 -05:00
return false;
}
}
}
2012-08-22 23:01:30 -05:00
/// Hands the buckets on the probe path for `hash` to `op`, one at a
/// time, until `op` returns false or the whole table has been visited.
fn search(&self,
          hash: uint,
          op: fn(x: &option<Bucket<K,V>>) -> bool) {
    let _ = self.bucket_sequence(hash, |idx| {
        op(&self.buckets[idx])
    });
}
}
2012-08-22 23:01:30 -05:00
impl<K,V> LinearMap<K,V> {
/// Inserts `k -> v`, overwriting any value already stored for `k`.
/// Returns true if the key was not present before.
fn insert(&mut self, +k: K, +v: V) -> bool {
    // n.b.: The growth check runs before the search, so the table may
    // grow even when this call merely updates an existing key. Checking
    // after the search would avoid that, but would mean probing for the
    // right spot twice; resizing in this corner case seems the lesser
    // evil.
    if self.resize_at <= self.size {
        self.expand();
    }
    let h = self.hashfn(&k);
    self.insert_internal(h, k, v)
}
2012-08-22 23:01:30 -05:00
fn remove(&mut self, k: &K) -> bool {
// Removing from an open-addressed hashtable
// is, well, painful. The problem is that
// the entry may lie on the probe path for other
// entries, so removing it would make you think that
// those probe paths are empty.
//
// To address this we basically have to keep walking,
// re-inserting entries we find until we reach an empty
// bucket. We know we will eventually reach one because
// we insert one ourselves at the beginning (the removed
// entry).
//
// I found this explanation elucidating:
// http://www.maths.lse.ac.uk/Courses/MA407/del-hash.pdf
2012-08-06 14:34:08 -05:00
let mut idx = match self.bucket_for_key(self.buckets, k) {
2012-08-14 18:54:13 -05:00
TableFull | FoundHole(_) => {
2012-08-01 19:30:05 -05:00
return false;
}
2012-08-14 18:54:13 -05:00
FoundEntry(idx) => {
idx
}
};
let len_buckets = self.buckets.len();
self.buckets[idx] = none;
idx = self.next_bucket(idx, len_buckets);
while self.buckets[idx].is_some() {
let mut bucket = none;
bucket <-> self.buckets[idx];
self.insert_bucket(bucket);
idx = self.next_bucket(idx, len_buckets);
}
self.size -= 1;
2012-08-01 19:30:05 -05:00
return true;
}
2012-08-22 23:01:30 -05:00
/// Removes every entry; the number of buckets stays the same.
fn clear(&mut self) {
    self.size = 0;
    let n_buckets = self.buckets.len();
    for uint::range(0, n_buckets) |i| {
        self.buckets[i] = none;
    }
}
2012-08-22 23:01:30 -05:00
/// Returns the number of entries, as tracked by the `size` counter.
pure fn len(&const self) -> uint {
self.size
}
2012-08-22 23:01:30 -05:00
/// Returns true if the map holds no entries.
pure fn is_empty(&const self) -> bool {
    self.size == 0
}
2012-08-22 23:01:30 -05:00
fn contains_key(&const self,
k: &K) -> bool {
2012-08-06 14:34:08 -05:00
match self.bucket_for_key(self.buckets, k) {
2012-08-14 18:54:13 -05:00
FoundEntry(_) => {true}
TableFull | FoundHole(_) => {false}
}
}
2012-07-31 16:00:17 -05:00
/*
FIXME(#3148)--region inference fails to capture needed deps
2012-08-22 23:01:30 -05:00
fn find_ref(&self, k: &K) -> option<&self/V> {
match self.bucket_for_key(self.buckets, k) {
FoundEntry(idx) => {
match check self.buckets[idx] {
some(ref bkt) => some(&bkt.value)
}
}
TableFull | FoundHole(_) => {
none
}
}
}
2012-07-31 16:00:17 -05:00
*/
2012-08-22 23:01:30 -05:00
fn each_ref(&self, blk: fn(k: &K, v: &V) -> bool) {
2012-07-31 16:00:17 -05:00
for vec::each(self.buckets) |slot| {
let mut broke = false;
do slot.iter |bucket| {
if !blk(&bucket.key, &bucket.value) {
broke = true; // FIXME(#3064) just write "break;"
}
}
if broke { break; }
}
}
2012-08-22 23:01:30 -05:00
/// Visits the keys by reference until `blk` returns false.
fn each_key_ref(&self, blk: fn(k: &K) -> bool) {
    self.each_ref(|key, _val| blk(key))
}
2012-08-22 23:01:30 -05:00
/// Visits the values by reference until `blk` returns false.
fn each_value_ref(&self, blk: fn(v: &V) -> bool) {
    self.each_ref(|_key, val| blk(val))
}
}
2012-08-22 23:01:30 -05:00
impl<K,V: copy> LinearMap<K,V> {
fn find(&const self, k: &K) -> option<V> {
match self.bucket_for_key(self.buckets, k) {
FoundEntry(idx) => {
// FIXME (#3148): Once we rewrite found_entry, this
// failure case won't be necessary
match self.buckets[idx] {
some(bkt) => {some(copy bkt.value)}
none => fail ~"LinearMap::find: internal logic error"
2012-08-22 23:01:30 -05:00
}
}
TableFull | FoundHole(_) => {
none
}
}
}
fn get(&const self, k: &K) -> V {
let value = self.find(k);
if value.is_none() {
2012-08-22 19:24:52 -05:00
fail fmt!("No entry found for key: %?", k);
2012-08-22 23:01:30 -05:00
}
option::unwrap(value)
}
}
impl<K: copy, V: copy> LinearMap<K,V> {
    /// Visits the entries, handing `blk` a copy of each key and value,
    /// until `blk` returns false.
    fn each(&self, blk: fn(+K,+V) -> bool) {
        self.each_ref(|key, val| blk(copy *key, copy *val));
    }
}
2012-08-22 23:01:30 -05:00
impl<K: copy, V> LinearMap<K,V> {
    /// Visits the keys, handing `blk` a copy of each, until `blk`
    /// returns false.
    fn each_key(&self, blk: fn(+K) -> bool) {
        self.each_key_ref(|key| blk(copy *key));
    }
}
2012-08-22 23:01:30 -05:00
impl<K, V: copy> LinearMap<K,V> {
    /// Visits the values, handing `blk` a copy of each, until `blk`
    /// returns false.
    fn each_value(&self, blk: fn(+V) -> bool) {
        self.each_value_ref(|val| blk(copy *val));
    }
}
}
#[test]
mod test {
2012-08-14 18:54:13 -05:00
import linear::{LinearMap, linear_map};
pure fn uint_hash(x: &uint) -> uint { *x }
pure fn uint_eq(x: &uint, y: &uint) -> bool { *x == *y }
2012-08-14 18:54:13 -05:00
fn int_linear_map<V>() -> LinearMap<uint,V> {
2012-08-01 19:30:05 -05:00
return linear_map(uint_hash, uint_eq);
}
#[test]
fn inserts() {
let mut m = ~int_linear_map();
assert m.insert(1, 2);
assert m.insert(2, 4);
assert m.get(&1) == 2;
assert m.get(&2) == 4;
}
#[test]
fn overwrite() {
let mut m = ~int_linear_map();
assert m.insert(1, 2);
assert m.get(&1) == 2;
assert !m.insert(1, 3);
assert m.get(&1) == 3;
}
#[test]
fn conflicts() {
let mut m = ~linear::linear_map_with_capacity(uint_hash, uint_eq, 4);
assert m.insert(1, 2);
assert m.insert(5, 3);
assert m.insert(9, 4);
assert m.get(&9) == 4;
assert m.get(&5) == 3;
assert m.get(&1) == 2;
}
#[test]
fn conflict_remove() {
let mut m = ~linear::linear_map_with_capacity(uint_hash, uint_eq, 4);
assert m.insert(1, 2);
assert m.insert(5, 3);
assert m.insert(9, 4);
assert m.remove(&1);
assert m.get(&9) == 4;
assert m.get(&5) == 3;
}
#[test]
fn empty() {
let mut m = ~linear::linear_map_with_capacity(uint_hash, uint_eq, 4);
assert m.insert(1, 2);
assert !m.is_empty();
assert m.remove(&1);
assert m.is_empty();
}
#[test]
fn iterate() {
let mut m = linear::linear_map_with_capacity(uint_hash, uint_eq, 4);
for uint::range(0, 32) |i| {
assert (&mut m).insert(i, i*2);
}
let mut observed = 0;
for (&m).each |k, v| {
assert v == k*2;
observed |= (1 << k);
}
assert observed == 0xFFFF_FFFF;
}
2012-07-31 16:00:17 -05:00
}