2014-02-28 22:23:53 -05:00
|
|
|
|
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
2012-12-03 16:48:01 -08:00
|
|
|
|
// file at the top-level directory of this distribution and at
|
|
|
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
|
|
|
//
|
|
|
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
|
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
|
|
|
// option. This file may not be copied, modified, or distributed
|
|
|
|
|
// except according to those terms.
|
|
|
|
|
|
2013-12-24 17:08:28 +01:00
|
|
|
|
//! Unordered containers, implemented as hash-tables (`HashSet` and `HashMap` types)
|
2012-07-26 16:09:22 -07:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
use std::container::{Container, Mutable, Map, MutableMap, Set, MutableSet};
|
|
|
|
|
use std::clone::Clone;
|
2014-03-22 16:30:45 -04:00
|
|
|
|
use std::cmp::{Eq, TotalEq, Equiv, max};
|
2014-02-23 12:07:11 +11:00
|
|
|
|
use std::default::Default;
|
2014-02-19 19:29:58 -08:00
|
|
|
|
use std::fmt;
|
2014-02-28 22:23:53 -05:00
|
|
|
|
use std::fmt::Show;
|
|
|
|
|
use std::hash::{Hash, Hasher, sip};
|
2014-02-19 19:29:58 -08:00
|
|
|
|
use std::iter;
|
2014-02-28 22:23:53 -05:00
|
|
|
|
use std::iter::{Iterator, FromIterator, Extendable};
|
|
|
|
|
use std::iter::{FilterMap, Chain, Repeat, Zip};
|
|
|
|
|
use std::iter::{range, range_inclusive};
|
2014-02-19 19:29:58 -08:00
|
|
|
|
use std::mem::replace;
|
|
|
|
|
use std::num;
|
2014-02-28 22:23:53 -05:00
|
|
|
|
use std::option::{Option, Some, None};
|
2014-03-02 11:33:24 +11:00
|
|
|
|
use rand;
|
2014-02-28 22:23:53 -05:00
|
|
|
|
use rand::Rng;
|
|
|
|
|
use std::result::{Ok, Err};
|
2014-03-08 18:11:52 -05:00
|
|
|
|
use std::slice::ImmutableVector;
|
2014-02-28 22:23:53 -05:00
|
|
|
|
|
|
|
|
|
mod table {
|
|
|
|
|
use std::clone::Clone;
|
|
|
|
|
use std::cmp::Eq;
|
|
|
|
|
use std::hash::{Hash, Hasher};
|
|
|
|
|
use std::kinds::marker;
|
|
|
|
|
use std::libc;
|
|
|
|
|
use std::num::CheckedMul;
|
|
|
|
|
use std::option::{Option, Some, None};
|
|
|
|
|
use std::prelude::Drop;
|
|
|
|
|
use std::ptr;
|
|
|
|
|
use std::ptr::RawPtr;
|
|
|
|
|
use std::rt::global_heap;
|
|
|
|
|
use std::intrinsics::{size_of, transmute, move_val_init};
|
|
|
|
|
use std::iter::{Iterator, range_step_inclusive};
|
|
|
|
|
|
|
|
|
|
static EMPTY_BUCKET: u64 = 0u64;
|
|
|
|
|
|
|
|
|
|
    /// The raw hashtable, providing safe-ish access to the unzipped and highly
    /// optimized arrays of hashes, keys, and values.
    ///
    /// This design uses less memory and is a lot faster than the naive
    /// `~[Option<(u64, K, V)>]`, because we don't pay for the overhead of an
    /// option on every element, and we get a generally more cache-aware design.
    ///
    /// Key invariants of this structure:
    ///
    ///   - if hashes[i] == EMPTY_BUCKET, then keys[i] and vals[i] have
    ///     'undefined' contents. Don't read from them. This invariant is
    ///     enforced outside this module with the [EmptyIndex], [FullIndex],
    ///     and [SafeHash] types/concepts.
    ///
    ///   - An `EmptyIndex` is only constructed for a bucket at an index with
    ///     a hash of EMPTY_BUCKET.
    ///
    ///   - A `FullIndex` is only constructed for a bucket at an index with a
    ///     non-EMPTY_BUCKET hash.
    ///
    ///   - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get
    ///     around hashes of zero by changing them to 0x8000_0000_0000_0000,
    ///     which will likely hash to the same bucket, but not be represented
    ///     as "empty".
    ///
    ///   - All three "arrays represented by pointers" are the same length:
    ///     `capacity`. This is set at creation and never changes. The arrays
    ///     are unzipped to save space (we don't have to pay for the padding
    ///     between odd sized elements, such as in a map from u64 to u8), and
    ///     be more cache aware (scanning through 8 hashes brings in 2 cache
    ///     lines, since they're all right beside each other).
    ///
    /// You can kind of think of this module/data structure as a safe wrapper
    /// around just the "table" part of the hashtable. It enforces some
    /// invariants at the type level and employs some performance trickery,
    /// but in general is just a tricked out `~[Option<(u64, K, V)>]`.
    ///
    /// FIXME(cgaebel):
    ///
    /// Feb 11, 2014: This hashtable was just implemented, and, hard as I tried,
    /// isn't yet totally safe. There's a "known exploit" that you can create
    /// multiple FullIndexes for a bucket, `take` one, and then still `take`
    /// the other causing undefined behavior. Currently, there's no story
    /// for how to protect against this statically. Therefore, there are asserts
    /// on `take`, `get`, `get_mut`, and `put` which check the bucket state.
    /// With time, and when we're confident this works correctly, they should
    /// be removed. Also, the bounds check in `peek` is especially painful,
    /// as that's called in the innermost loops of the hashtable and has the
    /// potential to be a major performance drain. Remove this too.
    ///
    /// Or, better than remove, only enable these checks for debug builds.
    /// There's currently no "debug-only" asserts in rust, so if you're reading
    /// this and going "what? of course there are debug-only asserts!", then
    /// please make this use them!
    pub struct RawTable<K, V> {
        // Number of buckets in each of the three parallel arrays. Fixed at
        // creation; never changes for the lifetime of the table.
        priv capacity: uint,
        // Number of buckets whose hash is currently non-EMPTY_BUCKET.
        priv size: uint,
        // Parallel arrays of length `capacity`; a key/value slot is only
        // initialized while the matching hash slot is non-zero.
        priv hashes: *mut u64,
        priv keys: *mut K,
        priv vals: *mut V,
    }
|
2014-02-19 19:29:58 -08:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
    /// Represents an index into a `RawTable` with no key or value in it.
    pub struct EmptyIndex {
        priv idx: int,
        // NoPod makes the index move-only, so it cannot be implicitly
        // copied and reused after the bucket's state has changed.
        priv nopod: marker::NoPod,
    }
|
2013-04-03 08:45:14 -04:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
    /// Represents an index into a `RawTable` with a key, value, and hash
    /// in it.
    pub struct FullIndex {
        priv idx: int,
        // Cached copy of the bucket's hash, so callers don't have to go
        // back to the table to retrieve it.
        priv hash: SafeHash,
        // NoPod makes the index move-only; see `EmptyIndex`.
        priv nopod: marker::NoPod,
    }
|
2012-12-23 17:41:37 -05:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
    impl FullIndex {
        /// Since we get the hash for free whenever we check the bucket state,
        /// this function is provided for fast access, letting us avoid making
        /// redundant trips back to the hashtable.
        pub fn hash(&self) -> SafeHash { self.hash }

        /// Same comment as with `hash`.
        pub fn raw_index(&self) -> uint { self.idx as uint }
    }
|
2013-01-24 22:00:58 -05:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
    /// Represents the state of a bucket: it can either have a key/value
    /// pair (be full) or not (be empty). You cannot `take` empty buckets,
    /// and you cannot `put` into full buckets.
    pub enum BucketState {
        Empty(EmptyIndex),
        Full(FullIndex),
    }
|
2012-07-26 16:09:22 -07:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
    /// A hash that is not zero, since we use that to represent empty buckets.
    #[deriving(Eq)]
    pub struct SafeHash {
        // Invariant: never equals EMPTY_BUCKET (0); see `make_hash`.
        priv hash: u64,
    }
|
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
    impl SafeHash {
        /// Peek at the hash value, which is guaranteed to be non-zero.
        pub fn inspect(&self) -> u64 { self.hash }
    }
|
2012-07-26 16:09:22 -07:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
    /// We need to remove hashes of 0. That's reserved for empty buckets.
    /// This function wraps up `hash_keyed` to be the only way outside this
    /// module to generate a SafeHash.
    pub fn make_hash<T: Hash<S>, S, H: Hasher<S>>(hasher: &H, t: &T) -> SafeHash {
        match hasher.hash(t) {
            // This constant is exceedingly likely to hash to the same
            // bucket, but it won't be counted as empty! (Only the top bit
            // differs from the real hash of 0.)
            EMPTY_BUCKET => SafeHash { hash: 0x8000_0000_0000_0000 },
            h            => SafeHash { hash: h },
        }
    }
|
|
|
|
|
|
|
|
|
|
    impl<K, V> RawTable<K, V> {
        /// Does not initialize the buckets. The caller should ensure they,
        /// at the very least, set every hash to EMPTY_BUCKET.
        unsafe fn new_uninitialized(capacity: uint) -> RawTable<K, V> {
            // Compute the byte size of each array, failing loudly on
            // arithmetic overflow rather than under-allocating.
            let hashes_size =
                capacity.checked_mul(&size_of::<u64>()).expect("capacity overflow");
            let keys_size =
                capacity.checked_mul(&size_of::< K >()).expect("capacity overflow");
            let vals_size =
                capacity.checked_mul(&size_of::< V >()).expect("capacity overflow");

            /*
            The following code was my first pass at making RawTable only
            allocate a single buffer, since that's all it needs. There's
            no logical reason for this to require three calls to malloc.

            However, I'm not convinced the code below is correct. If you
            want to take a stab at it, please do! The alignment is
            especially tricky to get right, especially if you need more
            alignment than malloc guarantees.

            let hashes_offset = 0;
            let keys_offset   = align_size(hashes_offset + hashes_size, keys_align);
            let vals_offset   = align_size(keys_offset + keys_size, vals_align);
            let end = vals_offset + vals_size;

            let buffer = global_heap::malloc_raw(end);

            let hashes = buffer.offset(hashes_offset) as *mut u64;
            let keys   = buffer.offset(keys_offset)   as *mut K;
            let vals   = buffer.offset(vals_offset)   as *mut V;
            */

            // Three separate allocations; freed individually in `drop`.
            let hashes = global_heap::malloc_raw(hashes_size) as *mut u64;
            let keys   = global_heap::malloc_raw(keys_size)   as *mut K;
            let vals   = global_heap::malloc_raw(vals_size)   as *mut V;

            RawTable {
                capacity: capacity,
                size:     0,
                hashes:   hashes,
                keys:     keys,
                vals:     vals,
            }
        }

        /// Creates a new raw table from a given capacity. All buckets are
        /// initially empty.
        pub fn new(capacity: uint) -> RawTable<K, V> {
            unsafe {
                let ret = RawTable::new_uninitialized(capacity);

                // Mark every bucket empty. Keys and values stay
                // uninitialized; they must never be read while the matching
                // hash slot is EMPTY_BUCKET.
                for i in range(0, ret.capacity() as int) {
                    *ret.hashes.offset(i) = EMPTY_BUCKET;
                }

                ret
            }
        }

        /// Reads a bucket at a given index, returning an enum indicating whether
        /// there's anything there or not. You need to match on this enum to get
        /// the appropriate types to pass on to most of the rest of the functions
        /// in this module.
        pub fn peek(&self, index: uint) -> BucketState {
            // FIXME #12049
            if cfg!(test) { assert!(index < self.capacity) }

            let idx  = index as int;
            let hash = unsafe { *self.hashes.offset(idx) };

            let nopod = marker::NoPod;

            match hash {
                EMPTY_BUCKET =>
                    Empty(EmptyIndex {
                        idx:   idx,
                        nopod: nopod
                    }),
                full_hash =>
                    Full(FullIndex {
                        idx:   idx,
                        // A non-empty hash slot is a SafeHash by invariant.
                        hash:  SafeHash { hash: full_hash },
                        nopod: nopod,
                    })
            }
        }

        /// Gets references to the key and value at a given index.
        pub fn read<'a>(&'a self, index: &FullIndex) -> (&'a K, &'a V) {
            let idx = index.idx;

            unsafe {
                // FIXME #12049
                if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
                (&'a *self.keys.offset(idx),
                 &'a *self.vals.offset(idx))
            }
        }

        /// Gets references to the key and value at a given index, with the
        /// value's reference being mutable.
        pub fn read_mut<'a>(&'a mut self, index: &FullIndex) -> (&'a K, &'a mut V) {
            let idx = index.idx;

            unsafe {
                // FIXME #12049
                if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
                (&'a *self.keys.offset(idx),
                 &'a mut *self.vals.offset(idx))
            }
        }

        /// Read everything, mutably.
        pub fn read_all_mut<'a>(&'a mut self, index: &FullIndex)
            -> (&'a mut SafeHash, &'a mut K, &'a mut V) {
            let idx = index.idx;

            // I'm totally abusing the fact that a pointer to any u64 in the
            // hashtable at a full index is a safe hash. Thanks to `SafeHash`
            // just being a wrapper around u64, this is true. It's just really
            // really really unsafe. However, the exposed API is now
            // impossible to get wrong. You cannot insert an empty hash into
            // this slot now.

            unsafe {
                // FIXME #12049
                if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
                (transmute(self.hashes.offset(idx)),
                 &'a mut *self.keys.offset(idx),
                 &'a mut *self.vals.offset(idx))
            }
        }

        /// Puts a key and value pair, along with the key's hash, into a given
        /// index in the hashtable. Note how the `EmptyIndex` is 'moved' into this
        /// function, because that slot will no longer be empty when we return!
        /// Because we know this, a FullIndex is returned for later use, pointing
        /// to the newly-filled slot in the hashtable.
        ///
        /// Use `make_hash` to construct a `SafeHash` to pass to this function.
        pub fn put(&mut self, index: EmptyIndex, hash: SafeHash, k: K, v: V) -> FullIndex {
            let idx = index.idx;

            unsafe {
                // FIXME #12049
                if cfg!(test) { assert!(*self.hashes.offset(idx) == EMPTY_BUCKET) }
                *self.hashes.offset(idx) = hash.inspect();
                // move_val_init writes without dropping the (uninitialized)
                // destination, which is exactly what an empty slot needs.
                move_val_init(&mut *self.keys.offset(idx), k);
                move_val_init(&mut *self.vals.offset(idx), v);
            }

            self.size += 1;

            FullIndex { idx: idx, hash: hash, nopod: marker::NoPod }
        }

        /// Removes a key and value from the hashtable.
        ///
        /// This works similarly to `put`, building an `EmptyIndex` out of the
        /// taken FullIndex.
        pub fn take(&mut self, index: FullIndex) -> (EmptyIndex, K, V) {
            let idx = index.idx;

            unsafe {
                // FIXME #12049
                if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }

                let hash_ptr = self.hashes.offset(idx);

                // Mark the bucket empty before moving the contents out, so
                // the key/value slots are 'undefined' from here on.
                *hash_ptr = EMPTY_BUCKET;

                // Drop the mutable constraint.
                let keys = self.keys as *K;
                let vals = self.vals as *V;

                // ptr::read moves the values out without running their
                // destructors in place.
                let k = ptr::read(keys.offset(idx));
                let v = ptr::read(vals.offset(idx));

                self.size -= 1;

                (EmptyIndex { idx: idx, nopod: marker::NoPod }, k, v)
            }
        }

        /// The hashtable's capacity, similar to a vector's.
        pub fn capacity(&self) -> uint {
            self.capacity
        }

        /// The number of elements ever `put` in the hashtable, minus the number
        /// of elements ever `take`n.
        pub fn size(&self) -> uint {
            self.size
        }

        /// Iterator over shared references to all full buckets.
        pub fn iter<'a>(&'a self) -> Entries<'a, K, V> {
            Entries { table: self, idx: 0 }
        }

        /// Iterator yielding mutable value references for all full buckets.
        pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> {
            MutEntries { table: self, idx: 0 }
        }

        /// Consuming iterator: drains every full bucket out of the table.
        pub fn move_iter(self) -> MoveEntries<K, V> {
            MoveEntries { table: self, idx: 0 }
        }
    }
|
2013-03-04 19:43:14 -08:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
    /// Iterator over shared references to the (key, value) pairs of a table.
    pub struct Entries<'a, K, V> {
        priv table: &'a RawTable<K, V>,
        // Next bucket index to inspect (counts buckets, not elements).
        priv idx: uint,
    }
|
2013-02-15 01:35:15 -05:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
    /// Iterator over (shared key, mutable value) pairs of a table.
    pub struct MutEntries<'a, K, V> {
        priv table: &'a mut RawTable<K, V>,
        // Next bucket index to inspect (counts buckets, not elements).
        priv idx: uint,
    }
|
|
|
|
|
|
|
|
|
|
    /// Consuming iterator: owns the table and `take`s each full bucket.
    pub struct MoveEntries<K, V> {
        priv table: RawTable<K, V>,
        // Next bucket index to inspect (counts buckets, not elements).
        priv idx: uint,
    }
|
2012-07-26 16:09:22 -07:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> {
|
|
|
|
|
fn next(&mut self) -> Option<(&'a K, &'a V)> {
|
|
|
|
|
while self.idx < self.table.capacity() {
|
|
|
|
|
let i = self.idx;
|
|
|
|
|
self.idx += 1;
|
2012-07-26 16:09:22 -07:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
match self.table.peek(i) {
|
|
|
|
|
Empty(_) => {},
|
|
|
|
|
Full(idx) => return Some(self.table.read(&idx))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
None
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn size_hint(&self) -> (uint, Option<uint>) {
|
|
|
|
|
let size = self.table.size() - self.idx;
|
|
|
|
|
(size, Some(size))
|
2012-07-26 16:09:22 -07:00
|
|
|
|
}
|
2013-04-03 08:45:14 -04:00
|
|
|
|
}
|
2012-07-26 16:09:22 -07:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> {
|
|
|
|
|
fn next(&mut self) -> Option<(&'a K, &'a mut V)> {
|
|
|
|
|
while self.idx < self.table.capacity() {
|
|
|
|
|
let i = self.idx;
|
|
|
|
|
self.idx += 1;
|
|
|
|
|
|
|
|
|
|
match self.table.peek(i) {
|
|
|
|
|
Empty(_) => {},
|
|
|
|
|
// the transmute here fixes:
|
|
|
|
|
// error: lifetime of `self` is too short to guarantee its contents
|
|
|
|
|
// can be safely reborrowed
|
|
|
|
|
Full(idx) => unsafe {
|
|
|
|
|
return Some(transmute(self.table.read_mut(&idx)))
|
|
|
|
|
}
|
|
|
|
|
}
|
2012-08-28 11:59:31 -07:00
|
|
|
|
}
|
2014-02-28 22:23:53 -05:00
|
|
|
|
|
|
|
|
|
None
|
2012-07-26 16:09:22 -07:00
|
|
|
|
}
|
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
fn size_hint(&self) -> (uint, Option<uint>) {
|
|
|
|
|
let size = self.table.size() - self.idx;
|
|
|
|
|
(size, Some(size))
|
2013-04-10 13:11:35 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
    impl<K, V> Iterator<(SafeHash, K, V)> for MoveEntries<K, V> {
        /// Scans forward past empty buckets, `take`s the next full bucket,
        /// and yields its hash, key, and value by move.
        fn next(&mut self) -> Option<(SafeHash, K, V)> {
            while self.idx < self.table.capacity() {
                let i = self.idx;
                self.idx += 1;

                match self.table.peek(i) {
                    Empty(_) => {},
                    Full(idx) => {
                        // Grab the hash before `take` consumes the index.
                        let h = idx.hash();
                        let (_, k, v) = self.table.take(idx);
                        return Some((h, k, v));
                    }
                }
            }

            None
        }

        fn size_hint(&self) -> (uint, Option<uint>) {
            // Exact: `take` decrements `size`, so the table's current size
            // is precisely the number of elements left to yield.
            let size = self.table.size();
            (size, Some(size))
        }
    }
|
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
    impl<K: Clone, V: Clone> Clone for RawTable<K, V> {
        /// Deep-copies the table bucket-by-bucket: empty buckets are marked
        /// empty in the clone, full buckets get their hash copied and their
        /// key/value cloned into the corresponding uninitialized slots.
        fn clone(&self) -> RawTable<K, V> {
            unsafe {
                let mut new_ht = RawTable::new_uninitialized(self.capacity());

                for i in range(0, self.capacity()) {
                    match self.peek(i) {
                        Empty(_) => {
                            *new_ht.hashes.offset(i as int) = EMPTY_BUCKET;
                        },
                        Full(idx) => {
                            let hash = idx.hash().inspect();
                            let (k, v) = self.read(&idx);
                            *new_ht.hashes.offset(i as int) = hash;
                            // move_val_init: write without dropping the
                            // uninitialized destination slots.
                            move_val_init(&mut *new_ht.keys.offset(i as int), (*k).clone());
                            move_val_init(&mut *new_ht.vals.offset(i as int), (*v).clone());
                        }
                    }
                }

                new_ht.size = self.size();

                new_ht
            }
        }
    }
|
2012-08-22 21:01:30 -07:00
|
|
|
|
|
2013-04-03 08:45:14 -04:00
|
|
|
|
|
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
    #[unsafe_destructor]
    impl<K, V> Drop for RawTable<K, V> {
        /// Drops every remaining full bucket's key and value (via `take`),
        /// then frees the three backing arrays.
        fn drop(&mut self) {
            // Scan in reverse: elements are typically drained from the
            // front by `.move_iter()`, so any survivors live at the back.
            // Visiting them first lets the `size == 0` early-exit below
            // skip scanning the already-emptied prefix.
            for i in range_step_inclusive(self.capacity as int - 1, 0, -1) {
                // Check if the size is 0, so we don't do a useless scan when
                // dropping empty tables such as on resize.
                if self.size == 0 { break }

                match self.peek(i as uint) {
                    Empty(_)  => {},
                    // `take` moves the key/value out; dropping the returned
                    // tuple runs their destructors.
                    Full(idx) => { self.take(idx); }
                }
            }

            // Every full bucket must have been emptied above.
            assert!(self.size == 0);

            unsafe {
                libc::free(self.vals   as *mut libc::c_void);
                libc::free(self.keys   as *mut libc::c_void);
                libc::free(self.hashes as *mut libc::c_void);
            }
        }
    }
|
|
|
|
|
}
|
2012-10-08 07:05:01 -07:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
// We use this type for the load factor, to avoid floating point operations
// which might not be supported efficiently on some hardware.
//
// We use small u16s here to save space in the hashtable. They get upcasted
// to u64s when we actually use them.
type Fraction = (u16, u16); // (numerator, denominator)

// multiplication by a fraction, in a way that won't generally overflow for
// array sizes outside a factor of 10 of U64_MAX.
fn fraction_mul(lhs: uint, (num, den): Fraction) -> uint {
    // Widen to u64 before multiplying so `lhs * num` cannot overflow uint
    // on 32-bit targets; the quotient fits back in a uint.
    (((lhs as u64) * (num as u64)) / (den as u64)) as uint
}

// Starting capacity of a fresh HashMap: 2^5 = 32 buckets.
static INITIAL_LOG2_CAP: uint = 5;
static INITIAL_CAPACITY: uint = 1 << INITIAL_LOG2_CAP; // 2^5
// Grow when the table becomes 90% full; see the discussion above.
static INITIAL_LOAD_FACTOR: Fraction = (9, 10);
|
|
|
|
|
|
|
|
|
|
// The main performance trick in this hashmap is called Robin Hood Hashing.
|
|
|
|
|
// It gains its excellent performance from one key invariant:
|
|
|
|
|
//
|
|
|
|
|
// If an insertion collides with an existing element, and that elements
|
|
|
|
|
// "probe distance" (how far away the element is from its ideal location)
|
|
|
|
|
// is higher than how far we've already probed, swap the elements.
|
|
|
|
|
//
|
|
|
|
|
// This massively lowers variance in probe distance, and allows us to get very
|
|
|
|
|
// high load factors with good performance. The 90% load factor I use is rather
|
|
|
|
|
// conservative.
|
|
|
|
|
//
|
|
|
|
|
// > Why a load factor of 90%?
|
|
|
|
|
//
|
|
|
|
|
// In general, all the distances to initial buckets will converge on the mean.
|
|
|
|
|
// At a load factor of α, the odds of finding the target bucket after k
|
|
|
|
|
// probes is approximately 1-α^k. If we set this equal to 50% (since we converge
|
|
|
|
|
// on the mean) and set k=8 (64-byte cache line / 8-byte hash), α=0.92. I round
|
|
|
|
|
// this down to 0.90 to make the math easier on the CPU and avoid its FPU.
|
|
|
|
|
// Since on average we start the probing in the middle of a cache line, this
|
|
|
|
|
// strategy pulls in two cache lines of hashes on every lookup. I think that's
|
|
|
|
|
// pretty good, but if you want to trade off some space, it could go down to one
|
|
|
|
|
// cache line on average with an α of 0.84.
|
|
|
|
|
//
|
|
|
|
|
// > Wait, what? Where did you get 1-α^k from?
|
|
|
|
|
//
|
|
|
|
|
// On the first probe, your odds of a collision with an existing element is α.
|
|
|
|
|
// The odds of doing this twice in a row are approximately α^2. For three times,
|
|
|
|
|
// α^3, etc. Therefore, the odds of colliding k times is α^k. The odds of NOT
|
|
|
|
|
// colliding after k tries is 1-α^k.
|
|
|
|
|
//
|
|
|
|
|
// Future Improvements (FIXME!)
|
|
|
|
|
// ============================
|
|
|
|
|
//
|
|
|
|
|
// Allow the load factor to be changed dynamically and/or at initialization.
|
|
|
|
|
// I'm having trouble figuring out a sane API for this without exporting my
|
|
|
|
|
// hackish fraction type, while still avoiding floating point.
|
|
|
|
|
//
|
|
|
|
|
// Also, would it be possible for us to reuse storage when growing the
|
|
|
|
|
// underlying table? This is exactly the use case for 'realloc', and may
|
|
|
|
|
// be worth exploring.
|
|
|
|
|
//
|
|
|
|
|
// Future Optimizations (FIXME!)
|
|
|
|
|
// =============================
|
|
|
|
|
//
|
|
|
|
|
// The paper cited below mentions an implementation which keeps track of the
|
|
|
|
|
// distance-to-initial-bucket histogram. I'm suspicious of this approach because
|
|
|
|
|
// it requires maintaining an internal map. If this map were replaced with a
|
|
|
|
|
// hashmap, it would be faster, but now our data structure is self-referential
|
|
|
|
|
// and blows up. Also, this allows very good first guesses, but array accesses
|
|
|
|
|
// are no longer linear and in one direction, as we have now. There is also
|
|
|
|
|
// memory and cache pressure that this map would entail that would be very
|
|
|
|
|
// difficult to properly see in a microbenchmark.
|
|
|
|
|
//
|
|
|
|
|
// Another possible design choice that I made without any real reason is
|
|
|
|
|
// parameterizing the raw table over keys and values. Technically, all we need
|
|
|
|
|
// is the size and alignment of keys and values, and the code should be just as
|
|
|
|
|
// efficient (well, we might need one for power-of-two size and one for not...).
|
|
|
|
|
// This has the potential to reduce code bloat in rust executables, without
|
|
|
|
|
// really losing anything except 4 words (key size, key alignment, val size,
|
|
|
|
|
// val alignment) which can be passed in to every call of a `RawTable` function.
|
|
|
|
|
// This would definitely be an avenue worth exploring if people start complaining
|
|
|
|
|
// about the size of rust executables.
|
|
|
|
|
//
|
|
|
|
|
// There's also two optimizations that have been omitted regarding how the
|
|
|
|
|
// hashtable allocates. The first is that a hashtable which never has an element
|
|
|
|
|
// inserted should not allocate. I'm suspicious of this one, because supporting
|
|
|
|
|
// that internally gains no performance over just using an
|
|
|
|
|
// `Option<HashMap<K, V>>`, and is significantly more complicated.
|
|
|
|
|
//
|
|
|
|
|
// The second omitted allocation optimization is that right now we allocate three
|
|
|
|
|
// arrays to back the hashtable. This is wasteful. In theory, we only need one
|
|
|
|
|
// array, and each of the three original arrays can just be slices of it. This
|
|
|
|
|
// would reduce the pressure on the allocator, and will play much nicer with the
|
|
|
|
|
// rest of the system. An initial implementation is commented out in
|
|
|
|
|
// `table::RawTable::new`, but I'm not confident it works for all sane alignments,
|
|
|
|
|
// especially if a type needs more alignment than `malloc` provides.
|
|
|
|
|
|
2014-03-13 22:31:56 +11:00
|
|
|
|
/// A hash map implementation which uses linear probing with Robin
/// Hood bucket stealing.
///
/// The hashes are all keyed by the task-local random number generator
/// on creation by default, this means the ordering of the keys is
/// randomized, but makes the tables more resistant to
/// denial-of-service attacks (Hash DoS). This behaviour can be
/// overridden with one of the constructors.
///
/// It is required that the keys implement the `Eq` and `Hash` traits, although
/// this can frequently be achieved by using `#[deriving(Eq, Hash)]`.
///
/// Relevant papers/articles:
///
/// 1. Pedro Celis. ["Robin Hood Hashing"](https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf)
/// 2. Emmanuel Goossaert. ["Robin Hood
///    hashing"](http://codecapsule.com/2013/11/11/robin-hood-hashing/)
/// 3. Emmanuel Goossaert. ["Robin Hood hashing: backward shift
///    deletion"](http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/)
///
/// # Example
///
/// ```rust
/// use collections::HashMap;
///
/// // type inference lets us omit an explicit type signature (which
/// // would be `HashMap<&str, &str>` in this example).
/// let mut book_reviews = HashMap::new();
///
/// // review some books.
/// book_reviews.insert("Adventures of Hucklebury Fin", "My favorite book.");
/// book_reviews.insert("Grimms' Fairy Tales", "Masterpiece.");
/// book_reviews.insert("Pride and Prejudice", "Very enjoyable.");
/// book_reviews.insert("The Adventures of Sherlock Holmes", "Eye lyked it alot.");
///
/// // check for a specific one.
/// if !book_reviews.contains_key(& &"Les Misérables") {
///     println!("We've got {} reviews, but Les Misérables ain't one.",
///              book_reviews.len());
/// }
///
/// // oops, this review has a lot of spelling mistakes, let's delete it.
/// book_reviews.remove(& &"The Adventures of Sherlock Holmes");
///
/// // look up the values associated with some keys.
/// let to_find = ["Pride and Prejudice", "Alice's Adventure in Wonderland"];
/// for book in to_find.iter() {
///     match book_reviews.find(book) {
///         Some(review) => println!("{}: {}", *book, *review),
///         None => println!("{} is unreviewed.", *book)
///     }
/// }
///
/// // iterate over everything.
/// for (book, review) in book_reviews.iter() {
///     println!("{}: \"{}\"", *book, *review);
/// }
/// ```
#[deriving(Clone)]
pub struct HashMap<K, V, H = sip::SipHasher> {
    // All hashes are keyed on these values, to prevent hash collision attacks.
    priv hasher: H,

    // When size == grow_at, we double the capacity.
    priv grow_at: uint,

    // The capacity must never drop below this.
    priv minimum_capacity: uint,

    priv table: table::RawTable<K, V>,

    // We keep this at the end since it's 4-bytes, unlike everything else
    // in this struct. Might as well save a word of padding!
    priv load_factor: Fraction,
}
|
|
|
|
|
|
|
|
|
|
/// Get the number of elements which will force the capacity to grow.
/// For example, with `capacity` 32 and load factor (9, 10), the map
/// grows once it holds 28 elements.
fn grow_at(capacity: uint, load_factor: Fraction) -> uint {
    fraction_mul(capacity, load_factor)
}
|
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
|
2014-02-28 22:23:53 -05:00
|
|
|
|
    /// Get the number of elements which will force the capacity to shrink.
    /// When size == self.shrink_at(), we halve the capacity.
    fn shrink_at(&self) -> uint {
        // Shrink at a quarter of the capacity, so that after halving the
        // table is only half full, leaving hysteresis against thrashing.
        self.table.capacity() >> 2
    }
|
|
|
|
|
|
|
|
|
|
// Probe the `idx`th bucket for a given hash, returning the index of the
|
|
|
|
|
// target bucket.
|
|
|
|
|
//
|
|
|
|
|
// This exploits the power-of-two size of the hashtable. As long as this
|
|
|
|
|
// is always true, we can use a bitmask of cap-1 to do modular arithmetic.
|
|
|
|
|
//
|
|
|
|
|
// Prefer to use this with increasing values of `idx` rather than repeatedly
|
|
|
|
|
// calling `probe_next`. This reduces data-dependencies between loops, which
|
|
|
|
|
// can help the optimizer, and certainly won't hurt it. `probe_next` is
|
|
|
|
|
// simply for convenience, and is no more efficient than `probe`.
|
|
|
|
|
fn probe(&self, hash: &table::SafeHash, idx: uint) -> uint {
|
|
|
|
|
let hash_mask = self.table.capacity() - 1;
|
|
|
|
|
|
|
|
|
|
// So I heard a rumor that unsigned overflow is safe in rust..
|
|
|
|
|
((hash.inspect() as uint) + idx) & hash_mask
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Generate the next probe in a sequence. Prefer to use 'probe' by itself,
|
|
|
|
|
// but this can sometimes be useful.
|
|
|
|
|
fn probe_next(&self, probe: uint) -> uint {
|
|
|
|
|
let hash_mask = self.table.capacity() - 1;
|
|
|
|
|
(probe + 1) & hash_mask
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn make_hash<X: Hash<S>>(&self, x: &X) -> table::SafeHash {
|
|
|
|
|
table::make_hash(&self.hasher, x)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Get the distance of the bucket at the given index that it lies
|
|
|
|
|
/// from its 'ideal' location.
|
|
|
|
|
///
|
|
|
|
|
/// In the cited blog posts above, this is called the "distance to
|
|
|
|
|
/// inital bucket", or DIB.
|
|
|
|
|
fn bucket_distance(&self, index_of_elem: &table::FullIndex) -> uint {
|
|
|
|
|
// where the hash of the element that happens to reside at
|
|
|
|
|
// `index_of_elem` tried to place itself first.
|
|
|
|
|
let first_probe_index = self.probe(&index_of_elem.hash(), 0);
|
|
|
|
|
|
|
|
|
|
let raw_index = index_of_elem.raw_index();
|
|
|
|
|
|
|
|
|
|
if first_probe_index <= raw_index {
|
|
|
|
|
// probe just went forward
|
|
|
|
|
raw_index - first_probe_index
|
|
|
|
|
} else {
|
|
|
|
|
// probe wrapped around the hashtable
|
|
|
|
|
raw_index + (self.table.capacity() - first_probe_index)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Search for a pre-hashed key.
|
|
|
|
|
fn search_hashed_generic(&self, hash: &table::SafeHash, is_match: |&K| -> bool)
|
|
|
|
|
-> Option<table::FullIndex> {
|
|
|
|
|
for num_probes in range(0u, self.table.size()) {
|
|
|
|
|
let probe = self.probe(hash, num_probes);
|
|
|
|
|
|
|
|
|
|
let idx = match self.table.peek(probe) {
|
|
|
|
|
table::Empty(_) => return None, // hit an empty bucket
|
|
|
|
|
table::Full(idx) => idx
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// We can finish the search early if we hit any bucket
|
|
|
|
|
// with a lower distance to initial bucket than we've probed.
|
|
|
|
|
if self.bucket_distance(&idx) < num_probes { return None }
|
|
|
|
|
|
|
|
|
|
// If the hash doesn't match, it can't be this one..
|
|
|
|
|
if hash != &idx.hash() { continue }
|
|
|
|
|
|
|
|
|
|
let (k, _) = self.table.read(&idx);
|
|
|
|
|
|
|
|
|
|
// If the key doesn't match, it can't be this one..
|
|
|
|
|
if !is_match(k) { continue }
|
|
|
|
|
|
|
|
|
|
return Some(idx);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return None
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn search_hashed(&self, hash: &table::SafeHash, k: &K) -> Option<table::FullIndex> {
|
|
|
|
|
self.search_hashed_generic(hash, |k_| *k == *k_)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn search_equiv<Q: Hash<S> + Equiv<K>>(&self, q: &Q) -> Option<table::FullIndex> {
|
|
|
|
|
self.search_hashed_generic(&self.make_hash(q), |k| q.equiv(k))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Search for a key, yielding the index if it's found in the hashtable.
|
|
|
|
|
/// If you already have the hash for the key lying around, use
|
|
|
|
|
/// search_hashed.
|
|
|
|
|
fn search(&self, k: &K) -> Option<table::FullIndex> {
|
|
|
|
|
self.search_hashed(&self.make_hash(k), k)
|
2012-07-26 16:09:22 -07:00
|
|
|
|
}
|
2013-04-03 08:45:14 -04:00
|
|
|
|
}
|
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> Container for HashMap<K, V, H> {
    /// Return the number of elements in the map
    fn len(&self) -> uint { self.table.size() }
}
|
2013-01-21 21:59:19 -05:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> Mutable for HashMap<K, V, H> {
|
2013-04-03 08:45:14 -04:00
|
|
|
|
/// Clear the map, removing all key-value pairs.
|
|
|
|
|
fn clear(&mut self) {
|
2014-02-28 22:23:53 -05:00
|
|
|
|
self.minimum_capacity = self.table.size();
|
|
|
|
|
|
|
|
|
|
for i in range(0, self.table.capacity()) {
|
|
|
|
|
match self.table.peek(i) {
|
|
|
|
|
table::Empty(_) => {},
|
|
|
|
|
table::Full(idx) => { self.table.take(idx); }
|
|
|
|
|
}
|
2013-01-21 17:25:57 -05:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-04-03 08:45:14 -04:00
|
|
|
|
}
|
2013-01-21 17:25:57 -05:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> Map<K, V> for HashMap<K, V, H> {
    /// Look up a key, returning a borrowed pointer to its value if present.
    fn find<'a>(&'a self, k: &K) -> Option<&'a V> {
        match self.search(k) {
            None => None,
            Some(idx) => {
                let (_, v) = self.table.read(&idx);
                Some(v)
            }
        }
    }

    /// Return true if the map contains a value for the specified key.
    fn contains_key(&self, k: &K) -> bool {
        match self.search(k) {
            Some(_) => true,
            None => false
        }
    }
}
|
2013-04-10 13:11:35 -07:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> MutableMap<K, V> for HashMap<K, V, H> {
    /// Look up a key, returning a mutable borrowed pointer to its value
    /// if present.
    fn find_mut<'a>(&'a mut self, k: &K) -> Option<&'a mut V> {
        match self.search(k) {
            None => None,
            Some(idx) => {
                let (_, v) = self.table.read_mut(&idx);
                Some(v)
            }
        }
    }

    /// Insert a key-value pair, returning the previous value for the key
    /// (if any). Uses Robin Hood insertion: probe forward from the hash's
    /// ideal bucket, and displace any resident whose probe distance is
    /// shorter than ours.
    fn swap(&mut self, k: K, v: V) -> Option<V> {
        let hash = self.make_hash(&k);
        let potential_new_size = self.table.size() + 1;
        // Grow (or shrink) before probing so the loop below always finds room.
        self.make_some_room(potential_new_size);

        // `dib` = distance to initial bucket of the pair we're inserting.
        for dib in range_inclusive(0u, self.table.size()) {
            let probe = self.probe(&hash, dib);

            let idx = match self.table.peek(probe) {
                table::Empty(idx) => {
                    // Found a hole!
                    self.table.put(idx, hash, k, v);
                    return None;
                },
                table::Full(idx) => idx
            };

            if idx.hash() == hash {
                let (bucket_k, bucket_v) = self.table.read_mut(&idx);
                if k == *bucket_k {
                    // Found an existing value.
                    return Some(replace(bucket_v, v));
                }
            }

            let probe_dib = self.bucket_distance(&idx);

            if probe_dib < dib {
                // Found a luckier bucket. This implies that the key does not
                // already exist in the hashtable. Just do a robin hood
                // insertion, then.
                self.robin_hood(idx, probe_dib, hash, k, v);
                return None;
            }
        }

        // We really shouldn't be here.
        fail!("Internal HashMap error: Out of space.");
    }

    /// Remove a key from the map, returning its value if it was present.
    /// Uses backward-shift deletion: after removing the entry, every
    /// following element in the probe chain is shifted back one bucket
    /// so search invariants stay intact (no tombstones).
    fn pop(&mut self, k: &K) -> Option<V> {
        if self.table.size() == 0 {
            return None
        }

        let potential_new_size = self.table.size() - 1;
        // May shrink the table; done before searching so indices stay valid.
        self.make_some_room(potential_new_size);

        let starting_index = match self.search(k) {
            Some(idx) => idx,
            None => return None,
        };

        let starting_probe = starting_index.raw_index();

        // Find where the probe chain ends: the first bucket after ours that
        // is either empty or already sitting in its ideal spot (DIB == 0).
        let ending_probe = {
            let mut probe = self.probe_next(starting_probe);
            for _ in range(0u, self.table.size()) {
                match self.table.peek(probe) {
                    table::Empty(_) => {}, // empty bucket. this is the end of our shifting.
                    table::Full(idx) => {
                        // Bucket that isn't us, which has a non-zero probe distance.
                        // This isn't the ending index, so keep searching.
                        if self.bucket_distance(&idx) != 0 {
                            probe = self.probe_next(probe);
                            continue;
                        }

                        // if we do have a bucket_distance of zero, we're at the end
                        // of what we need to shift.
                    }
                }
                break;
            }

            probe
        };

        let (_, _, retval) = self.table.take(starting_index);

        let mut probe = starting_probe;
        let mut next_probe = self.probe_next(probe);

        // backwards-shift all the elements after our newly-deleted one.
        while next_probe != ending_probe {
            match self.table.peek(next_probe) {
                table::Empty(_) => {
                    // nothing to shift in. just empty it out.
                    match self.table.peek(probe) {
                        table::Empty(_) => {},
                        table::Full(idx) => { self.table.take(idx); }
                    }
                },
                table::Full(next_idx) => {
                    // something to shift. move it over!
                    let next_hash = next_idx.hash();
                    let (_, next_key, next_val) = self.table.take(next_idx);
                    match self.table.peek(probe) {
                        table::Empty(idx) => {
                            self.table.put(idx, next_hash, next_key, next_val);
                        },
                        table::Full(idx) => {
                            let (emptyidx, _, _) = self.table.take(idx);
                            self.table.put(emptyidx, next_hash, next_key, next_val);
                        }
                    }
                }
            }

            probe = next_probe;
            next_probe = self.probe_next(next_probe);
        }

        // Done the backwards shift, but there's still an element left!
        // Empty it out.
        match self.table.peek(probe) {
            table::Empty(_) => {},
            table::Full(idx) => { self.table.take(idx); }
        }

        // Now we're done all our shifting. Return the value we grabbed
        // earlier.
        return Some(retval);
    }
}
|
2013-03-28 20:30:50 -07:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<K: Hash + TotalEq, V> HashMap<K, V, sip::SipHasher> {
|
2014-02-28 22:23:53 -05:00
|
|
|
|
/// Create an empty HashMap.
|
|
|
|
|
pub fn new() -> HashMap<K, V, sip::SipHasher> {
|
2013-04-03 09:28:36 -04:00
|
|
|
|
HashMap::with_capacity(INITIAL_CAPACITY)
|
2013-04-03 08:45:14 -04:00
|
|
|
|
}
|
2013-01-23 17:06:32 -05:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
pub fn with_capacity(capacity: uint) -> HashMap<K, V, sip::SipHasher> {
|
2013-08-01 18:11:25 +10:00
|
|
|
|
let mut r = rand::task_rng();
|
2014-02-28 22:23:53 -05:00
|
|
|
|
let r0 = r.gen();
|
|
|
|
|
let r1 = r.gen();
|
|
|
|
|
let hasher = sip::SipHasher::new_with_keys(r0, r1);
|
2014-03-06 18:07:52 -08:00
|
|
|
|
HashMap::with_capacity_and_hasher(capacity, hasher)
|
2014-02-25 08:04:38 -08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
    /// Create an empty HashMap using the given hasher and the default
    /// initial capacity.
    pub fn with_hasher(hasher: H) -> HashMap<K, V, H> {
        HashMap::with_capacity_and_hasher(INITIAL_CAPACITY, hasher)
    }

    /// Create an empty HashMap with space for at least `capacity`
    /// elements, using `hasher` to hash the keys.
    ///
    /// Warning: `hasher` is normally randomly generated, and
    /// is designed to allow HashMaps to be resistant to attacks that
    /// cause many collisions and very poor performance. Setting it
    /// manually using this function can expose a DoS attack vector.
    pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashMap<K, V, H> {
        // Capacity is always a power of two (required by `probe`'s masking).
        let cap = num::next_power_of_two(max(INITIAL_CAPACITY, capacity));
        HashMap {
            hasher: hasher,
            load_factor: INITIAL_LOAD_FACTOR,
            grow_at: grow_at(cap, INITIAL_LOAD_FACTOR),
            minimum_capacity: cap,
            table: table::RawTable::new(cap),
        }
    }

    /// The hashtable will never try to shrink below this size. You can use
    /// this function to reduce reallocations if your hashtable frequently
    /// grows and shrinks by large amounts.
    ///
    /// This function has no effect on the operational semantics of the
    /// hashtable, only on performance.
    pub fn reserve(&mut self, new_minimum_capacity: uint) {
        let cap = num::next_power_of_two(
            max(INITIAL_CAPACITY, new_minimum_capacity));

        self.minimum_capacity = cap;

        if self.table.capacity() < cap {
            self.resize(cap);
        }
    }

    /// Resizes the internal vectors to a new capacity. It's your responsibility to:
    /// 1) Make sure the new capacity is enough for all the elements, accounting
    ///    for the load factor.
    /// 2) Ensure new_capacity is a power of two.
    fn resize(&mut self, new_capacity: uint) {
        assert!(self.table.size() <= new_capacity);
        // A power of two has no bits in common with its predecessor.
        assert!((new_capacity - 1) & new_capacity == 0);

        self.grow_at = grow_at(new_capacity, self.load_factor);

        let old_table = replace(&mut self.table, table::RawTable::new(new_capacity));
        let old_size = old_table.size();

        // Reinsert every element; hashes are already computed, so there is
        // no rehashing cost — just re-probing into the new table.
        for (h, k, v) in old_table.move_iter() {
            self.manual_insert_hashed_nocheck(h, k, v);
        }

        assert_eq!(self.table.size(), old_size);
    }

    /// Performs any necessary resize operations, such that there's space for
    /// new_size elements.
    fn make_some_room(&mut self, new_size: uint) {
        let should_shrink = new_size <= self.shrink_at();
        let should_grow = self.grow_at <= new_size;

        if should_grow {
            let new_capacity = self.table.capacity() << 1;
            self.resize(new_capacity);
        } else if should_shrink {
            let new_capacity = self.table.capacity() >> 1;

            // Never shrink below the minimum capacity
            if self.minimum_capacity <= new_capacity {
                self.resize(new_capacity);
            }
        }
    }

    /// Perform robin hood bucket stealing at the given 'index'. You must
    /// also pass that probe's "distance to initial bucket" so we don't have
    /// to recalculate it, as well as the total number of probes already done
    /// so we have some sort of upper bound on the number of probes to do.
    ///
    /// 'hash', 'k', and 'v' are the elements to robin hood into the hashtable.
    fn robin_hood(&mut self, mut index: table::FullIndex, mut dib_param: uint,
                  mut hash: table::SafeHash, mut k: K, mut v: V) {
        'outer: loop {
            // Swap the new element into the stolen bucket and carry the
            // displaced one forward.
            let (old_hash, old_key, old_val) = {
                let (old_hash_ref, old_key_ref, old_val_ref) =
                    self.table.read_all_mut(&index);

                let old_hash = replace(old_hash_ref, hash);
                let old_key = replace(old_key_ref, k);
                let old_val = replace(old_val_ref, v);

                (old_hash, old_key, old_val)
            };

            let mut probe = self.probe_next(index.raw_index());

            // The displaced element's DIB starts one past the bucket we stole.
            for dib in range(dib_param + 1, self.table.size()) {
                let full_index = match self.table.peek(probe) {
                    table::Empty(idx) => {
                        // Finally. A hole!
                        self.table.put(idx, old_hash, old_key, old_val);
                        return;
                    },
                    table::Full(idx) => idx
                };

                let probe_dib = self.bucket_distance(&full_index);

                // Robin hood! Steal the spot.
                if probe_dib < dib {
                    index = full_index;
                    dib_param = probe_dib;
                    hash = old_hash;
                    k = old_key;
                    v = old_val;
                    continue 'outer;
                }

                probe = self.probe_next(probe);
            }

            fail!("HashMap fatal error: 100% load factor?");
        }
    }

    /// Manually insert a pre-hashed key-value pair, without first checking
    /// that there's enough room in the buckets. Returns a reference to the
    /// newly inserted value.
    ///
    /// If the key already exists, the hashtable will be returned untouched
    /// and a reference to the existing element will be returned.
    fn manual_insert_hashed_nocheck<'a>(
        &'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {

        for dib in range_inclusive(0u, self.table.size()) {
            let probe = self.probe(&hash, dib);

            let idx = match self.table.peek(probe) {
                table::Empty(idx) => {
                    // Found a hole!
                    let fullidx = self.table.put(idx, hash, k, v);
                    let (_, val) = self.table.read_mut(&fullidx);
                    return val;
                },
                table::Full(idx) => idx
            };

            if idx.hash() == hash {
                let (bucket_k, bucket_v) = self.table.read_mut(&idx);
                // FIXME #12147 the conditional return confuses
                // borrowck if we return bucket_v directly
                let bv: *mut V = bucket_v;
                if k == *bucket_k {
                    // Key already exists. Get its reference.
                    return unsafe {&mut *bv};
                }
            }

            let probe_dib = self.bucket_distance(&idx);

            if probe_dib < dib {
                // Found a luckier bucket than me. Better steal his spot.
                self.robin_hood(idx, probe_dib, hash, k, v);

                // Now that it's stolen, just read the value's pointer
                // right out of the table!
                match self.table.peek(probe) {
                    table::Empty(_) => fail!("Just stole a spot, but now that spot's empty."),
                    table::Full(idx) => {
                        let (_, v) = self.table.read_mut(&idx);
                        return v;
                    }
                }
            }
        }

        // We really shouldn't be here.
        fail!("Internal HashMap error: Out of space.");
    }

    /// Insert a pre-hashed key-value pair, growing the table first if
    /// needed. Returns a reference to the value inside the table.
    fn manual_insert_hashed<'a>(&'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
        let potential_new_size = self.table.size() + 1;
        self.make_some_room(potential_new_size);
        self.manual_insert_hashed_nocheck(hash, k, v)
    }

    /// Inserts an element, returning a reference to that element inside the
    /// hashtable.
    fn manual_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V {
        let hash = self.make_hash(&k);
        self.manual_insert_hashed(hash, k, v)
    }

    /// Return the value corresponding to the key in the map, or insert
    /// and return the value if it doesn't exist.
    pub fn find_or_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V {
        match self.search(&k) {
            Some(idx) => {
                let (_, v_ref) = self.table.read_mut(&idx);
                v_ref
            },
            None => self.manual_insert(k, v)
        }
    }

    /// Return the value corresponding to the key in the map, or create,
    /// insert, and return a new value if it doesn't exist.
    pub fn find_or_insert_with<'a>(&'a mut self, k: K, f: |&K| -> V)
                               -> &'a mut V {
        match self.search(&k) {
            Some(idx) => {
                let (_, v_ref) = self.table.read_mut(&idx);
                v_ref
            },
            None => {
                // `f` is only invoked when the key is absent.
                let v = f(&k);
                self.manual_insert(k, v)
            }
        }
    }

    /// Insert a key-value pair into the map if the key is not already present.
    /// Otherwise, modify the existing value for the key.
    /// Returns the new or modified value for the key.
    pub fn insert_or_update_with<'a>(
                                 &'a mut self,
                                 k: K,
                                 v: V,
                                 f: |&K, &mut V|)
                                 -> &'a mut V {
        match self.search(&k) {
            None => self.manual_insert(k, v),
            Some(idx) => {
                // Key present: run the update closure on the existing value.
                let (_, v_ref) = self.table.read_mut(&idx);
                f(&k, v_ref);
                v_ref
            }
        }
    }

    /// Retrieves a value for the given key, failing if the key is not present.
    pub fn get<'a>(&'a self, k: &K) -> &'a V {
        match self.find(k) {
            Some(v) => v,
            None => fail!("No entry found for key: {:?}", k)
        }
    }

    /// Retrieves a (mutable) value for the given key, failing if the key is not present.
    pub fn get_mut<'a>(&'a mut self, k: &K) -> &'a mut V {
        match self.find_mut(k) {
            Some(v) => v,
            None => fail!("No entry found for key: {:?}", k)
        }
    }

    /// Return true if the map contains a value for the specified key,
    /// using equivalence.
    pub fn contains_key_equiv<Q: Hash<S> + Equiv<K>>(&self, key: &Q) -> bool {
        self.search_equiv(key).is_some()
    }

    /// Return the value corresponding to the key in the map, using
    /// equivalence.
    pub fn find_equiv<'a, Q: Hash<S> + Equiv<K>>(&'a self, k: &Q) -> Option<&'a V> {
        match self.search_equiv(k) {
            None => None,
            Some(idx) => {
                let (_, v_ref) = self.table.read(&idx);
                Some(v_ref)
            }
        }
    }

    /// An iterator visiting all keys in arbitrary order.
    /// Iterator element type is &'a K.
    pub fn keys<'a>(&'a self) -> Keys<'a, K, V> {
        self.iter().map(|(k, _v)| k)
    }

    /// An iterator visiting all values in arbitrary order.
    /// Iterator element type is &'a V.
    pub fn values<'a>(&'a self) -> Values<'a, K, V> {
        self.iter().map(|(_k, v)| v)
    }

    /// An iterator visiting all key-value pairs in arbitrary order.
    /// Iterator element type is (&'a K, &'a V).
    pub fn iter<'a>(&'a self) -> Entries<'a, K, V> {
        self.table.iter()
    }

    /// An iterator visiting all key-value pairs in arbitrary order,
    /// with mutable references to the values.
    /// Iterator element type is (&'a K, &'a mut V).
    pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> {
        self.table.mut_iter()
    }

    /// Creates a consuming iterator, that is, one that moves each key-value
    /// pair out of the map in arbitrary order. The map cannot be used after
    /// calling this.
    pub fn move_iter(self) -> MoveEntries<K, V> {
        // Drop the stored hash from each bucket; callers only see (K, V).
        self.table.move_iter().map(|(_, k, v)| (k, v))
    }
}
|
2012-07-31 17:11:57 -04:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<K: TotalEq + Hash<S>, V: Clone, S, H: Hasher<S>> HashMap<K, V, H> {
    /// Like `find`, but returns a copy of the value.
    pub fn find_copy(&self, k: &K) -> Option<V> {
        match self.find(k) {
            Some(v) => Some((*v).clone()),
            None => None
        }
    }

    /// Like `get`, but returns a copy of the value.
    pub fn get_copy(&self, k: &K) -> V {
        let v = self.get(k);
        (*v).clone()
    }
}
|
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<K: TotalEq + Hash<S>, V: Eq, S, H: Hasher<S>> Eq for HashMap<K, V, H> {
    /// Two maps are equal when they have the same length and every key of
    /// `self` maps to an equal value in `other`.
    fn eq(&self, other: &HashMap<K, V, H>) -> bool {
        if self.len() != other.len() { return false; }

        self.iter().all(|(key, value)| {
            match other.find(key) {
                None => false,
                Some(v) => *value == *v
            }
        })
    }
}
|
2013-01-20 14:18:24 -05:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<K: TotalEq + Hash<S> + Show, V: Show, S, H: Hasher<S>> Show for HashMap<K, V, H> {
    /// Format the map as `{k1: v1, k2: v2, ...}` in arbitrary order.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // `\{` / `\}` are this era's fmt-string escapes for literal braces.
        try!(write!(f.buf, r"\{"));

        for (i, (k, v)) in self.iter().enumerate() {
            if i != 0 { try!(write!(f.buf, ", ")); }
            try!(write!(f.buf, "{}: {}", *k, *v));
        }

        write!(f.buf, r"\}")
    }
}
|
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S> + Default> Default for HashMap<K, V, H> {
    /// Create an empty map with the default capacity and a default-constructed
    /// hasher.
    fn default() -> HashMap<K, V, H> {
        HashMap::with_capacity_and_hasher(INITIAL_CAPACITY, Default::default())
    }
}
|
|
|
|
|
|
2013-06-21 17:05:05 +02:00
|
|
|
|
/// HashMap iterator
pub type Entries<'a, K, V> = table::Entries<'a, K, V>;

/// HashMap mutable values iterator
pub type MutEntries<'a, K, V> = table::MutEntries<'a, K, V>;

/// HashMap move iterator
// Maps the table's (hash, key, value) triples down to (key, value) pairs.
pub type MoveEntries<K, V> =
    iter::Map<'static, (table::SafeHash, K, V), (K, V), table::MoveEntries<K, V>>;

/// HashMap keys iterator
pub type Keys<'a, K, V> =
    iter::Map<'static, (&'a K, &'a V), &'a K, Entries<'a, K, V>>;

/// HashMap values iterator
pub type Values<'a, K, V> =
    iter::Map<'static, (&'a K, &'a V), &'a V, Entries<'a, K, V>>;
|
2013-12-31 19:09:18 +00:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S> + Default> FromIterator<(K, V)> for HashMap<K, V, H> {
    /// Build a map from an iterator of key-value pairs, preallocating from
    /// the iterator's size hint.
    fn from_iterator<T: Iterator<(K, V)>>(iter: &mut T) -> HashMap<K, V, H> {
        let (lower, _) = iter.size_hint();
        let mut map = HashMap::with_capacity_and_hasher(lower, Default::default());
        map.extend(iter);
        map
    }
}
|
2013-07-14 11:26:03 -04:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S> + Default> Extendable<(K, V)> for HashMap<K, V, H> {
|
std: Move the iterator param on FromIterator and Extendable to the method.
If they are on the trait then it is extremely annoying to use them as
generic parameters to a function, e.g. with the iterator param on the trait
itself, if one was to pass an Extendable<int> to a function that filled it
either from a Range or a Map<VecIterator>, one needs to write something
like:
fn foo<E: Extendable<int, Range<int>> +
Extendable<int, Map<&'self int, int, VecIterator<int>>>
(e: &mut E, ...) { ... }
since using a generic, i.e. `foo<E: Extendable<int, I>, I: Iterator<int>>`
means that `foo` takes 2 type parameters, and the caller has to specify them
(which doesn't work anyway, as they'll mismatch with the iterators used in
`foo` itself).
This patch changes it to:
fn foo<E: Extendable<int>>(e: &mut E, ...) { ... }
2013-08-13 23:08:14 +10:00
|
|
|
|
fn extend<T: Iterator<(K, V)>>(&mut self, iter: &mut T) {
|
2013-08-03 12:45:23 -04:00
|
|
|
|
for (k, v) in *iter {
|
2013-07-30 02:17:17 +02:00
|
|
|
|
self.insert(k, v);
|
2013-07-14 11:26:03 -04:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-07-14 13:18:50 -04:00
|
|
|
|
}
|
2013-06-21 17:05:05 +02:00
|
|
|
|
|
2014-02-28 22:23:53 -05:00
|
|
|
|
/// HashSet iterator
// Projects the backing map's (&K, &()) pairs down to &K.
pub type SetItems<'a, K> =
    iter::Map<'static, (&'a K, &'a ()), &'a K, Entries<'a, K, ()>>;

/// HashSet move iterator
pub type SetMoveItems<K> =
    iter::Map<'static, (K, ()), K, MoveEntries<K, ()>>;
|
2013-09-09 19:28:05 -07:00
|
|
|
|
|
2013-05-28 16:35:52 -05:00
|
|
|
|
/// An implementation of a hash set using the underlying representation of a
/// HashMap where the value is (). As with the `HashMap` type, a `HashSet`
/// requires that the elements implement the `Eq` and `Hash` traits.
#[deriving(Clone)]
pub struct HashSet<T, H = sip::SipHasher> {
    // All set operations delegate to this map; the zero-sized () value
    // makes the set cost no more than a keys-only map.
    priv map: HashMap<T, (), H>
}
|
2013-01-20 14:18:24 -05:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Eq for HashSet<T, H> {
    // FIXME #11998: Since the value is a (), and `find` returns a Some(&()),
    // we trigger #11998 when matching on it. I've fallen back to manual
    // iteration until this is fixed.
    /// Two sets are equal when they have the same length and every element
    /// of `self` is contained in `other`.
    fn eq(&self, other: &HashSet<T, H>) -> bool {
        if self.len() != other.len() { return false; }

        self.iter().all(|key| other.map.contains_key(key))
    }
}
|
2013-01-23 16:47:27 -05:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Container for HashSet<T, H> {
    /// Return the number of elements in the set
    // Delegates directly to the backing map's element count.
    fn len(&self) -> uint { self.map.len() }
}
|
2013-01-21 17:25:57 -05:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Mutable for HashSet<T, H> {
    /// Clear the set, removing all values.
    // Forwards to the backing map's `clear`.
    fn clear(&mut self) { self.map.clear() }
}
|
2013-01-20 14:18:24 -05:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Set<T> for HashSet<T, H> {
    /// Return true if the set contains a value
    fn contains(&self, value: &T) -> bool {
        // Probe the backing map; a hit on the key means membership.
        let entry = self.map.search(value);
        entry.is_some()
    }

    /// Return true if the set has no elements in common with `other`.
    /// This is equivalent to checking for an empty intersection.
    fn is_disjoint(&self, other: &HashSet<T, H>) -> bool {
        // Disjoint iff no element of `self` appears in `other`; `any`
        // short-circuits on the first shared element.
        !self.iter().any(|v| other.contains(v))
    }

    /// Return true if the set is a subset of another
    fn is_subset(&self, other: &HashSet<T, H>) -> bool {
        // Subset iff no element of `self` is missing from `other`.
        !self.iter().any(|v| !other.contains(v))
    }

    /// Return true if the set is a superset of another
    fn is_superset(&self, other: &HashSet<T, H>) -> bool {
        // Superset is subset with the roles reversed.
        other.is_subset(self)
    }
}
|
2013-01-20 14:18:24 -05:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
// Mutation is delegated to the backing `HashMap`, storing () as the value.
impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> MutableSet<T> for HashSet<T, H> {
    /// Add a value to the set. Return true if the value was not already
    /// present in the set.
    fn insert(&mut self, value: T) -> bool { self.map.insert(value, ()) }

    /// Remove a value from the set. Return true if the value was
    /// present in the set.
    fn remove(&mut self, value: &T) -> bool { self.map.remove(value) }
}
|
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
// Constructors specialized to the default SipHash hasher.
impl<T: Hash + TotalEq> HashSet<T, sip::SipHasher> {
    /// Create an empty HashSet
    pub fn new() -> HashSet<T, sip::SipHasher> {
        HashSet::with_capacity(INITIAL_CAPACITY)
    }

    /// Create an empty HashSet with space for at least `n` elements in
    /// the hash table.
    pub fn with_capacity(capacity: uint) -> HashSet<T, sip::SipHasher> {
        HashSet { map: HashMap::with_capacity(capacity) }
    }
}
|
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> {
    /// Create an empty HashSet that hashes with `hasher`, using the
    /// default initial capacity.
    pub fn with_hasher(hasher: H) -> HashSet<T, H> {
        HashSet::with_capacity_and_hasher(INITIAL_CAPACITY, hasher)
    }

    /// Create an empty HashSet with space for at least `capacity`
    /// elements in the hash table, using `hasher` to hash the keys.
    ///
    /// Warning: `hasher` is normally randomly generated, and
    /// is designed to allow `HashSet`s to be resistant to attacks that
    /// cause many collisions and very poor performance. Setting it
    /// manually using this function can expose a DoS attack vector.
    pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashSet<T, H> {
        HashSet { map: HashMap::with_capacity_and_hasher(capacity, hasher) }
    }

    /// Reserve space for at least `n` elements in the hash table.
    pub fn reserve(&mut self, n: uint) {
        self.map.reserve(n)
    }

    /// Returns true if the hash set contains a value equivalent to the
    /// given query value.
    // Lets callers probe with a borrowed form (e.g. &str against ~str keys)
    // via the `Equiv` trait, without building an owned key.
    pub fn contains_equiv<Q: Hash<S> + Equiv<T>>(&self, value: &Q) -> bool {
      self.map.contains_key_equiv(value)
    }

    /// An iterator visiting all elements in arbitrary order.
    /// Iterator element type is &'a T.
    pub fn iter<'a>(&'a self) -> SetItems<'a, T> {
        self.map.keys()
    }

    /// Creates a consuming iterator, that is, one that moves each value out
    /// of the set in arbitrary order. The set cannot be used after calling
    /// this.
    pub fn move_iter(self) -> SetMoveItems<T> {
        self.map.move_iter().map(|(k, _)| k)
    }

    /// Visit the values representing the difference
    // Elements of `self` not present in `other`. `Repeat` pairs every
    // element with the captured `other` reference so the filter closure
    // needs no environment capture (see `SetAlgebraItems` below).
    pub fn difference<'a>(&'a self, other: &'a HashSet<T, H>) -> SetAlgebraItems<'a, T, H> {
        Repeat::new(other)
            .zip(self.iter())
            .filter_map(|(other, elt)| {
                if !other.contains(elt) { Some(elt) } else { None }
            })
    }

    /// Visit the values representing the symmetric difference
    // Built as (self - other) followed by (other - self).
    pub fn symmetric_difference<'a>(&'a self, other: &'a HashSet<T, H>)
        -> Chain<SetAlgebraItems<'a, T, H>, SetAlgebraItems<'a, T, H>> {
        self.difference(other).chain(other.difference(self))
    }

    /// Visit the values representing the intersection
    pub fn intersection<'a>(&'a self, other: &'a HashSet<T, H>)
        -> SetAlgebraItems<'a, T, H> {
        Repeat::new(other)
            .zip(self.iter())
            .filter_map(|(other, elt)| {
                if other.contains(elt) { Some(elt) } else { None }
            })
    }

    /// Visit the values representing the union
    // All of `self`, then the elements of `other` not already seen.
    pub fn union<'a>(&'a self, other: &'a HashSet<T, H>)
        -> Chain<SetItems<'a, T>, SetAlgebraItems<'a, T, H>> {
        self.iter().chain(other.difference(self))
    }
}
|
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<T: TotalEq + Hash<S> + fmt::Show, S, H: Hasher<S>> fmt::Show for HashSet<T, H> {
    /// Format the set as `{a, b, c}`; element order is arbitrary.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // `r"\{"` / `r"\}"` are this dialect's format-string escapes for
        // literal braces.
        try!(write!(f.buf, r"\{"));

        for (i, x) in self.iter().enumerate() {
            // Comma-separate every element after the first.
            if i != 0 { try!(write!(f.buf, ", ")); }
            try!(write!(f.buf, "{}", *x));
        }

        write!(f.buf, r"\}")
    }
}
|
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<T: TotalEq + Hash<S>, S, H: Hasher<S> + Default> FromIterator<T> for HashSet<T, H> {
    /// Build a set from the elements of `iter`, using a default-constructed
    /// hasher.
    fn from_iterator<I: Iterator<T>>(iter: &mut I) -> HashSet<T, H> {
        // Pre-size from the iterator's lower size bound to limit rehashing.
        let (lower, _) = iter.size_hint();
        let mut set = HashSet::with_capacity_and_hasher(lower, Default::default());
        set.extend(iter);
        set
    }
}
|
2013-07-14 12:20:48 -04:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<T: TotalEq + Hash<S>, S, H: Hasher<S> + Default> Extendable<T> for HashSet<T, H> {
    /// Insert every remaining element of `iter` into the set.
    fn extend<I: Iterator<T>>(&mut self, iter: &mut I) {
        // Drain the iterator by hand; duplicates are absorbed silently
        // because `insert` simply reports whether the value was new.
        loop {
            match iter.next() {
                Some(k) => { self.insert(k); }
                None => break
            }
        }
    }
}
|
2013-07-14 13:18:50 -04:00
|
|
|
|
|
2014-03-22 16:30:45 -04:00
|
|
|
|
impl<T: TotalEq + Hash> Default for HashSet<T, sip::SipHasher> {
    /// Equivalent to `HashSet::new()`.
    fn default() -> HashSet<T> { HashSet::new() }
}
|
|
|
|
|
|
2013-08-03 21:34:00 +02:00
|
|
|
|
// `Repeat` is used to feed the filter closure an explicit capture
// of a reference to the other set
/// Set operations iterator
///
/// The concrete type returned by `difference` and `intersection`: each
/// element of `self` is zipped with a repeated `&HashSet` and filtered.
pub type SetAlgebraItems<'a, T, H> =
    FilterMap<'static, (&'a HashSet<T, H>, &'a T), &'a T,
              Zip<Repeat<&'a HashSet<T, H>>, SetItems<'a, T>>>;
|
2013-07-16 03:55:52 +02:00
|
|
|
|
|
2013-04-30 13:02:29 +02:00
|
|
|
|
#[cfg(test)]
mod test_map {
    use super::HashMap;
    use std::iter::{Iterator,range_inclusive,range_step_inclusive};
    use std::local_data;
    use std::vec;

    // A zero-capacity map must still grow on demand.
    #[test]
    fn test_create_capacity_zero() {
        let mut m = HashMap::with_capacity(0);

        assert!(m.insert(1, 1));

        assert!(m.contains_key(&1));
        assert!(!m.contains_key(&0));
    }

    // Basic insert/len/find behavior.
    #[test]
    fn test_insert() {
        let mut m = HashMap::new();
        assert_eq!(m.len(), 0);
        assert!(m.insert(1, 2));
        assert_eq!(m.len(), 1);
        assert!(m.insert(2, 4));
        assert_eq!(m.len(), 2);
        assert_eq!(*m.find(&1).unwrap(), 2);
        assert_eq!(*m.find(&2).unwrap(), 4);
    }

    // Task-local counter vector: slot k tracks how many live Dropables
    // currently carry key k.
    local_data_key!(drop_vector: vec::Vec<int>)

    #[deriving(Hash, Eq, TotalEq)]
    struct Dropable {
        k: int
    }

    impl Dropable {
        // Construction bumps the live-count for slot k.
        fn new(k: int) -> Dropable {
            local_data::get_mut(drop_vector,
                |v| { v.unwrap().as_mut_slice()[k] += 1; });

            Dropable { k: k }
        }
    }

    impl Drop for Dropable {
        // Destruction decrements the same slot, so a balanced vector
        // proves every value was dropped exactly once.
        fn drop(&mut self) {
            local_data::get_mut(drop_vector, |v|
                { v.unwrap().as_mut_slice()[self.k] -= 1; });
        }
    }

    // Verify that the map runs destructors for keys and values exactly
    // once, both on `pop` and when the map itself is dropped.
    #[test]
    fn test_drops() {
        local_data::set(drop_vector, vec::Vec::from_elem(200, 0));

        {
            let mut m = HashMap::new();

            local_data::get(drop_vector, |v| {
                for i in range(0, 200) {
                    assert_eq!(v.unwrap().as_slice()[i], 0);
                }
            });

            // Keys use slots 0..100, values slots 100..200.
            for i in range(0, 100) {
                let d1 = Dropable::new(i);
                let d2 = Dropable::new(i+100);
                m.insert(d1, d2);
            }

            local_data::get(drop_vector, |v| {
                for i in range(0, 200) {
                    assert_eq!(v.unwrap().as_slice()[i], 1);
                }
            });

            // Pop half the entries; each pop drops one key and one value.
            for i in range(0, 50) {
                let k = Dropable::new(i);
                let v = m.pop(&k);

                assert!(v.is_some());

                local_data::get(drop_vector, |v| {
                    assert_eq!(v.unwrap().as_slice()[i], 1);
                    assert_eq!(v.unwrap().as_slice()[i+100], 1);
                });
            }

            // Popped entries are fully dropped; the rest are still live.
            local_data::get(drop_vector, |v| {
                for i in range(0, 50) {
                    assert_eq!(v.unwrap().as_slice()[i], 0);
                    assert_eq!(v.unwrap().as_slice()[i+100], 0);
                }

                for i in range(50, 100) {
                    assert_eq!(v.unwrap().as_slice()[i], 1);
                    assert_eq!(v.unwrap().as_slice()[i+100], 1);
                }
            });
        }

        // Dropping the map drops everything that was left in it.
        local_data::get(drop_vector, |v| {
            for i in range(0, 200) {
                assert_eq!(v.unwrap().as_slice()[i], 0);
            }
        });
    }

    #[test]
    fn test_empty_pop() {
        let mut m: HashMap<int, bool> = HashMap::new();
        assert_eq!(m.pop(&0), None);
    }

    // Stress insert/remove through multiple grow cycles, checking full
    // contents after every mutation.
    #[test]
    fn test_lots_of_insertions() {
        let mut m = HashMap::new();

        // Try this a few times to make sure we never screw up the hashmap's
        // internal state.
        for _ in range(0, 10) {
            assert!(m.is_empty());

            for i in range_inclusive(1, 1000) {
                assert!(m.insert(i, i));

                for j in range_inclusive(1, i) {
                    let r = m.find(&j);
                    assert_eq!(r, Some(&j));
                }

                for j in range_inclusive(i+1, 1000) {
                    let r = m.find(&j);
                    assert_eq!(r, None);
                }
            }

            for i in range_inclusive(1001, 2000) {
                assert!(!m.contains_key(&i));
            }

            // remove forwards
            for i in range_inclusive(1, 1000) {
                assert!(m.remove(&i));

                for j in range_inclusive(1, i) {
                    assert!(!m.contains_key(&j));
                }

                for j in range_inclusive(i+1, 1000) {
                    assert!(m.contains_key(&j));
                }
            }

            for i in range_inclusive(1, 1000) {
                assert!(!m.contains_key(&i));
            }

            for i in range_inclusive(1, 1000) {
                assert!(m.insert(i, i));
            }

            // remove backwards
            for i in range_step_inclusive(1000, 1, -1) {
                assert!(m.remove(&i));

                for j in range_inclusive(i, 1000) {
                    assert!(!m.contains_key(&j));
                }

                for j in range_inclusive(1, i-1) {
                    assert!(m.contains_key(&j));
                }
            }
        }
    }

    // Mutation through `find_mut` is visible to later reads.
    #[test]
    fn test_find_mut() {
        let mut m = HashMap::new();
        assert!(m.insert(1, 12));
        assert!(m.insert(2, 8));
        assert!(m.insert(5, 14));
        let new = 100;
        match m.find_mut(&5) {
            None => fail!(), Some(x) => *x = new
        }
        assert_eq!(m.find(&5), Some(&new));
    }

    // Re-inserting a key returns false and replaces the value.
    #[test]
    fn test_insert_overwrite() {
        let mut m = HashMap::new();
        assert!(m.insert(1, 2));
        assert_eq!(*m.find(&1).unwrap(), 2);
        assert!(!m.insert(1, 3));
        assert_eq!(*m.find(&1).unwrap(), 3);
    }

    // Keys 1, 5, 9 are chosen to collide in a 4-slot table.
    #[test]
    fn test_insert_conflicts() {
        let mut m = HashMap::with_capacity(4);
        assert!(m.insert(1, 2));
        assert!(m.insert(5, 3));
        assert!(m.insert(9, 4));
        assert_eq!(*m.find(&9).unwrap(), 4);
        assert_eq!(*m.find(&5).unwrap(), 3);
        assert_eq!(*m.find(&1).unwrap(), 2);
    }

    // Removing one member of a collision chain keeps the others findable.
    #[test]
    fn test_conflict_remove() {
        let mut m = HashMap::with_capacity(4);
        assert!(m.insert(1, 2));
        assert_eq!(*m.find(&1).unwrap(), 2);
        assert!(m.insert(5, 3));
        assert_eq!(*m.find(&1).unwrap(), 2);
        assert_eq!(*m.find(&5).unwrap(), 3);
        assert!(m.insert(9, 4));
        assert_eq!(*m.find(&1).unwrap(), 2);
        assert_eq!(*m.find(&5).unwrap(), 3);
        assert_eq!(*m.find(&9).unwrap(), 4);
        assert!(m.remove(&1));
        assert_eq!(*m.find(&9).unwrap(), 4);
        assert_eq!(*m.find(&5).unwrap(), 3);
    }

    #[test]
    fn test_is_empty() {
        let mut m = HashMap::with_capacity(4);
        assert!(m.insert(1, 2));
        assert!(!m.is_empty());
        assert!(m.remove(&1));
        assert!(m.is_empty());
    }

    // `pop` removes the entry and returns its value.
    #[test]
    fn test_pop() {
        let mut m = HashMap::new();
        m.insert(1, 2);
        assert_eq!(m.pop(&1), Some(2));
        assert_eq!(m.pop(&1), None);
    }

    // `swap` inserts and returns the previous value, if any.
    #[test]
    fn test_swap() {
        let mut m = HashMap::new();
        assert_eq!(m.swap(1, 2), None);
        assert_eq!(m.swap(1, 3), Some(2));
        assert_eq!(m.swap(1, 4), Some(3));
    }

    // The consuming iterator yields every pair (in either order).
    #[test]
    fn test_move_iter() {
        let hm = {
            let mut hm = HashMap::new();

            hm.insert('a', 1);
            hm.insert('b', 2);

            hm
        };

        let v = hm.move_iter().collect::<~[(char, int)]>();
        assert!([('a', 1), ('b', 2)] == v || [('b', 2), ('a', 1)] == v);
    }

    // Iteration visits every entry exactly once; a bitmask records which
    // keys were seen.
    #[test]
    fn test_iterate() {
        let mut m = HashMap::with_capacity(4);
        for i in range(0u, 32) {
            assert!(m.insert(i, i*2));
        }
        assert_eq!(m.len(), 32);

        let mut observed = 0;

        for (k, v) in m.iter() {
            assert_eq!(*v, *k * 2);
            observed |= 1 << *k;
        }
        assert_eq!(observed, 0xFFFF_FFFF);
    }

    #[test]
    fn test_keys() {
        let vec = ~[(1, 'a'), (2, 'b'), (3, 'c')];
        let map = vec.move_iter().collect::<HashMap<int, char>>();
        let keys = map.keys().map(|&k| k).collect::<~[int]>();
        assert_eq!(keys.len(), 3);
        assert!(keys.contains(&1));
        assert!(keys.contains(&2));
        assert!(keys.contains(&3));
    }

    #[test]
    fn test_values() {
        let vec = ~[(1, 'a'), (2, 'b'), (3, 'c')];
        let map = vec.move_iter().collect::<HashMap<int, char>>();
        let values = map.values().map(|&v| v).collect::<~[char]>();
        assert_eq!(values.len(), 3);
        assert!(values.contains(&'a'));
        assert!(values.contains(&'b'));
        assert!(values.contains(&'c'));
    }

    #[test]
    fn test_find() {
        let mut m = HashMap::new();
        assert!(m.find(&1).is_none());
        m.insert(1, 2);
        match m.find(&1) {
            None => fail!(),
            Some(v) => assert!(*v == 2)
        }
    }

    // Equality compares full contents, not just length.
    #[test]
    fn test_eq() {
        let mut m1 = HashMap::new();
        m1.insert(1, 2);
        m1.insert(2, 3);
        m1.insert(3, 4);

        let mut m2 = HashMap::new();
        m2.insert(1, 2);
        m2.insert(2, 3);

        assert!(m1 != m2);

        m2.insert(3, 4);

        assert_eq!(m1, m2);
    }

    // Fill until the map's grow threshold (`grow_at`) moves, proving a
    // resize happened and that len stayed consistent across it.
    #[test]
    fn test_expand() {
        let mut m = HashMap::new();

        assert_eq!(m.len(), 0);
        assert!(m.is_empty());

        let mut i = 0u;
        let old_resize_at = m.grow_at;
        while old_resize_at == m.grow_at {
            m.insert(i, i);
            i += 1;
        }

        assert_eq!(m.len(), i);
        assert!(!m.is_empty());
    }

    // Borrowed &str queries against owned ~str keys via `find_equiv`.
    #[test]
    fn test_find_equiv() {
        let mut m = HashMap::new();

        let (foo, bar, baz) = (1,2,3);
        m.insert(~"foo", foo);
        m.insert(~"bar", bar);
        m.insert(~"baz", baz);

        assert_eq!(m.find_equiv(&("foo")), Some(&foo));
        assert_eq!(m.find_equiv(&("bar")), Some(&bar));
        assert_eq!(m.find_equiv(&("baz")), Some(&baz));

        assert_eq!(m.find_equiv(&("qux")), None);
    }

    #[test]
    fn test_from_iter() {
        let xs = ~[(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)];

        let map: HashMap<int, int> = xs.iter().map(|&x| x).collect();

        for &(k, v) in xs.iter() {
            assert_eq!(map.find(&k), Some(&v));
        }
    }
}
|
2013-01-29 16:07:11 -05:00
|
|
|
|
|
2013-04-30 13:02:29 +02:00
|
|
|
|
#[cfg(test)]
|
2013-04-03 08:45:14 -04:00
|
|
|
|
mod test_set {
|
2014-02-19 19:29:58 -08:00
|
|
|
|
use super::HashSet;
|
2014-02-28 22:23:53 -05:00
|
|
|
|
use std::container::Container;
|
2014-03-08 18:11:52 -05:00
|
|
|
|
use std::slice::ImmutableEqVector;
|
2013-03-02 05:09:36 -05:00
|
|
|
|
|
2013-04-03 08:45:14 -04:00
|
|
|
|
    // `is_disjoint` is symmetric and flips as soon as one element is shared.
    #[test]
    fn test_disjoint() {
        let mut xs = HashSet::new();
        let mut ys = HashSet::new();
        assert!(xs.is_disjoint(&ys));
        assert!(ys.is_disjoint(&xs));
        assert!(xs.insert(5));
        assert!(ys.insert(11));
        assert!(xs.is_disjoint(&ys));
        assert!(ys.is_disjoint(&xs));
        assert!(xs.insert(7));
        assert!(xs.insert(19));
        assert!(xs.insert(4));
        assert!(ys.insert(2));
        assert!(ys.insert(-11));
        assert!(xs.is_disjoint(&ys));
        assert!(ys.is_disjoint(&xs));
        // 7 is now in both sets.
        assert!(ys.insert(7));
        assert!(!xs.is_disjoint(&ys));
        assert!(!ys.is_disjoint(&xs));
    }

    // `is_subset`/`is_superset` require every element to be covered; adding
    // the one missing element (5) makes a ⊆ b.
    #[test]
    fn test_subset_and_superset() {
        let mut a = HashSet::new();
        assert!(a.insert(0));
        assert!(a.insert(5));
        assert!(a.insert(11));
        assert!(a.insert(7));

        let mut b = HashSet::new();
        assert!(b.insert(0));
        assert!(b.insert(7));
        assert!(b.insert(19));
        assert!(b.insert(250));
        assert!(b.insert(11));
        assert!(b.insert(200));

        assert!(!a.is_subset(&b));
        assert!(!a.is_superset(&b));
        assert!(!b.is_subset(&a));
        assert!(!b.is_superset(&a));

        assert!(b.insert(5));

        assert!(a.is_subset(&b));
        assert!(!a.is_superset(&b));
        assert!(!b.is_subset(&a));
        assert!(b.is_superset(&a));
    }
|
2013-01-29 19:30:26 -05:00
|
|
|
|
|
2013-06-21 17:05:16 +02:00
|
|
|
|
    // Set iteration visits each of the 32 elements exactly once; the
    // bitmask must come out fully set.
    #[test]
    fn test_iterate() {
        let mut a = HashSet::new();
        for i in range(0u, 32) {
            assert!(a.insert(i));
        }
        let mut observed = 0;
        for k in a.iter() {
            observed |= 1 << *k;
        }
        assert_eq!(observed, 0xFFFF_FFFF);
    }
|
|
|
|
|
|
2013-04-03 08:45:14 -04:00
|
|
|
|
    // `intersection` yields exactly the shared elements (order unspecified,
    // so membership plus a count is checked).
    #[test]
    fn test_intersection() {
        let mut a = HashSet::new();
        let mut b = HashSet::new();

        assert!(a.insert(11));
        assert!(a.insert(1));
        assert!(a.insert(3));
        assert!(a.insert(77));
        assert!(a.insert(103));
        assert!(a.insert(5));
        assert!(a.insert(-5));

        assert!(b.insert(2));
        assert!(b.insert(11));
        assert!(b.insert(77));
        assert!(b.insert(-9));
        assert!(b.insert(-42));
        assert!(b.insert(5));
        assert!(b.insert(3));

        let mut i = 0;
        let expected = [3, 5, 11, 77];
        for x in a.intersection(&b) {
            assert!(expected.contains(x));
            i += 1
        }
        assert_eq!(i, expected.len());
    }

    // `difference` yields the elements of `a` absent from `b`.
    #[test]
    fn test_difference() {
        let mut a = HashSet::new();
        let mut b = HashSet::new();

        assert!(a.insert(1));
        assert!(a.insert(3));
        assert!(a.insert(5));
        assert!(a.insert(9));
        assert!(a.insert(11));

        assert!(b.insert(3));
        assert!(b.insert(9));

        let mut i = 0;
        let expected = [1, 5, 11];
        for x in a.difference(&b) {
            assert!(expected.contains(x));
            i += 1
        }
        assert_eq!(i, expected.len());
    }
|
2013-03-02 05:09:36 -05:00
|
|
|
|
|
2013-04-03 08:45:14 -04:00
|
|
|
|
    // Symmetric difference contains elements in exactly one of the sets.
    #[test]
    fn test_symmetric_difference() {
        let mut a = HashSet::new();
        let mut b = HashSet::new();

        assert!(a.insert(1));
        assert!(a.insert(3));
        assert!(a.insert(5));
        assert!(a.insert(9));
        assert!(a.insert(11));

        assert!(b.insert(-2));
        assert!(b.insert(3));
        assert!(b.insert(9));
        assert!(b.insert(14));
        assert!(b.insert(22));

        let mut i = 0;
        let expected = [-2, 1, 5, 11, 14, 22];
        for x in a.symmetric_difference(&b) {
            assert!(expected.contains(x));
            i += 1
        }
        assert_eq!(i, expected.len());
    }

    // Union visits every element of both sets exactly once, even when the
    // sets overlap.
    #[test]
    fn test_union() {
        let mut a = HashSet::new();
        let mut b = HashSet::new();

        assert!(a.insert(1));
        assert!(a.insert(3));
        assert!(a.insert(5));
        assert!(a.insert(9));
        assert!(a.insert(11));
        assert!(a.insert(16));
        assert!(a.insert(19));
        assert!(a.insert(24));

        assert!(b.insert(-2));
        assert!(b.insert(1));
        assert!(b.insert(5));
        assert!(b.insert(9));
        assert!(b.insert(13));
        assert!(b.insert(19));

        let mut i = 0;
        let expected = [-2, 1, 3, 5, 9, 11, 13, 16, 19, 24];
        for x in a.union(&b) {
            assert!(expected.contains(x));
            i += 1
        }
        assert_eq!(i, expected.len());
    }
|
2013-07-14 12:20:48 -04:00
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn test_from_iter() {
|
|
|
|
|
let xs = ~[1, 2, 3, 4, 5, 6, 7, 8, 9];
|
2013-07-14 13:18:50 -04:00
|
|
|
|
|
2013-08-09 20:09:47 -07:00
|
|
|
|
let set: HashSet<int> = xs.iter().map(|&x| x).collect();
|
2013-07-14 12:20:48 -04:00
|
|
|
|
|
2013-08-03 12:45:23 -04:00
|
|
|
|
for x in xs.iter() {
|
2013-07-14 12:20:48 -04:00
|
|
|
|
assert!(set.contains(x));
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-07-15 14:43:16 -04:00
|
|
|
|
|
|
|
|
|
#[test]
|
2013-08-07 19:21:36 -07:00
|
|
|
|
fn test_move_iter() {
|
2013-07-15 14:43:16 -04:00
|
|
|
|
let hs = {
|
|
|
|
|
let mut hs = HashSet::new();
|
|
|
|
|
|
|
|
|
|
hs.insert('a');
|
|
|
|
|
hs.insert('b');
|
|
|
|
|
|
|
|
|
|
hs
|
|
|
|
|
};
|
|
|
|
|
|
2013-08-07 19:21:36 -07:00
|
|
|
|
let v = hs.move_iter().collect::<~[char]>();
|
2013-07-15 14:43:16 -04:00
|
|
|
|
assert!(['a', 'b'] == v || ['b', 'a'] == v);
|
|
|
|
|
}
|
2013-08-10 00:44:35 +10:00
|
|
|
|
|
|
|
|
|
#[test]
fn test_eq() {
    // These constants once happened to expose a bug in insert().
    // I'm keeping them around to prevent a regression.
    let mut s1 = HashSet::new();
    let mut s2 = HashSet::new();

    for &n in [1, 2, 3].iter() {
        s1.insert(n);
    }
    for &n in [1, 2].iter() {
        s2.insert(n);
    }

    // A strict subset must not compare equal...
    assert!(s1 != s2);

    s2.insert(3);

    // ...but sets with identical contents must, regardless of how
    // they were built up.
    assert_eq!(s1, s2);
}
|
2014-02-13 06:41:34 +11:00
|
|
|
|
|
|
|
|
|
#[test]
fn test_show() {
    let empty: HashSet<int> = HashSet::new();

    let mut set: HashSet<int> = HashSet::new();
    set.insert(1);
    set.insert(2);

    let set_str = format!("{}", set);

    // Iteration order is unspecified, so accept either rendering
    // of the two-element set.
    assert!(set_str == ~"{1, 2}" || set_str == ~"{2, 1}");
    assert_eq!(format!("{}", empty), ~"{}");
}
|
2013-01-29 16:07:11 -05:00
|
|
|
|
}
|
2014-02-28 22:23:53 -05:00
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod bench {
    extern crate test;
    use self::test::BenchHarness;

    use std::iter::{range_inclusive};

    #[bench]
    fn insert(bh: &mut BenchHarness) {
        use super::HashMap;

        // Warm the table with 1000 entries before timing starts.
        let mut map = HashMap::new();
        for i in range_inclusive(1, 1000) {
            map.insert(i, i);
        }

        // Each timed iteration inserts a fresh key, so the map keeps
        // growing as the benchmark runs.
        let mut next = 1001;
        bh.iter(|| {
            map.insert(next, next);
            next += 1;
        });
    }

    #[bench]
    fn find_existing(bh: &mut BenchHarness) {
        use super::HashMap;

        let mut map = HashMap::new();
        for i in range_inclusive(1, 1000) {
            map.insert(i, i);
        }

        // Key 412 is always present: this times the lookup hit path.
        bh.iter(|| {
            map.contains_key(&412);
        });
    }

    #[bench]
    fn find_nonexisting(bh: &mut BenchHarness) {
        use super::HashMap;

        let mut map = HashMap::new();
        for i in range_inclusive(1, 1000) {
            map.insert(i, i);
        }

        // Key 2048 is never present: this times the lookup miss path.
        bh.iter(|| {
            map.contains_key(&2048);
        });
    }

    #[bench]
    fn hashmap_as_queue(bh: &mut BenchHarness) {
        use super::HashMap;

        let mut map = HashMap::new();
        for i in range_inclusive(1, 1000) {
            map.insert(i, i);
        }

        // Pop the oldest key and insert a new one each iteration,
        // holding the population at a steady 1000 entries.
        let mut head = 1;
        bh.iter(|| {
            map.pop(&head);
            map.insert(head + 1000, head + 1000);
            head += 1;
        });
    }

    #[bench]
    fn find_pop_insert(bh: &mut BenchHarness) {
        use super::HashMap;

        let mut map = HashMap::new();
        for i in range_inclusive(1, 1000) {
            map.insert(i, i);
        }

        // Mix one hit lookup, one miss lookup, a removal and an
        // insertion per iteration, keeping the population steady.
        let mut head = 1;
        bh.iter(|| {
            map.find(&(head + 400));
            map.find(&(head + 2000));
            map.pop(&head);
            map.insert(head + 1000, head + 1000);
            head += 1;
        })
    }
}
|