Reduce size overhead of adaptive hashmap
Exposes a boolean flag in RawTable and uses it instead of a bool field in HashMap. Fixes: #40042
This commit is contained in:
parent
1476105dd3
commit
3273003912
@ -396,8 +396,6 @@ pub struct HashMap<K, V, S = RandomState> {
|
||||
table: RawTable<K, V>,
|
||||
|
||||
resize_policy: DefaultResizePolicy,
|
||||
|
||||
long_probes: bool,
|
||||
}
|
||||
|
||||
/// Search for a pre-hashed key.
|
||||
@ -655,7 +653,6 @@ pub fn with_hasher(hash_builder: S) -> HashMap<K, V, S> {
|
||||
hash_builder: hash_builder,
|
||||
resize_policy: DefaultResizePolicy::new(),
|
||||
table: RawTable::new(0),
|
||||
long_probes: false,
|
||||
}
|
||||
}
|
||||
|
||||
@ -688,7 +685,6 @@ pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> HashMap<K,
|
||||
hash_builder: hash_builder,
|
||||
resize_policy: resize_policy,
|
||||
table: RawTable::new(raw_cap),
|
||||
long_probes: false,
|
||||
}
|
||||
}
|
||||
|
||||
@ -746,7 +742,7 @@ pub fn reserve(&mut self, additional: usize) {
|
||||
let min_cap = self.len().checked_add(additional).expect("reserve overflow");
|
||||
let raw_cap = self.resize_policy.raw_capacity(min_cap);
|
||||
self.resize(raw_cap);
|
||||
} else if self.long_probes && remaining <= self.len() {
|
||||
} else if self.table.tag() && remaining <= self.len() {
|
||||
// Probe sequence is too long and table is half full,
|
||||
// resize early to reduce probing length.
|
||||
let new_capacity = self.table.capacity() * 2;
|
||||
@ -763,7 +759,6 @@ fn resize(&mut self, new_raw_cap: usize) {
|
||||
assert!(self.table.size() <= new_raw_cap);
|
||||
assert!(new_raw_cap.is_power_of_two() || new_raw_cap == 0);
|
||||
|
||||
self.long_probes = false;
|
||||
let mut old_table = replace(&mut self.table, RawTable::new(new_raw_cap));
|
||||
let old_size = old_table.size();
|
||||
|
||||
@ -844,8 +839,7 @@ pub fn shrink_to_fit(&mut self) {
|
||||
/// If the key already exists, the hashtable will be returned untouched
|
||||
/// and a reference to the existing element will be returned.
|
||||
fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option<V> {
|
||||
let entry = search_hashed(&mut self.table, hash, |key| *key == k)
|
||||
.into_entry(k, &mut self.long_probes);
|
||||
let entry = search_hashed(&mut self.table, hash, |key| *key == k).into_entry(k);
|
||||
match entry {
|
||||
Some(Occupied(mut elem)) => Some(elem.insert(v)),
|
||||
Some(Vacant(elem)) => {
|
||||
@ -1002,7 +996,7 @@ pub fn entry(&mut self, key: K) -> Entry<K, V> {
|
||||
self.reserve(1);
|
||||
let hash = self.make_hash(&key);
|
||||
search_hashed(&mut self.table, hash, |q| q.eq(&key))
|
||||
.into_entry(key, &mut self.long_probes).expect("unreachable")
|
||||
.into_entry(key).expect("unreachable")
|
||||
}
|
||||
|
||||
/// Returns the number of elements in the map.
|
||||
@ -1456,7 +1450,7 @@ fn into_occupied_bucket(self) -> Option<FullBucket<K, V, M>> {
|
||||
|
||||
impl<'a, K, V> InternalEntry<K, V, &'a mut RawTable<K, V>> {
|
||||
#[inline]
|
||||
fn into_entry(self, key: K, long_probes: &'a mut bool) -> Option<Entry<'a, K, V>> {
|
||||
fn into_entry(self, key: K) -> Option<Entry<'a, K, V>> {
|
||||
match self {
|
||||
InternalEntry::Occupied { elem } => {
|
||||
Some(Occupied(OccupiedEntry {
|
||||
@ -1469,7 +1463,6 @@ fn into_entry(self, key: K, long_probes: &'a mut bool) -> Option<Entry<'a, K, V>
|
||||
hash: hash,
|
||||
key: key,
|
||||
elem: elem,
|
||||
long_probes: long_probes,
|
||||
}))
|
||||
}
|
||||
InternalEntry::TableIsEmpty => None,
|
||||
@ -1542,7 +1535,6 @@ pub struct VacantEntry<'a, K: 'a, V: 'a> {
|
||||
hash: SafeHash,
|
||||
key: K,
|
||||
elem: VacantEntryState<K, V, &'a mut RawTable<K, V>>,
|
||||
long_probes: &'a mut bool,
|
||||
}
|
||||
|
||||
#[stable(feature= "debug_hash_map", since = "1.12.0")]
|
||||
@ -2117,15 +2109,15 @@ pub fn into_key(self) -> K {
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
pub fn insert(self, value: V) -> &'a mut V {
|
||||
match self.elem {
|
||||
NeqElem(bucket, disp) => {
|
||||
NeqElem(mut bucket, disp) => {
|
||||
if disp >= DISPLACEMENT_THRESHOLD {
|
||||
*self.long_probes = true;
|
||||
bucket.table_mut().set_tag(true);
|
||||
}
|
||||
robin_hood(bucket, disp, self.hash, self.key, value)
|
||||
},
|
||||
NoElem(bucket, disp) => {
|
||||
NoElem(mut bucket, disp) => {
|
||||
if disp >= DISPLACEMENT_THRESHOLD {
|
||||
*self.long_probes = true;
|
||||
bucket.table_mut().set_tag(true);
|
||||
}
|
||||
bucket.put(self.hash, self.key, value).into_mut_refs().1
|
||||
},
|
||||
|
@ -34,6 +34,42 @@
|
||||
|
||||
const EMPTY_BUCKET: HashUint = 0;
|
||||
|
||||
/// Special `Unique<HashUint>` that uses the lower bit of the pointer
|
||||
/// to expose a boolean tag.
|
||||
/// Note: when the pointer is initialized to EMPTY `.ptr()` will return
|
||||
/// null and the tag functions shouldn't be used.
|
||||
struct TaggedHashUintPtr(Unique<HashUint>);
|
||||
|
||||
impl TaggedHashUintPtr {
|
||||
#[inline]
|
||||
unsafe fn new(ptr: *mut HashUint) -> Self {
|
||||
debug_assert!(ptr as usize & 1 == 0 || ptr as usize == EMPTY as usize);
|
||||
TaggedHashUintPtr(Unique::new(ptr))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn set_tag(&mut self, value: bool) {
|
||||
let usize_ptr = &*self.0 as *const *mut HashUint as *mut usize;
|
||||
unsafe {
|
||||
if value {
|
||||
*usize_ptr |= 1;
|
||||
} else {
|
||||
*usize_ptr &= !1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn tag(&self) -> bool {
|
||||
(*self.0 as usize) & 1 == 1
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ptr(&self) -> *mut HashUint {
|
||||
(*self.0 as usize & !1) as *mut HashUint
|
||||
}
|
||||
}
|
||||
|
||||
/// The raw hashtable, providing safe-ish access to the unzipped and highly
|
||||
/// optimized arrays of hashes, and key-value pairs.
|
||||
///
|
||||
@ -72,10 +108,14 @@
|
||||
/// around just the "table" part of the hashtable. It enforces some
|
||||
/// invariants at the type level and employs some performance trickery,
|
||||
/// but in general is just a tricked out `Vec<Option<(u64, K, V)>>`.
|
||||
///
|
||||
/// The hashtable also exposes a special boolean tag. The tag defaults to false
|
||||
/// when the RawTable is created and is accessible with the `tag` and `set_tag`
|
||||
/// functions.
|
||||
pub struct RawTable<K, V> {
|
||||
capacity: usize,
|
||||
size: usize,
|
||||
hashes: Unique<HashUint>,
|
||||
hashes: TaggedHashUintPtr,
|
||||
|
||||
// Because K/V do not appear directly in any of the types in the struct,
|
||||
// inform rustc that in fact instances of K and V are reachable from here.
|
||||
@ -208,6 +248,10 @@ impl<K, V, M> FullBucket<K, V, M> {
|
||||
pub fn table(&self) -> &M {
|
||||
&self.table
|
||||
}
|
||||
/// Borrow a mutable reference to the table.
|
||||
pub fn table_mut(&mut self) -> &mut M {
|
||||
&mut self.table
|
||||
}
|
||||
/// Move out the reference to the table.
|
||||
pub fn into_table(self) -> M {
|
||||
self.table
|
||||
@ -227,6 +271,10 @@ impl<K, V, M> EmptyBucket<K, V, M> {
|
||||
pub fn table(&self) -> &M {
|
||||
&self.table
|
||||
}
|
||||
/// Borrow a mutable reference to the table.
|
||||
pub fn table_mut(&mut self) -> &mut M {
|
||||
&mut self.table
|
||||
}
|
||||
}
|
||||
|
||||
impl<K, V, M> Bucket<K, V, M> {
|
||||
@ -687,7 +735,7 @@ unsafe fn new_uninitialized(capacity: usize) -> RawTable<K, V> {
|
||||
return RawTable {
|
||||
size: 0,
|
||||
capacity: 0,
|
||||
hashes: Unique::new(EMPTY as *mut HashUint),
|
||||
hashes: TaggedHashUintPtr::new(EMPTY as *mut HashUint),
|
||||
marker: marker::PhantomData,
|
||||
};
|
||||
}
|
||||
@ -728,7 +776,7 @@ unsafe fn new_uninitialized(capacity: usize) -> RawTable<K, V> {
|
||||
RawTable {
|
||||
capacity: capacity,
|
||||
size: 0,
|
||||
hashes: Unique::new(hashes),
|
||||
hashes: TaggedHashUintPtr::new(hashes),
|
||||
marker: marker::PhantomData,
|
||||
}
|
||||
}
|
||||
@ -737,13 +785,13 @@ fn first_bucket_raw(&self) -> RawBucket<K, V> {
|
||||
let hashes_size = self.capacity * size_of::<HashUint>();
|
||||
let pairs_size = self.capacity * size_of::<(K, V)>();
|
||||
|
||||
let buffer = *self.hashes as *mut u8;
|
||||
let buffer = self.hashes.ptr() as *mut u8;
|
||||
let (pairs_offset, _, oflo) =
|
||||
calculate_offsets(hashes_size, pairs_size, align_of::<(K, V)>());
|
||||
debug_assert!(!oflo, "capacity overflow");
|
||||
unsafe {
|
||||
RawBucket {
|
||||
hash: *self.hashes,
|
||||
hash: self.hashes.ptr(),
|
||||
pair: buffer.offset(pairs_offset as isize) as *const _,
|
||||
_marker: marker::PhantomData,
|
||||
}
|
||||
@ -755,7 +803,7 @@ fn first_bucket_raw(&self) -> RawBucket<K, V> {
|
||||
pub fn new(capacity: usize) -> RawTable<K, V> {
|
||||
unsafe {
|
||||
let ret = RawTable::new_uninitialized(capacity);
|
||||
ptr::write_bytes(*ret.hashes, 0, capacity);
|
||||
ptr::write_bytes(ret.hashes.ptr(), 0, capacity);
|
||||
ret
|
||||
}
|
||||
}
|
||||
@ -774,7 +822,7 @@ pub fn size(&self) -> usize {
|
||||
fn raw_buckets(&self) -> RawBuckets<K, V> {
|
||||
RawBuckets {
|
||||
raw: self.first_bucket_raw(),
|
||||
hashes_end: unsafe { self.hashes.offset(self.capacity as isize) },
|
||||
hashes_end: unsafe { self.hashes.ptr().offset(self.capacity as isize) },
|
||||
marker: marker::PhantomData,
|
||||
}
|
||||
}
|
||||
@ -832,6 +880,16 @@ unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets<K, V> {
|
||||
marker: marker::PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the table tag
|
||||
pub fn set_tag(&mut self, value: bool) {
|
||||
self.hashes.set_tag(value)
|
||||
}
|
||||
|
||||
/// Get the table tag
|
||||
pub fn tag(&self) -> bool {
|
||||
self.hashes.tag()
|
||||
}
|
||||
}
|
||||
|
||||
/// A raw iterator. The basis for some other iterators in this module. Although
|
||||
@ -1156,7 +1214,7 @@ fn drop(&mut self) {
|
||||
debug_assert!(!oflo, "should be impossible");
|
||||
|
||||
unsafe {
|
||||
deallocate(*self.hashes as *mut u8, size, align);
|
||||
deallocate(self.hashes.ptr() as *mut u8, size, align);
|
||||
// Remember how everything was allocated out of one buffer
|
||||
// during initialization? We only need one call to free here.
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user