2019-06-12 07:39:12 -05:00
|
|
|
use crate::fx::{FxHashMap, FxHasher};
|
2020-10-30 21:14:32 -05:00
|
|
|
#[cfg(parallel_compiler)]
|
|
|
|
use crate::sync::is_dyn_thread_safe;
|
2020-10-31 06:01:54 -05:00
|
|
|
use crate::sync::{CacheAligned, Lock, LockGuard};
|
2019-06-12 07:39:12 -05:00
|
|
|
use std::borrow::Borrow;
|
|
|
|
use std::collections::hash_map::RawEntryMut;
|
|
|
|
use std::hash::{Hash, Hasher};
|
|
|
|
use std::mem;
|
|
|
|
|
|
|
|
#[cfg(parallel_compiler)]
// Number of bits used to pick a shard; the shard count is `1 << SHARD_BITS`.
// 32 shards is sufficient to reduce contention on an 8-core Ryzen 7 1700,
// but this should be tested on higher core count CPUs. How the `Sharded` type gets used
// may also affect the ideal number of shards.
const SHARD_BITS: usize = 5;
|
|
|
|
|
|
|
|
// Without the parallel compiler there is no lock contention, so a single
// shard (`1 << 0`) is used for better cache behavior.
#[cfg(not(parallel_compiler))]
const SHARD_BITS: usize = 0;
|
|
|
|
|
2019-06-13 16:14:44 -05:00
|
|
|
// Maximum number of shards a `Sharded` can use (the actual in-use count may
// be lower — see `Sharded::mask`).
pub const SHARDS: usize = 1 << SHARD_BITS;
|
2019-06-12 07:39:12 -05:00
|
|
|
|
|
|
|
/// An array of cache-line aligned inner locked structures with convenience methods.
pub struct Sharded<T> {
    /// This mask is used to ensure that accesses are inbounds of `shards`.
    /// When dynamic thread safety is off, this field is set to 0 causing only
    /// a single shard to be used for greater cache efficiency.
    #[cfg(parallel_compiler)]
    mask: usize,
    /// The per-shard data, each behind its own lock and padded to a cache
    /// line (`CacheAligned`) so neighboring shards don't falsely share.
    shards: [CacheAligned<Lock<T>>; SHARDS],
}
|
|
|
|
|
|
|
|
impl<T: Default> Default for Sharded<T> {
|
|
|
|
#[inline]
|
|
|
|
fn default() -> Self {
|
2020-03-22 06:43:19 -05:00
|
|
|
Self::new(T::default)
|
2019-06-13 16:14:44 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<T> Sharded<T> {
|
|
|
|
#[inline]
|
|
|
|
pub fn new(mut value: impl FnMut() -> T) -> Self {
|
2020-10-30 21:14:32 -05:00
|
|
|
Sharded {
|
|
|
|
#[cfg(parallel_compiler)]
|
|
|
|
mask: if is_dyn_thread_safe() { SHARDS - 1 } else { 0 },
|
|
|
|
shards: [(); SHARDS].map(|()| CacheAligned(Lock::new(value()))),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
fn mask(&self) -> usize {
|
|
|
|
#[cfg(parallel_compiler)]
|
|
|
|
{
|
2023-05-28 08:47:44 -05:00
|
|
|
if SHARDS == 1 { 0 } else { self.mask }
|
2020-10-30 21:14:32 -05:00
|
|
|
}
|
|
|
|
#[cfg(not(parallel_compiler))]
|
|
|
|
{
|
|
|
|
0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
fn count(&self) -> usize {
|
2023-05-28 08:54:52 -05:00
|
|
|
// `self.mask` is always one below the used shard count
|
2020-10-30 21:14:32 -05:00
|
|
|
self.mask() + 1
|
2019-06-12 07:39:12 -05:00
|
|
|
}
|
|
|
|
|
2019-10-31 16:52:14 -05:00
|
|
|
/// The shard is selected by hashing `val` with `FxHasher`.
|
2019-06-12 07:39:12 -05:00
|
|
|
#[inline]
|
|
|
|
pub fn get_shard_by_value<K: Hash + ?Sized>(&self, val: &K) -> &Lock<T> {
|
2020-10-30 21:14:32 -05:00
|
|
|
self.get_shard_by_hash(if SHARDS == 1 { 0 } else { make_hash(val) })
|
2019-06-12 07:39:12 -05:00
|
|
|
}
|
|
|
|
|
2020-02-12 04:50:00 -06:00
|
|
|
#[inline]
|
|
|
|
pub fn get_shard_by_hash(&self, hash: u64) -> &Lock<T> {
|
2020-10-30 21:14:32 -05:00
|
|
|
self.get_shard_by_index(get_shard_hash(hash))
|
2020-02-12 04:50:00 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
pub fn get_shard_by_index(&self, i: usize) -> &Lock<T> {
|
2020-10-30 21:14:32 -05:00
|
|
|
// SAFETY: The index get ANDed with the mask, ensuring it is always inbounds.
|
|
|
|
unsafe { &self.shards.get_unchecked(i & self.mask()).0 }
|
2019-06-12 07:39:12 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn lock_shards(&self) -> Vec<LockGuard<'_, T>> {
|
2020-10-30 21:14:32 -05:00
|
|
|
(0..self.count()).map(|i| self.get_shard_by_index(i).lock()).collect()
|
2019-06-12 07:39:12 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn try_lock_shards(&self) -> Option<Vec<LockGuard<'_, T>>> {
|
2020-10-30 21:14:32 -05:00
|
|
|
(0..self.count()).map(|i| self.get_shard_by_index(i).try_lock()).collect()
|
2019-06-12 07:39:12 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// A hash map whose entries are spread over the shards of a `Sharded`.
pub type ShardedHashMap<K, V> = Sharded<FxHashMap<K, V>>;
|
|
|
|
|
2019-10-19 18:29:11 -05:00
|
|
|
impl<K: Eq, V> ShardedHashMap<K, V> {
|
2019-06-12 07:39:12 -05:00
|
|
|
pub fn len(&self) -> usize {
|
|
|
|
self.lock_shards().iter().map(|shard| shard.len()).sum()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<K: Eq + Hash + Copy> ShardedHashMap<K, ()> {
|
|
|
|
#[inline]
|
|
|
|
pub fn intern_ref<Q: ?Sized>(&self, value: &Q, make: impl FnOnce() -> K) -> K
|
|
|
|
where
|
|
|
|
K: Borrow<Q>,
|
|
|
|
Q: Hash + Eq,
|
|
|
|
{
|
|
|
|
let hash = make_hash(value);
|
|
|
|
let mut shard = self.get_shard_by_hash(hash).lock();
|
|
|
|
let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, value);
|
|
|
|
|
|
|
|
match entry {
|
|
|
|
RawEntryMut::Occupied(e) => *e.key(),
|
|
|
|
RawEntryMut::Vacant(e) => {
|
|
|
|
let v = make();
|
|
|
|
e.insert_hashed_nocheck(hash, v, ());
|
|
|
|
v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
pub fn intern<Q>(&self, value: Q, make: impl FnOnce(Q) -> K) -> K
|
|
|
|
where
|
|
|
|
K: Borrow<Q>,
|
|
|
|
Q: Hash + Eq,
|
|
|
|
{
|
|
|
|
let hash = make_hash(&value);
|
|
|
|
let mut shard = self.get_shard_by_hash(hash).lock();
|
|
|
|
let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, &value);
|
|
|
|
|
|
|
|
match entry {
|
|
|
|
RawEntryMut::Occupied(e) => *e.key(),
|
|
|
|
RawEntryMut::Vacant(e) => {
|
|
|
|
let v = make(value);
|
|
|
|
e.insert_hashed_nocheck(hash, v, ());
|
|
|
|
v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-01 17:44:34 -06:00
|
|
|
/// Types that can expose a raw-pointer identity, used by
/// `ShardedHashMap::contains_pointer_to` for pointer-equality lookups.
pub trait IntoPointer {
    /// Returns a pointer which outlives `self`.
    fn into_pointer(&self) -> *const ();
}
|
|
|
|
|
|
|
|
impl<K: Eq + Hash + Copy + IntoPointer> ShardedHashMap<K, ()> {
|
|
|
|
pub fn contains_pointer_to<T: Hash + IntoPointer>(&self, value: &T) -> bool {
|
|
|
|
let hash = make_hash(&value);
|
|
|
|
let shard = self.get_shard_by_hash(hash).lock();
|
|
|
|
let value = value.into_pointer();
|
|
|
|
shard.raw_entry().from_hash(hash, |entry| entry.into_pointer() == value).is_some()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-12 07:39:12 -05:00
|
|
|
#[inline]
|
2022-02-19 21:44:19 -06:00
|
|
|
pub fn make_hash<K: Hash + ?Sized>(val: &K) -> u64 {
|
2019-06-12 07:39:12 -05:00
|
|
|
let mut state = FxHasher::default();
|
|
|
|
val.hash(&mut state);
|
|
|
|
state.finish()
|
|
|
|
}
|
2021-02-06 06:49:08 -06:00
|
|
|
|
|
|
|
/// Get a shard with a pre-computed hash value. If `get_shard_by_value` is
|
|
|
|
/// ever used in combination with `get_shard_by_hash` on a single `Sharded`
|
|
|
|
/// instance, then `hash` must be computed with `FxHasher`. Otherwise,
|
|
|
|
/// `hash` can be computed with any hasher, so long as that hasher is used
|
|
|
|
/// consistently for each `Sharded` instance.
|
|
|
|
#[inline]
|
2020-10-30 21:14:32 -05:00
|
|
|
fn get_shard_hash(hash: u64) -> usize {
|
2021-02-06 06:49:08 -06:00
|
|
|
let hash_len = mem::size_of::<usize>();
|
|
|
|
// Ignore the top 7 bits as hashbrown uses these and get the next SHARD_BITS highest bits.
|
|
|
|
// hashbrown also uses the lowest bits, so we can't use those
|
2020-10-30 21:14:32 -05:00
|
|
|
(hash >> (hash_len * 8 - 7 - SHARD_BITS)) as usize
|
2021-02-06 06:49:08 -06:00
|
|
|
}
|