Auto merge of #115388 - Zoxc:sharded-lock, r=SparrowLii

Add optimized lock methods for `Sharded` and refactor `Lock`

This adds methods to `Sharded` which pick a shard and also lock it. These branch on parallelism just once instead of twice, improving performance.

Benchmark for `cfg(parallel_compiler)` and 1 thread:
<table><tr><td rowspan="2">Benchmark</td><td colspan="1"><b>Before</b></td><td colspan="2"><b>After</b></td></tr><tr><td align="right">Time</td><td align="right">Time</td><td align="right">%</td></tr><tr><td>🟣 <b>clap</b>:check</td><td align="right">1.6461s</td><td align="right">1.6345s</td><td align="right"> -0.70%</td></tr><tr><td>🟣 <b>hyper</b>:check</td><td align="right">0.2414s</td><td align="right">0.2394s</td><td align="right"> -0.83%</td></tr><tr><td>🟣 <b>regex</b>:check</td><td align="right">0.9205s</td><td align="right">0.9143s</td><td align="right"> -0.67%</td></tr><tr><td>🟣 <b>syn</b>:check</td><td align="right">1.4981s</td><td align="right">1.4869s</td><td align="right"> -0.75%</td></tr><tr><td>🟣 <b>syntex_syntax</b>:check</td><td align="right">5.7629s</td><td align="right">5.7256s</td><td align="right"> -0.65%</td></tr><tr><td>Total</td><td align="right">10.0690s</td><td align="right">10.0008s</td><td align="right"> -0.68%</td></tr><tr><td>Summary</td><td align="right">1.0000s</td><td align="right">0.9928s</td><td align="right"> -0.72%</td></tr></table>

cc `@SparrowLii`
This commit is contained in:
bors 2023-09-11 01:43:29 +00:00
commit 9b72cc9abf
6 changed files with 267 additions and 225 deletions

View File

@ -1,7 +1,7 @@
use crate::fx::{FxHashMap, FxHasher};
#[cfg(parallel_compiler)]
use crate::sync::{is_dyn_thread_safe, CacheAligned};
use crate::sync::{Lock, LockGuard};
use crate::sync::{Lock, LockGuard, Mode};
#[cfg(parallel_compiler)]
use itertools::Either;
use std::borrow::Borrow;
@ -73,6 +73,56 @@ pub fn get_shard_by_index(&self, _i: usize) -> &Lock<T> {
}
}
/// Locks the shard that `_val` maps to and returns the guard for it.
///
/// The shard is selected by hashing `_val` with `FxHasher`. When there is only a
/// single shard, `_val` is not hashed and that one shard is locked.
#[inline]
#[track_caller]
pub fn lock_shard_by_value<K: Hash + ?Sized>(&self, _val: &K) -> LockGuard<'_, T> {
match self {
Self::Single(single) => {
// Synchronization is disabled so use `lock_assume(Mode::NoSync)`, which is
// optimized for that case.
// SAFETY: We know `is_dyn_thread_safe` was false when creating the lock thus
// `might_be_dyn_thread_safe` was also false.
unsafe { single.lock_assume(Mode::NoSync) }
}
// With multiple shards, hash the value and defer to the hash-based lookup.
#[cfg(parallel_compiler)]
Self::Shards(..) => self.lock_shard_by_hash(make_hash(_val)),
}
}
/// Locks the shard selected by `hash` and returns the guard for it.
///
/// The shard index is derived from `hash` with `get_shard_hash` and then handed to
/// `lock_shard_by_index`.
#[inline]
#[track_caller]
pub fn lock_shard_by_hash(&self, hash: u64) -> LockGuard<'_, T> {
self.lock_shard_by_index(get_shard_hash(hash))
}
/// Locks the shard at index `_i` and returns the guard for it.
///
/// With a single shard the index is ignored. With multiple shards the index is masked
/// with `SHARDS - 1` before it is used to pick the shard.
#[inline]
#[track_caller]
pub fn lock_shard_by_index(&self, _i: usize) -> LockGuard<'_, T> {
match self {
Self::Single(single) => {
// Synchronization is disabled so use `lock_assume(Mode::NoSync)`, which is
// optimized for that case.
// SAFETY: We know `is_dyn_thread_safe` was false when creating the lock thus
// `might_be_dyn_thread_safe` was also false.
unsafe { single.lock_assume(Mode::NoSync) }
}
#[cfg(parallel_compiler)]
Self::Shards(shards) => {
// Synchronization is enabled so use `lock_assume(Mode::Sync)`, which is
// optimized for that case.
// SAFETY (get_unchecked): The index gets ANDed with the shard mask, ensuring it is
// always inbounds.
// SAFETY (lock_assume): We know `is_dyn_thread_safe` was true when creating
// the lock thus `might_be_dyn_thread_safe` was also true.
unsafe { shards.get_unchecked(_i & (SHARDS - 1)).0.lock_assume(Mode::Sync) }
}
}
}
#[inline]
pub fn lock_shards(&self) -> impl Iterator<Item = LockGuard<'_, T>> {
match self {
@ -124,7 +174,7 @@ pub fn intern_ref<Q: ?Sized>(&self, value: &Q, make: impl FnOnce() -> K) -> K
Q: Hash + Eq,
{
let hash = make_hash(value);
let mut shard = self.get_shard_by_hash(hash).lock();
let mut shard = self.lock_shard_by_hash(hash);
let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, value);
match entry {
@ -144,7 +194,7 @@ pub fn intern<Q>(&self, value: Q, make: impl FnOnce(Q) -> K) -> K
Q: Hash + Eq,
{
let hash = make_hash(&value);
let mut shard = self.get_shard_by_hash(hash).lock();
let mut shard = self.lock_shard_by_hash(hash);
let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, &value);
match entry {
@ -166,7 +216,7 @@ pub trait IntoPointer {
impl<K: Eq + Hash + Copy + IntoPointer> ShardedHashMap<K, ()> {
pub fn contains_pointer_to<T: Hash + IntoPointer>(&self, value: &T) -> bool {
let hash = make_hash(&value);
let shard = self.get_shard_by_hash(hash).lock();
let shard = self.lock_shard_by_hash(hash);
let value = value.into_pointer();
shard.raw_entry().from_hash(hash, |entry| entry.into_pointer() == value).is_some()
}

View File

@ -49,7 +49,7 @@
use std::panic::{catch_unwind, resume_unwind, AssertUnwindSafe};
mod lock;
pub use lock::{Lock, LockGuard};
pub use lock::{Lock, LockGuard, Mode};
mod worker_local;
pub use worker_local::{Registry, WorkerLocal};
@ -86,7 +86,6 @@ pub fn is_dyn_thread_safe() -> bool {
// Whether thread safety might be enabled.
#[inline]
#[cfg(parallel_compiler)]
pub fn might_be_dyn_thread_safe() -> bool {
DYN_THREAD_SAFE_MODE.load(Ordering::Relaxed) != DYN_NOT_THREAD_SAFE
}

View File

@ -3,224 +3,229 @@
//!
//! When `cfg(parallel_compiler)` is not set, the lock is instead a wrapper around `RefCell`.
#![allow(dead_code)]
use std::fmt;
#[cfg(parallel_compiler)]
pub use maybe_sync::*;
#[cfg(not(parallel_compiler))]
use std::cell::RefCell;
#[cfg(parallel_compiler)]
use {
crate::cold_path,
crate::sync::DynSend,
crate::sync::DynSync,
parking_lot::lock_api::RawMutex,
std::cell::Cell,
std::cell::UnsafeCell,
std::fmt,
std::intrinsics::{likely, unlikely},
std::marker::PhantomData,
std::mem::ManuallyDrop,
std::ops::{Deref, DerefMut},
};
pub use no_sync::*;
#[cfg(not(parallel_compiler))]
pub use std::cell::RefMut as LockGuard;
#[cfg(not(parallel_compiler))]
#[derive(Debug)]
pub struct Lock<T>(RefCell<T>);
#[cfg(not(parallel_compiler))]
impl<T> Lock<T> {
#[inline(always)]
pub fn new(inner: T) -> Self {
Lock(RefCell::new(inner))
}
#[inline(always)]
pub fn into_inner(self) -> T {
self.0.into_inner()
}
#[inline(always)]
pub fn get_mut(&mut self) -> &mut T {
self.0.get_mut()
}
#[inline(always)]
pub fn try_lock(&self) -> Option<LockGuard<'_, T>> {
self.0.try_borrow_mut().ok()
}
#[inline(always)]
#[track_caller]
pub fn lock(&self) -> LockGuard<'_, T> {
self.0.borrow_mut()
}
/// The synchronization mode of a `Lock`.
///
/// It is passed to `lock_assume` to state which mode the caller knows the lock to be in.
#[derive(Clone, Copy, PartialEq)]
pub enum Mode {
/// Synchronization is disabled; the `maybe_sync` lock tracks its state in a `Cell<bool>`.
NoSync,
/// Synchronization is enabled; the `maybe_sync` lock is backed by a `RawMutex`.
Sync,
}
/// A guard holding mutable access to a `Lock` which is in a locked state.
#[cfg(parallel_compiler)]
#[must_use = "if unused the Lock will immediately unlock"]
pub struct LockGuard<'a, T> {
lock: &'a Lock<T>,
marker: PhantomData<&'a mut T>,
}
mod maybe_sync {
use super::Mode;
use crate::sync::mode;
#[cfg(parallel_compiler)]
use crate::sync::{DynSend, DynSync};
use parking_lot::lock_api::RawMutex as _;
use parking_lot::RawMutex;
use std::cell::Cell;
use std::cell::UnsafeCell;
use std::intrinsics::unlikely;
use std::marker::PhantomData;
use std::mem::ManuallyDrop;
use std::ops::{Deref, DerefMut};
#[cfg(parallel_compiler)]
impl<'a, T: 'a> Deref for LockGuard<'a, T> {
type Target = T;
#[inline]
fn deref(&self) -> &T {
// SAFETY: We have shared access to the mutable access owned by this type,
// so we can give out a shared reference.
unsafe { &*self.lock.data.get() }
/// A guard holding mutable access to a `Lock` which is in a locked state.
///
/// Dropping the guard unlocks the lock again.
#[must_use = "if unused the Lock will immediately unlock"]
pub struct LockGuard<'a, T> {
/// The lock this guard was acquired from and will unlock on drop.
lock: &'a Lock<T>,
/// Zero-sized marker typed as a mutable borrow of the protected data for `'a`.
marker: PhantomData<&'a mut T>,
/// The synchronization mode of the lock. This is explicitly passed to let LLVM relate it
/// to the original lock operation.
mode: Mode,
}
}
#[cfg(parallel_compiler)]
impl<'a, T: 'a> DerefMut for LockGuard<'a, T> {
#[inline]
fn deref_mut(&mut self) -> &mut T {
// SAFETY: We have mutable access to the data so we can give out a mutable reference.
unsafe { &mut *self.lock.data.get() }
}
}
#[cfg(parallel_compiler)]
impl<'a, T: 'a> Drop for LockGuard<'a, T> {
#[inline]
fn drop(&mut self) {
// SAFETY: We know that the lock is in a locked
// state because it is a invariant of this type.
unsafe { self.lock.raw.unlock() };
}
}
#[cfg(parallel_compiler)]
union LockRawUnion {
/// Indicates if the cell is locked. Only used if `LockRaw.sync` is false.
cell: ManuallyDrop<Cell<bool>>,
/// A lock implementation that's only used if `LockRaw.sync` is true.
lock: ManuallyDrop<parking_lot::RawMutex>,
}
/// A raw lock which only uses synchronization if `might_be_dyn_thread_safe` is true.
/// It contains no associated data and is used in the implementation of `Lock` which does have such data.
///
/// A manual implementation of a tagged union is used with the `sync` field and the `LockRawUnion` instead
/// of using enums as it results in better code generation.
#[cfg(parallel_compiler)]
struct LockRaw {
/// Indicates if synchronization is used via `opt.lock` if true,
/// or if a non-thread safe cell is used via `opt.cell`. This is set on initialization and never changed.
sync: bool,
opt: LockRawUnion,
}
#[cfg(parallel_compiler)]
impl LockRaw {
fn new() -> Self {
if unlikely(super::mode::might_be_dyn_thread_safe()) {
// Create the lock with synchronization enabled using the `RawMutex` type.
LockRaw {
sync: true,
opt: LockRawUnion { lock: ManuallyDrop::new(parking_lot::RawMutex::INIT) },
}
} else {
// Create the lock with synchronization disabled.
LockRaw { sync: false, opt: LockRawUnion { cell: ManuallyDrop::new(Cell::new(false)) } }
impl<'a, T: 'a> Deref for LockGuard<'a, T> {
type Target = T;
#[inline]
fn deref(&self) -> &T {
// SAFETY: We have shared access to the mutable access owned by this type,
// so we can give out a shared reference.
unsafe { &*self.lock.data.get() }
}
}
#[inline(always)]
fn try_lock(&self) -> bool {
// SAFETY: This is safe since the union fields are used in accordance with `self.sync`.
unsafe {
if likely(!self.sync) {
if self.opt.cell.get() {
false
} else {
self.opt.cell.set(true);
true
impl<'a, T: 'a> DerefMut for LockGuard<'a, T> {
#[inline]
fn deref_mut(&mut self) -> &mut T {
// SAFETY: We have mutable access to the data so we can give out a mutable reference.
unsafe { &mut *self.lock.data.get() }
}
}
impl<'a, T: 'a> Drop for LockGuard<'a, T> {
#[inline]
fn drop(&mut self) {
// SAFETY (union access): We get `self.mode` from the lock operation so it is consistent
// with the `lock.mode` state. This means we access the right union fields.
match self.mode {
Mode::NoSync => {
let cell = unsafe { &self.lock.mode_union.no_sync };
debug_assert_eq!(cell.get(), true);
cell.set(false);
}
} else {
self.opt.lock.try_lock()
// SAFETY (unlock): We know that the lock is locked as this type is a proof of that.
Mode::Sync => unsafe { self.lock.mode_union.sync.unlock() },
}
}
}
#[inline(always)]
fn lock(&self) {
if super::ERROR_CHECKING {
// We're in the debugging mode, so assert that the lock is not held so we
// get a panic instead of waiting for the lock.
assert_eq!(self.try_lock(), true, "lock must not be hold");
} else {
// SAFETY: This is safe since the union fields are used in accordance with `self.sync`.
unsafe {
if likely(!self.sync) {
if unlikely(self.opt.cell.replace(true)) {
cold_path(|| panic!("lock was already held"))
union ModeUnion {
/// Indicates if the cell is locked. Only used if `Lock.mode` is `NoSync`.
no_sync: ManuallyDrop<Cell<bool>>,
/// A lock implementation that's only used if `Lock.mode` is `Sync`.
sync: ManuallyDrop<RawMutex>,
}
/// The value representing a locked state for the `Cell`.
const LOCKED: bool = true;
/// A lock which only uses synchronization if `might_be_dyn_thread_safe` is true.
/// It implements `DynSend` and `DynSync` instead of the typical `Send` and `Sync`.
pub struct Lock<T> {
/// Indicates if synchronization is used via `mode_union.sync` if it's `Sync`, or if a
/// not thread safe cell is used via `mode_union.no_sync` if it's `NoSync`.
/// This is set on initialization and never changed.
mode: Mode,
/// The lock state itself. Which union field is valid is determined by `mode`.
mode_union: ModeUnion,
/// The value protected by the lock.
data: UnsafeCell<T>,
}
impl<T> Lock<T> {
/// Creates a new, unlocked `Lock` wrapping `inner`.
///
/// The synchronization mode is chosen here from `might_be_dyn_thread_safe()` and
/// stored in the lock together with the matching union field.
#[inline(always)]
pub fn new(inner: T) -> Self {
let (mode, mode_union) = if unlikely(mode::might_be_dyn_thread_safe()) {
// Create the lock with synchronization enabled using the `RawMutex` type.
(Mode::Sync, ModeUnion { sync: ManuallyDrop::new(RawMutex::INIT) })
} else {
// Create the lock with synchronization disabled.
(Mode::NoSync, ModeUnion { no_sync: ManuallyDrop::new(Cell::new(!LOCKED)) })
};
Lock { mode, mode_union, data: UnsafeCell::new(inner) }
}
#[inline(always)]
pub fn into_inner(self) -> T {
self.data.into_inner()
}
#[inline(always)]
pub fn get_mut(&mut self) -> &mut T {
self.data.get_mut()
}
#[inline(always)]
pub fn try_lock(&self) -> Option<LockGuard<'_, T>> {
let mode = self.mode;
// SAFETY: This is safe since the union fields are used in accordance with `self.mode`.
match mode {
Mode::NoSync => {
let cell = unsafe { &self.mode_union.no_sync };
let was_unlocked = cell.get() != LOCKED;
if was_unlocked {
cell.set(LOCKED);
}
} else {
self.opt.lock.lock();
was_unlocked
}
Mode::Sync => unsafe { self.mode_union.sync.try_lock() },
}
.then(|| LockGuard { lock: self, marker: PhantomData, mode })
}
/// This acquires the lock assuming synchronization is in a specific mode.
///
/// # Panics
/// With `Mode::NoSync` this panics if the lock is already held.
///
/// # Safety
/// This method must only be called with `Mode::Sync` if `might_be_dyn_thread_safe` was
/// true on lock creation.
///
/// NOTE(review): the body accesses the union field selected by `mode`, so `mode`
/// presumably has to equal `self.mode` for `Mode::NoSync` as well; confirm and spell
/// that out in this contract.
#[inline(always)]
#[track_caller]
pub unsafe fn lock_assume(&self, mode: Mode) -> LockGuard<'_, T> {
// The panic lives in a separate cold, never-inlined function so that only a call
// to it remains in the always-inlined locking path.
#[inline(never)]
#[track_caller]
#[cold]
fn lock_held() -> ! {
panic!("lock was already held")
}
// SAFETY: This is safe since the union fields are used in accordance with `mode`
// which also must match `self.mode` due to the safety precondition.
unsafe {
match mode {
Mode::NoSync => {
// `replace` marks the cell as locked and returns the previous state; if it
// was already locked, a guard for this lock is still alive.
if unlikely(self.mode_union.no_sync.replace(LOCKED) == LOCKED) {
lock_held()
}
}
Mode::Sync => self.mode_union.sync.lock(),
}
}
// The guard remembers `mode` so that dropping it uses the same union field.
LockGuard { lock: self, marker: PhantomData, mode }
}
/// Acquires the lock using the synchronization mode stored in the lock itself.
#[inline(always)]
#[track_caller]
pub fn lock(&self) -> LockGuard<'_, T> {
// SAFETY: `self.mode` is the mode the lock was created with and it is never changed
// afterwards, so it matches the union field that is in use.
unsafe { self.lock_assume(self.mode) }
}
}
/// This unlocks the lock.
///
/// Safety
/// This method may only be called if the lock is currently held.
#[inline(always)]
unsafe fn unlock(&self) {
// SAFETY: The union use is safe since the union fields are used in accordance with
// `self.sync` and the `unlock` method precondition is upheld by the caller.
unsafe {
if likely(!self.sync) {
debug_assert_eq!(self.opt.cell.get(), true);
self.opt.cell.set(false);
} else {
self.opt.lock.unlock();
}
#[cfg(parallel_compiler)]
unsafe impl<T: DynSend> DynSend for Lock<T> {}
#[cfg(parallel_compiler)]
unsafe impl<T: DynSend> DynSync for Lock<T> {}
}
mod no_sync {
use super::Mode;
use std::cell::RefCell;
pub use std::cell::RefMut as LockGuard;
pub struct Lock<T>(RefCell<T>);
impl<T> Lock<T> {
#[inline(always)]
pub fn new(inner: T) -> Self {
Lock(RefCell::new(inner))
}
}
}
/// A lock which only uses synchronization if `might_be_dyn_thread_safe` is true.
/// It implements `DynSend` and `DynSync` instead of the typical `Send` and `Sync`.
#[cfg(parallel_compiler)]
pub struct Lock<T> {
raw: LockRaw,
data: UnsafeCell<T>,
}
#[inline(always)]
pub fn into_inner(self) -> T {
self.0.into_inner()
}
#[cfg(parallel_compiler)]
impl<T> Lock<T> {
#[inline(always)]
pub fn new(inner: T) -> Self {
Lock { raw: LockRaw::new(), data: UnsafeCell::new(inner) }
}
#[inline(always)]
pub fn get_mut(&mut self) -> &mut T {
self.0.get_mut()
}
#[inline(always)]
pub fn into_inner(self) -> T {
self.data.into_inner()
}
#[inline(always)]
pub fn try_lock(&self) -> Option<LockGuard<'_, T>> {
self.0.try_borrow_mut().ok()
}
#[inline(always)]
pub fn get_mut(&mut self) -> &mut T {
self.data.get_mut()
}
#[inline(always)]
#[track_caller]
// This is unsafe to match the API for the `parallel_compiler` case.
pub unsafe fn lock_assume(&self, _mode: Mode) -> LockGuard<'_, T> {
self.0.borrow_mut()
}
#[inline(always)]
pub fn try_lock(&self) -> Option<LockGuard<'_, T>> {
if self.raw.try_lock() { Some(LockGuard { lock: self, marker: PhantomData }) } else { None }
}
#[inline(always)]
pub fn lock(&self) -> LockGuard<'_, T> {
self.raw.lock();
LockGuard { lock: self, marker: PhantomData }
#[inline(always)]
#[track_caller]
pub fn lock(&self) -> LockGuard<'_, T> {
self.0.borrow_mut()
}
}
}
@ -244,12 +249,13 @@ pub fn borrow_mut(&self) -> LockGuard<'_, T> {
}
}
#[cfg(parallel_compiler)]
unsafe impl<T: DynSend> DynSend for Lock<T> {}
#[cfg(parallel_compiler)]
unsafe impl<T: DynSend> DynSync for Lock<T> {}
impl<T: Default> Default for Lock<T> {
#[inline]
fn default() -> Self {
Lock::new(T::default())
}
}
#[cfg(parallel_compiler)]
impl<T: fmt::Debug> fmt::Debug for Lock<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.try_lock() {
@ -267,10 +273,3 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
}
}
}
impl<T: Default> Default for Lock<T> {
#[inline]
fn default() -> Self {
Lock::new(T::default())
}
}

View File

@ -629,12 +629,7 @@ pub fn dep_node_index_of_opt(&self, dep_node: &DepNode<K>) -> Option<DepNodeInde
if let Some(prev_index) = self.previous.node_to_index_opt(dep_node) {
self.current.prev_index_to_index.lock()[prev_index]
} else {
self.current
.new_node_to_index
.get_shard_by_value(dep_node)
.lock()
.get(dep_node)
.copied()
self.current.new_node_to_index.lock_shard_by_value(dep_node).get(dep_node).copied()
}
}
@ -1201,8 +1196,7 @@ fn intern_new_node(
edges: EdgesVec,
current_fingerprint: Fingerprint,
) -> DepNodeIndex {
let dep_node_index = match self.new_node_to_index.get_shard_by_value(&key).lock().entry(key)
{
let dep_node_index = match self.new_node_to_index.lock_shard_by_value(&key).entry(key) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
let dep_node_index =
@ -1328,7 +1322,7 @@ fn debug_assert_not_in_new_nodes(
) {
let node = &prev_graph.index_to_node(prev_index);
debug_assert!(
!self.new_node_to_index.get_shard_by_value(node).lock().contains_key(node),
!self.new_node_to_index.lock_shard_by_value(node).contains_key(node),
"node from previous graph present in new node collection"
);
}

View File

@ -55,7 +55,7 @@ impl<K, V> QueryCache for DefaultCache<K, V>
#[inline(always)]
fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
let key_hash = sharded::make_hash(key);
let lock = self.cache.get_shard_by_hash(key_hash).lock();
let lock = self.cache.lock_shard_by_hash(key_hash);
let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key);
if let Some((_, value)) = result { Some(*value) } else { None }
@ -63,7 +63,7 @@ fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
#[inline]
fn complete(&self, key: K, value: V, index: DepNodeIndex) {
let mut lock = self.cache.get_shard_by_value(&key).lock();
let mut lock = self.cache.lock_shard_by_value(&key);
// We may be overwriting another value. This is all right, since the dep-graph
// will check that the fingerprint matches.
lock.insert(key, (value, index));
@ -148,13 +148,13 @@ impl<K, V> QueryCache for VecCache<K, V>
#[inline(always)]
fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
let lock = self.cache.get_shard_by_hash(key.index() as u64).lock();
let lock = self.cache.lock_shard_by_hash(key.index() as u64);
if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None }
}
#[inline]
fn complete(&self, key: K, value: V, index: DepNodeIndex) {
let mut lock = self.cache.get_shard_by_hash(key.index() as u64).lock();
let mut lock = self.cache.lock_shard_by_hash(key.index() as u64);
lock.insert(key, (value, index));
}

View File

@ -158,7 +158,7 @@ fn complete<C>(self, cache: &C, result: C::Value, dep_node_index: DepNodeIndex)
cache.complete(key, result, dep_node_index);
let job = {
let mut lock = state.active.get_shard_by_value(&key).lock();
let mut lock = state.active.lock_shard_by_value(&key);
match lock.remove(&key).unwrap() {
QueryResult::Started(job) => job,
QueryResult::Poisoned => panic!(),
@ -180,7 +180,7 @@ fn drop(&mut self) {
// Poison the query so jobs waiting on it panic.
let state = self.state;
let job = {
let mut shard = state.active.get_shard_by_value(&self.key).lock();
let mut shard = state.active.lock_shard_by_value(&self.key);
let job = match shard.remove(&self.key).unwrap() {
QueryResult::Started(job) => job,
QueryResult::Poisoned => panic!(),
@ -303,7 +303,7 @@ fn try_execute_query<Q, Qcx, const INCR: bool>(
Qcx: QueryContext,
{
let state = query.query_state(qcx);
let mut state_lock = state.active.get_shard_by_value(&key).lock();
let mut state_lock = state.active.lock_shard_by_value(&key);
// For the parallel compiler we need to check both the query cache and query state structures
// while holding the state lock to ensure that 1) the query has not yet completed and 2) the