Rollup merge of #126953 - joboet:lazy_key, r=jhpratt

std: separate TLS key creation from TLS access

Currently, `std` performs an atomic load to get the OS key on every access to `StaticKey` even when the key is already known. This PR thus replaces `StaticKey` with the platform-specific `get` and `set` functions and a new `LazyKey` type that acts as a `LazyLock<Key>`, allowing the reuse of the retrieved key for multiple accesses.

Related to #110897.
This commit is contained in:
Matthias Krüger 2024-06-29 09:14:56 +02:00 committed by GitHub
commit 1e39eb7d53
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 115 additions and 128 deletions

View File

@ -4,15 +4,15 @@
use crate::ptr; use crate::ptr;
use crate::sys::thread_local::destructors; use crate::sys::thread_local::destructors;
use crate::sys::thread_local::key::StaticKey; use crate::sys::thread_local::key::{set, LazyKey};
pub fn enable() { pub fn enable() {
static DTORS: StaticKey = StaticKey::new(Some(run)); static DTORS: LazyKey = LazyKey::new(Some(run));
// Setting the key value to something other than NULL will result in the // Setting the key value to something other than NULL will result in the
// destructor being run at thread exit. // destructor being run at thread exit.
unsafe { unsafe {
DTORS.set(ptr::without_provenance_mut(1)); set(DTORS.force(), ptr::without_provenance_mut(1));
} }
unsafe extern "C" fn run(_: *mut u8) { unsafe extern "C" fn run(_: *mut u8) {

View File

@ -1,4 +1,4 @@
//! A `StaticKey` implementation using racy initialization. //! A `LazyKey` implementation using racy initialization.
//! //!
//! Unfortunately, none of the platforms currently supported by `std` allows //! Unfortunately, none of the platforms currently supported by `std` allows
//! creating TLS keys at compile-time. Thus we need a way to lazily create keys. //! creating TLS keys at compile-time. Thus we need a way to lazily create keys.
@ -10,34 +10,12 @@ use crate::sync::atomic::{self, AtomicUsize, Ordering};
/// A type for TLS keys that are statically allocated. /// A type for TLS keys that are statically allocated.
/// ///
/// This type is entirely `unsafe` to use as it does not protect against /// This is basically a `LazyLock<Key>`, but avoids blocking and circular
/// use-after-deallocation or use-during-deallocation. /// dependencies with the rest of `std`.
/// pub struct LazyKey {
/// The actual OS-TLS key is lazily allocated when this is used for the first
/// time. The key is also deallocated when the Rust runtime exits or `destroy`
/// is called, whichever comes first.
///
/// # Examples
///
/// ```ignore (cannot-doctest-private-modules)
/// use tls::os::{StaticKey, INIT};
///
/// // Use a regular global static to store the key.
/// static KEY: StaticKey = INIT;
///
/// // The state provided via `get` and `set` is thread-local.
/// unsafe {
/// assert!(KEY.get().is_null());
/// KEY.set(1 as *mut u8);
/// }
/// ```
pub struct StaticKey {
/// Inner static TLS key (internals). /// Inner static TLS key (internals).
key: AtomicUsize, key: AtomicUsize,
/// Destructor for the TLS value. /// Destructor for the TLS value.
///
/// See `Key::new` for information about when the destructor runs and how
/// it runs.
dtor: Option<unsafe extern "C" fn(*mut u8)>, dtor: Option<unsafe extern "C" fn(*mut u8)>,
} }
@ -51,32 +29,14 @@ const KEY_SENTVAL: usize = 0;
#[cfg(target_os = "nto")] #[cfg(target_os = "nto")]
const KEY_SENTVAL: usize = libc::PTHREAD_KEYS_MAX + 1; const KEY_SENTVAL: usize = libc::PTHREAD_KEYS_MAX + 1;
impl StaticKey { impl LazyKey {
#[rustc_const_unstable(feature = "thread_local_internals", issue = "none")] #[rustc_const_unstable(feature = "thread_local_internals", issue = "none")]
pub const fn new(dtor: Option<unsafe extern "C" fn(*mut u8)>) -> StaticKey { pub const fn new(dtor: Option<unsafe extern "C" fn(*mut u8)>) -> LazyKey {
StaticKey { key: atomic::AtomicUsize::new(KEY_SENTVAL), dtor } LazyKey { key: atomic::AtomicUsize::new(KEY_SENTVAL), dtor }
}
/// Gets the value associated with this TLS key
///
/// This will lazily allocate a TLS key from the OS if one has not already
/// been allocated.
#[inline]
pub unsafe fn get(&self) -> *mut u8 {
unsafe { super::get(self.key()) }
}
/// Sets this TLS key to a new value.
///
/// This will lazily allocate a TLS key from the OS if one has not already
/// been allocated.
#[inline]
pub unsafe fn set(&self, val: *mut u8) {
unsafe { super::set(self.key(), val) }
} }
#[inline] #[inline]
fn key(&self) -> super::Key { pub fn force(&self) -> super::Key {
match self.key.load(Ordering::Acquire) { match self.key.load(Ordering::Acquire) {
KEY_SENTVAL => self.lazy_init() as super::Key, KEY_SENTVAL => self.lazy_init() as super::Key,
n => n as super::Key, n => n as super::Key,

View File

@ -1,18 +1,25 @@
use super::StaticKey; use super::{get, set, LazyKey};
use crate::ptr; use crate::ptr;
#[test] #[test]
fn smoke() { fn smoke() {
static K1: StaticKey = StaticKey::new(None); static K1: LazyKey = LazyKey::new(None);
static K2: StaticKey = StaticKey::new(None); static K2: LazyKey = LazyKey::new(None);
let k1 = K1.force();
let k2 = K2.force();
assert_ne!(k1, k2);
assert_eq!(K1.force(), k1);
assert_eq!(K2.force(), k2);
unsafe { unsafe {
assert!(K1.get().is_null()); assert!(get(k1).is_null());
assert!(K2.get().is_null()); assert!(get(k2).is_null());
K1.set(ptr::without_provenance_mut(1)); set(k1, ptr::without_provenance_mut(1));
K2.set(ptr::without_provenance_mut(2)); set(k2, ptr::without_provenance_mut(2));
assert_eq!(K1.get() as usize, 1); assert_eq!(get(k1) as usize, 1);
assert_eq!(K2.get() as usize, 2); assert_eq!(get(k2) as usize, 2);
} }
} }
@ -26,25 +33,27 @@ fn destructors() {
drop(unsafe { Arc::from_raw(ptr as *const ()) }); drop(unsafe { Arc::from_raw(ptr as *const ()) });
} }
static KEY: StaticKey = StaticKey::new(Some(destruct)); static KEY: LazyKey = LazyKey::new(Some(destruct));
let shared1 = Arc::new(()); let shared1 = Arc::new(());
let shared2 = Arc::clone(&shared1); let shared2 = Arc::clone(&shared1);
let key = KEY.force();
unsafe { unsafe {
assert!(KEY.get().is_null()); assert!(get(key).is_null());
KEY.set(Arc::into_raw(shared1) as *mut u8); set(key, Arc::into_raw(shared1) as *mut u8);
} }
thread::spawn(move || unsafe { thread::spawn(move || unsafe {
assert!(KEY.get().is_null()); let key = KEY.force();
KEY.set(Arc::into_raw(shared2) as *mut u8); assert!(get(key).is_null());
set(key, Arc::into_raw(shared2) as *mut u8);
}) })
.join() .join()
.unwrap(); .unwrap();
// Leak the Arc, let the TLS destructor clean it up. // Leak the Arc, let the TLS destructor clean it up.
let shared1 = unsafe { ManuallyDrop::new(Arc::from_raw(KEY.get() as *const ())) }; let shared1 = unsafe { ManuallyDrop::new(Arc::from_raw(get(key) as *const ())) };
assert_eq!( assert_eq!(
Arc::strong_count(&shared1), Arc::strong_count(&shared1),
1, 1,

View File

@ -16,6 +16,7 @@ pub unsafe fn set(key: Key, value: *mut u8) {
} }
#[inline] #[inline]
#[cfg(any(not(target_thread_local), test))]
pub unsafe fn get(key: Key) -> *mut u8 { pub unsafe fn get(key: Key) -> *mut u8 {
unsafe { libc::pthread_getspecific(key) as *mut u8 } unsafe { libc::pthread_getspecific(key) as *mut u8 }
} }

View File

@ -1,4 +1,4 @@
//! Implementation of `StaticKey` for Windows. //! Implementation of `LazyKey` for Windows.
//! //!
//! Windows has no native support for running destructors so we manage our own //! Windows has no native support for running destructors so we manage our own
//! list of destructors to keep track of how to destroy keys. We then install a //! list of destructors to keep track of how to destroy keys. We then install a
@ -13,9 +13,9 @@
//! don't reach a fixed point after a short while then we just inevitably leak //! don't reach a fixed point after a short while then we just inevitably leak
//! something. //! something.
//! //!
//! The list is implemented as an atomic single-linked list of `StaticKey`s and //! The list is implemented as an atomic single-linked list of `LazyKey`s and
//! does not support unregistration. Unfortunately, this means that we cannot //! does not support unregistration. Unfortunately, this means that we cannot
//! use racy initialization for creating the keys in `StaticKey`, as that could //! use racy initialization for creating the keys in `LazyKey`, as that could
//! result in destructors being missed. Hence, we synchronize the creation of //! result in destructors being missed. Hence, we synchronize the creation of
//! keys with destructors through [`INIT_ONCE`](c::INIT_ONCE) (`std`'s //! keys with destructors through [`INIT_ONCE`](c::INIT_ONCE) (`std`'s
//! [`Once`](crate::sync::Once) cannot be used since it might use TLS itself). //! [`Once`](crate::sync::Once) cannot be used since it might use TLS itself).
@ -33,26 +33,26 @@ use crate::sync::atomic::{
use crate::sys::c; use crate::sys::c;
use crate::sys::thread_local::guard; use crate::sys::thread_local::guard;
type Key = c::DWORD; pub type Key = c::DWORD;
type Dtor = unsafe extern "C" fn(*mut u8); type Dtor = unsafe extern "C" fn(*mut u8);
pub struct StaticKey { pub struct LazyKey {
/// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX /// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX
/// is not a valid key value, this allows us to use zero as sentinel value /// is not a valid key value, this allows us to use zero as sentinel value
/// without risking overflow. /// without risking overflow.
key: AtomicU32, key: AtomicU32,
dtor: Option<Dtor>, dtor: Option<Dtor>,
next: AtomicPtr<StaticKey>, next: AtomicPtr<LazyKey>,
/// Currently, destructors cannot be unregistered, so we cannot use racy /// Currently, destructors cannot be unregistered, so we cannot use racy
/// initialization for keys. Instead, we need synchronize initialization. /// initialization for keys. Instead, we need synchronize initialization.
/// Use the Windows-provided `Once` since it does not require TLS. /// Use the Windows-provided `Once` since it does not require TLS.
once: UnsafeCell<c::INIT_ONCE>, once: UnsafeCell<c::INIT_ONCE>,
} }
impl StaticKey { impl LazyKey {
#[inline] #[inline]
pub const fn new(dtor: Option<Dtor>) -> StaticKey { pub const fn new(dtor: Option<Dtor>) -> LazyKey {
StaticKey { LazyKey {
key: AtomicU32::new(0), key: AtomicU32::new(0),
dtor, dtor,
next: AtomicPtr::new(ptr::null_mut()), next: AtomicPtr::new(ptr::null_mut()),
@ -61,18 +61,7 @@ impl StaticKey {
} }
#[inline] #[inline]
pub unsafe fn set(&'static self, val: *mut u8) { pub fn force(&'static self) -> Key {
let r = unsafe { c::TlsSetValue(self.key(), val.cast()) };
debug_assert_eq!(r, c::TRUE);
}
#[inline]
pub unsafe fn get(&'static self) -> *mut u8 {
unsafe { c::TlsGetValue(self.key()).cast() }
}
#[inline]
fn key(&'static self) -> Key {
match self.key.load(Acquire) { match self.key.load(Acquire) {
0 => unsafe { self.init() }, 0 => unsafe { self.init() },
key => key - 1, key => key - 1,
@ -141,17 +130,28 @@ impl StaticKey {
} }
} }
unsafe impl Send for StaticKey {} unsafe impl Send for LazyKey {}
unsafe impl Sync for StaticKey {} unsafe impl Sync for LazyKey {}
static DTORS: AtomicPtr<StaticKey> = AtomicPtr::new(ptr::null_mut()); #[inline]
pub unsafe fn set(key: Key, val: *mut u8) {
let r = unsafe { c::TlsSetValue(key, val.cast()) };
debug_assert_eq!(r, c::TRUE);
}
#[inline]
pub unsafe fn get(key: Key) -> *mut u8 {
unsafe { c::TlsGetValue(key).cast() }
}
static DTORS: AtomicPtr<LazyKey> = AtomicPtr::new(ptr::null_mut());
/// Should only be called once per key, otherwise loops or breaks may occur in /// Should only be called once per key, otherwise loops or breaks may occur in
/// the linked list. /// the linked list.
unsafe fn register_dtor(key: &'static StaticKey) { unsafe fn register_dtor(key: &'static LazyKey) {
guard::enable(); guard::enable();
let this = <*const StaticKey>::cast_mut(key); let this = <*const LazyKey>::cast_mut(key);
// Use acquire ordering to pass along the changes done by the previously // Use acquire ordering to pass along the changes done by the previously
// registered keys when we store the new head with release ordering. // registered keys when we store the new head with release ordering.
let mut head = DTORS.load(Acquire); let mut head = DTORS.load(Acquire);
@ -176,9 +176,9 @@ pub unsafe fn run_dtors() {
let dtor = unsafe { (*cur).dtor.unwrap() }; let dtor = unsafe { (*cur).dtor.unwrap() };
cur = unsafe { (*cur).next.load(Relaxed) }; cur = unsafe { (*cur).next.load(Relaxed) };
// In StaticKey::init, we register the dtor before setting `key`. // In LazyKey::init, we register the dtor before setting `key`.
// So if one thread's `run_dtors` races with another thread executing `init` on the same // So if one thread's `run_dtors` races with another thread executing `init` on the same
// `StaticKey`, we can encounter a key of 0 here. That means this key was never // `LazyKey`, we can encounter a key of 0 here. That means this key was never
// initialized in this thread so we can safely skip it. // initialized in this thread so we can safely skip it.
if pre_key == 0 { if pre_key == 0 {
continue; continue;

View File

@ -30,7 +30,7 @@
//! really. //! really.
//! //!
//! Perhaps one day we can fold the `Box` here into a static allocation, //! Perhaps one day we can fold the `Box` here into a static allocation,
//! expanding the `StaticKey` structure to contain not only a slot for the TLS //! expanding the `LazyKey` structure to contain not only a slot for the TLS
//! key but also a slot for the destructor queue on windows. An optimization for //! key but also a slot for the destructor queue on windows. An optimization for
//! another day! //! another day!

View File

@ -36,7 +36,7 @@ cfg_if::cfg_if! {
pub use native::{EagerStorage, LazyStorage, thread_local_inner}; pub use native::{EagerStorage, LazyStorage, thread_local_inner};
} else { } else {
mod os; mod os;
pub use os::{Key, thread_local_inner}; pub use os::{Storage, thread_local_inner};
} }
} }
@ -126,28 +126,33 @@ pub(crate) mod key {
mod unix; mod unix;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
pub(super) use racy::StaticKey; pub(super) use racy::LazyKey;
use unix::{Key, create, destroy, get, set}; pub(super) use unix::{Key, set};
#[cfg(any(not(target_thread_local), test))]
pub(super) use unix::get;
use unix::{create, destroy};
} else if #[cfg(all(not(target_thread_local), target_os = "windows"))] { } else if #[cfg(all(not(target_thread_local), target_os = "windows"))] {
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
mod windows; mod windows;
pub(super) use windows::{StaticKey, run_dtors}; pub(super) use windows::{Key, LazyKey, get, run_dtors, set};
} else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] { } else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] {
mod racy; mod racy;
mod sgx; mod sgx;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
pub(super) use racy::StaticKey; pub(super) use racy::LazyKey;
use sgx::{Key, create, destroy, get, set}; pub(super) use sgx::{Key, get, set};
use sgx::{create, destroy};
} else if #[cfg(target_os = "xous")] { } else if #[cfg(target_os = "xous")] {
mod racy; mod racy;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
mod xous; mod xous;
pub(super) use racy::StaticKey; pub(super) use racy::LazyKey;
pub(crate) use xous::destroy_tls; pub(crate) use xous::destroy_tls;
use xous::{Key, create, destroy, get, set}; pub(super) use xous::{Key, get, set};
use xous::{create, destroy};
} }
} }
} }

View File

@ -2,7 +2,7 @@ use super::abort_on_dtor_unwind;
use crate::cell::Cell; use crate::cell::Cell;
use crate::marker::PhantomData; use crate::marker::PhantomData;
use crate::ptr; use crate::ptr;
use crate::sys::thread_local::key::StaticKey as OsKey; use crate::sys::thread_local::key::{get, set, Key, LazyKey};
#[doc(hidden)] #[doc(hidden)]
#[allow_internal_unstable(thread_local_internals)] #[allow_internal_unstable(thread_local_internals)]
@ -22,12 +22,12 @@ pub macro thread_local_inner {
unsafe { unsafe {
use $crate::thread::LocalKey; use $crate::thread::LocalKey;
use $crate::thread::local_impl::Key; use $crate::thread::local_impl::Storage;
// Inlining does not work on windows-gnu due to linking errors around // Inlining does not work on windows-gnu due to linking errors around
// dllimports. See https://github.com/rust-lang/rust/issues/109797. // dllimports. See https://github.com/rust-lang/rust/issues/109797.
LocalKey::new(#[cfg_attr(windows, inline(never))] |init| { LocalKey::new(#[cfg_attr(windows, inline(never))] |init| {
static VAL: Key<$t> = Key::new(); static VAL: Storage<$t> = Storage::new();
VAL.get(init, __init) VAL.get(init, __init)
}) })
} }
@ -41,22 +41,23 @@ pub macro thread_local_inner {
/// Use a regular global static to store this key; the state provided will then be /// Use a regular global static to store this key; the state provided will then be
/// thread-local. /// thread-local.
#[allow(missing_debug_implementations)] #[allow(missing_debug_implementations)]
pub struct Key<T> { pub struct Storage<T> {
os: OsKey, key: LazyKey,
marker: PhantomData<Cell<T>>, marker: PhantomData<Cell<T>>,
} }
unsafe impl<T> Sync for Key<T> {} unsafe impl<T> Sync for Storage<T> {}
struct Value<T: 'static> { struct Value<T: 'static> {
value: T, value: T,
key: &'static Key<T>, // INVARIANT: if this value is stored under a TLS key, `key` must be that `key`.
key: Key,
} }
impl<T: 'static> Key<T> { impl<T: 'static> Storage<T> {
#[rustc_const_unstable(feature = "thread_local_internals", issue = "none")] #[rustc_const_unstable(feature = "thread_local_internals", issue = "none")]
pub const fn new() -> Key<T> { pub const fn new() -> Storage<T> {
Key { os: OsKey::new(Some(destroy_value::<T>)), marker: PhantomData } Storage { key: LazyKey::new(Some(destroy_value::<T>)), marker: PhantomData }
} }
/// Get a pointer to the TLS value, potentially initializing it with the /// Get a pointer to the TLS value, potentially initializing it with the
@ -66,19 +67,23 @@ impl<T: 'static> Key<T> {
/// The resulting pointer may not be used after reentrant inialialization /// The resulting pointer may not be used after reentrant inialialization
/// or thread destruction has occurred. /// or thread destruction has occurred.
pub fn get(&'static self, i: Option<&mut Option<T>>, f: impl FnOnce() -> T) -> *const T { pub fn get(&'static self, i: Option<&mut Option<T>>, f: impl FnOnce() -> T) -> *const T {
// SAFETY: (FIXME: get should actually be safe) let key = self.key.force();
let ptr = unsafe { self.os.get() as *mut Value<T> }; let ptr = unsafe { get(key) as *mut Value<T> };
if ptr.addr() > 1 { if ptr.addr() > 1 {
// SAFETY: the check ensured the pointer is safe (its destructor // SAFETY: the check ensured the pointer is safe (its destructor
// is not running) + it is coming from a trusted source (self). // is not running) + it is coming from a trusted source (self).
unsafe { &(*ptr).value } unsafe { &(*ptr).value }
} else { } else {
self.try_initialize(ptr, i, f) // SAFETY: trivially correct.
unsafe { Self::try_initialize(key, ptr, i, f) }
} }
} }
fn try_initialize( /// # Safety
&'static self, /// * `key` must be the result of calling `self.key.force()`
/// * `ptr` must be the current value associated with `key`.
unsafe fn try_initialize(
key: Key,
ptr: *mut Value<T>, ptr: *mut Value<T>,
i: Option<&mut Option<T>>, i: Option<&mut Option<T>>,
f: impl FnOnce() -> T, f: impl FnOnce() -> T,
@ -88,14 +93,19 @@ impl<T: 'static> Key<T> {
return ptr::null(); return ptr::null();
} }
let value = i.and_then(Option::take).unwrap_or_else(f); let value = Box::new(Value { value: i.and_then(Option::take).unwrap_or_else(f), key });
let ptr = Box::into_raw(Box::new(Value { value, key: self })); let ptr = Box::into_raw(value);
// SAFETY: (FIXME: get should actually be safe)
let old = unsafe { self.os.get() as *mut Value<T> }; // SAFETY:
// SAFETY: `ptr` is a correct pointer that can be destroyed by the key destructor. // * key came from a `LazyKey` and is thus correct.
unsafe { // * `ptr` is a correct pointer that can be destroyed by the key destructor.
self.os.set(ptr as *mut u8); // * the value is stored under the key that it contains.
} let old = unsafe {
let old = get(key) as *mut Value<T>;
set(key, ptr as *mut u8);
old
};
if !old.is_null() { if !old.is_null() {
// If the variable was recursively initialized, drop the old value. // If the variable was recursively initialized, drop the old value.
// SAFETY: We cannot be inside a `LocalKey::with` scope, as the // SAFETY: We cannot be inside a `LocalKey::with` scope, as the
@ -123,8 +133,10 @@ unsafe extern "C" fn destroy_value<T: 'static>(ptr: *mut u8) {
abort_on_dtor_unwind(|| { abort_on_dtor_unwind(|| {
let ptr = unsafe { Box::from_raw(ptr as *mut Value<T>) }; let ptr = unsafe { Box::from_raw(ptr as *mut Value<T>) };
let key = ptr.key; let key = ptr.key;
unsafe { key.os.set(ptr::without_provenance_mut(1)) }; // SAFETY: `key` is the TLS key `ptr` was stored under.
unsafe { set(key, ptr::without_provenance_mut(1)) };
drop(ptr); drop(ptr);
unsafe { key.os.set(ptr::null_mut()) }; // SAFETY: `key` is the TLS key `ptr` was stored under.
unsafe { set(key, ptr::null_mut()) };
}); });
} }