Auto merge of #60341 - mtak-:macos-tlv-workaround, r=alexcrichton

macos tlv workaround

fixes: #60141

Includes:
* remove dead code: `requires_move_before_drop`. This hasn't been needed for a while now (oops I should have removed it in #57655)
* redox had a copy of `fast::Key` (not sure why?). That has been removed.
* Perform a `read_volatile` on OSX to reduce `tlv_get_addr` calls per `__getit` from (4-2 depending on context) to 1.

`tlv_get_addr` is relatively expensive (~1.5ns on my machine).

Previously, in contexts where `__getit` was inlined, 4 calls to `tlv_get_addr` were performed per lookup. For some reason when `__getit` is not inlined this is reduced to 2x - and performance improves to match.

After this PR, I have only ever seen 1x call to `tlv_get_addr` per `__getit`, and macos now benefits from situations where `__getit` is inlined.

I'm not sure if the `read_volatile(&&__KEY)` trick is working around an LLVM bug, or a rustc bug, or neither.

r? @alexcrichton
This commit is contained in:
bors 2019-06-20 02:36:49 +00:00
commit 3c805ce183
7 changed files with 196 additions and 237 deletions

View File

@ -1,111 +1,4 @@
#![cfg(target_thread_local)]
#![unstable(feature = "thread_local_internals", issue = "0")]
use crate::cell::{Cell, UnsafeCell};
use crate::mem;
use crate::ptr;
pub struct Key<T> {
inner: UnsafeCell<Option<T>>,
// Metadata to keep track of the state of the destructor. Remember that
// these variables are thread-local, not global.
dtor_registered: Cell<bool>,
dtor_running: Cell<bool>,
}
unsafe impl<T> Sync for Key<T> { }
impl<T> Key<T> {
pub const fn new() -> Key<T> {
Key {
inner: UnsafeCell::new(None),
dtor_registered: Cell::new(false),
dtor_running: Cell::new(false)
}
}
pub fn get(&'static self) -> Option<&'static UnsafeCell<Option<T>>> {
unsafe {
if mem::needs_drop::<T>() && self.dtor_running.get() {
return None
}
self.register_dtor();
}
Some(&self.inner)
}
unsafe fn register_dtor(&self) {
if !mem::needs_drop::<T>() || self.dtor_registered.get() {
return
}
register_dtor(self as *const _ as *mut u8,
destroy_value::<T>);
self.dtor_registered.set(true);
}
}
pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern fn(*mut u8)) {
// The fallback implementation uses a vanilla OS-based TLS key to track
// the list of destructors that need to be run for this thread. The key
// then has its own destructor which runs all the other destructors.
//
// The destructor for DTORS is a little special in that it has a `while`
// loop to continuously drain the list of registered destructors. It
// *should* be the case that this loop always terminates because we
// provide the guarantee that a TLS key cannot be set after it is
// flagged for destruction.
use crate::sys_common::thread_local as os;
static DTORS: os::StaticKey = os::StaticKey::new(Some(run_dtors));
type List = Vec<(*mut u8, unsafe extern fn(*mut u8))>;
if DTORS.get().is_null() {
let v: Box<List> = box Vec::new();
DTORS.set(Box::into_raw(v) as *mut u8);
}
let list: &mut List = &mut *(DTORS.get() as *mut List);
list.push((t, dtor));
unsafe extern fn run_dtors(mut ptr: *mut u8) {
while !ptr.is_null() {
let list: Box<List> = Box::from_raw(ptr as *mut List);
for (ptr, dtor) in list.into_iter() {
dtor(ptr);
}
ptr = DTORS.get();
DTORS.set(ptr::null_mut());
}
}
}
pub unsafe extern fn destroy_value<T>(ptr: *mut u8) {
let ptr = ptr as *mut Key<T>;
// Right before we run the user destructor be sure to flag the
// destructor as running for this thread so calls to `get` will return
// `None`.
(*ptr).dtor_running.set(true);
// The macOS implementation of TLS apparently had an odd aspect to it
// where the pointer we have may be overwritten while this destructor
// is running. Specifically if a TLS destructor re-accesses TLS it may
// trigger a re-initialization of all TLS variables, paving over at
// least some destroyed ones with initial values.
//
// This means that if we drop a TLS value in place on macOS that we could
// revert the value to its original state halfway through the
// destructor, which would be bad!
//
// Hence, we use `ptr::read` on macOS (to move to a "safe" location)
// instead of drop_in_place.
if cfg!(target_os = "macos") {
ptr::read((*ptr).inner.get());
} else {
ptr::drop_in_place((*ptr).inner.get());
}
}
pub fn requires_move_before_drop() -> bool {
false
}
pub use crate::sys_common::thread_local::register_dtor_fallback as register_dtor;

View File

@ -82,7 +82,3 @@ fn _tlv_atexit(dtor: unsafe extern fn(*mut u8),
}
}
}
pub fn requires_move_before_drop() -> bool {
false
}

View File

@ -2,7 +2,3 @@
#![cfg(target_thread_local)]
pub use crate::sys_common::thread_local::register_dtor_fallback as register_dtor;
pub fn requires_move_before_drop() -> bool {
false
}

View File

@ -2,10 +2,7 @@
#![unstable(feature = "thread_local_internals", issue = "0")]
use crate::cell::UnsafeCell;
use crate::fmt;
use crate::hint;
use crate::mem;
/// A thread local storage key which owns its contents.
///
@ -92,10 +89,7 @@ pub struct LocalKey<T: 'static> {
// trivially devirtualizable by LLVM because the value of `inner` never
// changes and the constant should be readonly within a crate. This mainly
// only runs into problems when TLS statics are exported across crates.
inner: unsafe fn() -> Option<&'static UnsafeCell<Option<T>>>,
// initialization routine to invoke to create a value
init: fn() -> T,
inner: unsafe fn() -> Option<&'static T>,
}
#[stable(feature = "std_debug", since = "1.16.0")]
@ -159,10 +153,7 @@ macro_rules! __thread_local_inner {
#[inline]
fn __init() -> $t { $init }
unsafe fn __getit() -> $crate::option::Option<
&'static $crate::cell::UnsafeCell<
$crate::option::Option<$t>>>
{
unsafe fn __getit() -> $crate::option::Option<&'static $t> {
#[cfg(all(target_arch = "wasm32", not(target_feature = "atomics")))]
static __KEY: $crate::thread::__StaticLocalKeyInner<$t> =
$crate::thread::__StaticLocalKeyInner::new();
@ -182,11 +173,11 @@ unsafe fn __getit() -> $crate::option::Option<
static __KEY: $crate::thread::__OsLocalKeyInner<$t> =
$crate::thread::__OsLocalKeyInner::new();
__KEY.get()
__KEY.get(__init)
}
unsafe {
$crate::thread::LocalKey::new(__getit, __init)
$crate::thread::LocalKey::new(__getit)
}
}
};
@ -221,11 +212,9 @@ impl<T: 'static> LocalKey<T> {
#[unstable(feature = "thread_local_internals",
reason = "recently added to create a key",
issue = "0")]
pub const unsafe fn new(inner: unsafe fn() -> Option<&'static UnsafeCell<Option<T>>>,
init: fn() -> T) -> LocalKey<T> {
pub const unsafe fn new(inner: unsafe fn() -> Option<&'static T>) -> LocalKey<T> {
LocalKey {
inner,
init,
}
}
@ -246,37 +235,6 @@ pub fn with<F, R>(&'static self, f: F) -> R
after it is destroyed")
}
unsafe fn init(&self, slot: &UnsafeCell<Option<T>>) -> &T {
// Execute the initialization up front, *then* move it into our slot,
// just in case initialization fails.
let value = (self.init)();
let ptr = slot.get();
// note that this can in theory just be `*ptr = Some(value)`, but due to
// the compiler will currently codegen that pattern with something like:
//
// ptr::drop_in_place(ptr)
// ptr::write(ptr, Some(value))
//
// Due to this pattern it's possible for the destructor of the value in
// `ptr` (e.g., if this is being recursively initialized) to re-access
// TLS, in which case there will be a `&` and `&mut` pointer to the same
// value (an aliasing violation). To avoid setting the "I'm running a
// destructor" flag we just use `mem::replace` which should sequence the
// operations a little differently and make this safe to call.
mem::replace(&mut *ptr, Some(value));
// After storing `Some` we want to get a reference to the contents of
// what we just stored. While we could use `unwrap` here and it should
// always work it empirically doesn't seem to always get optimized away,
// which means that using something like `try_with` can pull in
// panicking code and cause a large size bloat.
match *ptr {
Some(ref x) => x,
None => hint::unreachable_unchecked(),
}
}
/// Acquires a reference to the value in this TLS key.
///
/// This will lazily initialize the value if this thread has not referenced
@ -293,13 +251,68 @@ pub fn try_with<F, R>(&'static self, f: F) -> Result<R, AccessError>
F: FnOnce(&T) -> R,
{
unsafe {
let slot = (self.inner)().ok_or(AccessError {
let thread_local = (self.inner)().ok_or(AccessError {
_private: (),
})?;
Ok(f(match *slot.get() {
Some(ref inner) => inner,
None => self.init(slot),
}))
Ok(f(thread_local))
}
}
}
mod lazy {
use crate::cell::UnsafeCell;
use crate::mem;
use crate::hint;
pub struct LazyKeyInner<T> {
inner: UnsafeCell<Option<T>>,
}
impl<T> LazyKeyInner<T> {
pub const fn new() -> LazyKeyInner<T> {
LazyKeyInner {
inner: UnsafeCell::new(None),
}
}
pub unsafe fn get(&self) -> Option<&'static T> {
(*self.inner.get()).as_ref()
}
pub unsafe fn initialize<F: FnOnce() -> T>(&self, init: F) -> &'static T {
// Execute the initialization up front, *then* move it into our slot,
// just in case initialization fails.
let value = init();
let ptr = self.inner.get();
// note that this can in theory just be `*ptr = Some(value)`, but due to
// the compiler will currently codegen that pattern with something like:
//
// ptr::drop_in_place(ptr)
// ptr::write(ptr, Some(value))
//
// Due to this pattern it's possible for the destructor of the value in
// `ptr` (e.g., if this is being recursively initialized) to re-access
// TLS, in which case there will be a `&` and `&mut` pointer to the same
// value (an aliasing violation). To avoid setting the "I'm running a
// destructor" flag we just use `mem::replace` which should sequence the
// operations a little differently and make this safe to call.
mem::replace(&mut *ptr, Some(value));
// After storing `Some` we want to get a reference to the contents of
// what we just stored. While we could use `unwrap` here and it should
// always work it empirically doesn't seem to always get optimized away,
// which means that using something like `try_with` can pull in
// panicking code and cause a large size bloat.
match *ptr {
Some(ref x) => x,
None => hint::unreachable_unchecked(),
}
}
#[allow(unused)]
pub unsafe fn take(&mut self) -> Option<T> {
(*self.inner.get()).take()
}
}
}
@ -309,11 +322,11 @@ pub fn try_with<F, R>(&'static self, f: F) -> Result<R, AccessError>
#[doc(hidden)]
#[cfg(all(target_arch = "wasm32", not(target_feature = "atomics")))]
pub mod statik {
use crate::cell::UnsafeCell;
use super::lazy::LazyKeyInner;
use crate::fmt;
pub struct Key<T> {
inner: UnsafeCell<Option<T>>,
inner: LazyKeyInner<T>,
}
unsafe impl<T> Sync for Key<T> { }
@ -327,12 +340,16 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl<T> Key<T> {
pub const fn new() -> Key<T> {
Key {
inner: UnsafeCell::new(None),
inner: LazyKeyInner::new(),
}
}
pub unsafe fn get(&self) -> Option<&'static UnsafeCell<Option<T>>> {
Some(&*(&self.inner as *const _))
pub unsafe fn get(&self, init: fn() -> T) -> Option<&'static T> {
let value = match self.inner.get() {
Some(ref value) => value,
None => self.inner.initialize(init),
};
Some(value)
}
}
}
@ -340,19 +357,38 @@ pub unsafe fn get(&self) -> Option<&'static UnsafeCell<Option<T>>> {
#[doc(hidden)]
#[cfg(target_thread_local)]
pub mod fast {
use crate::cell::{Cell, UnsafeCell};
use super::lazy::LazyKeyInner;
use crate::cell::Cell;
use crate::fmt;
use crate::mem;
use crate::ptr;
use crate::sys::fast_thread_local::{register_dtor, requires_move_before_drop};
use crate::sys::fast_thread_local::register_dtor;
#[derive(Copy, Clone)]
enum DtorState {
Unregistered,
Registered,
RunningOrHasRun,
}
// This data structure has been carefully constructed so that the fast path
// only contains one branch on x86. That optimization is necessary to avoid
// duplicated tls lookups on OSX.
//
// LLVM issue: https://bugs.llvm.org/show_bug.cgi?id=41722
pub struct Key<T> {
inner: UnsafeCell<Option<T>>,
// If `LazyKeyInner::get` returns `None`, that indicates either:
// * The value has never been initialized
// * The value is being recursively initialized
// * The value has already been destroyed or is being destroyed
// To determine which kind of `None`, check `dtor_state`.
//
// This is very optimizer friendly for the fast path - initialized but
// not yet dropped.
inner: LazyKeyInner<T>,
// Metadata to keep track of the state of the destructor. Remember that
// these variables are thread-local, not global.
dtor_registered: Cell<bool>,
dtor_running: Cell<bool>,
// this variable is thread-local, not global.
dtor_state: Cell<DtorState>,
}
impl<T> fmt::Debug for Key<T> {
@ -364,54 +400,75 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl<T> Key<T> {
pub const fn new() -> Key<T> {
Key {
inner: UnsafeCell::new(None),
dtor_registered: Cell::new(false),
dtor_running: Cell::new(false)
inner: LazyKeyInner::new(),
dtor_state: Cell::new(DtorState::Unregistered),
}
}
pub unsafe fn get(&self) -> Option<&'static UnsafeCell<Option<T>>> {
if mem::needs_drop::<T>() && self.dtor_running.get() {
return None
pub unsafe fn get<F: FnOnce() -> T>(&self, init: F) -> Option<&'static T> {
match self.inner.get() {
Some(val) => Some(val),
None => self.try_initialize(init),
}
self.register_dtor();
Some(&*(&self.inner as *const _))
}
unsafe fn register_dtor(&self) {
if !mem::needs_drop::<T>() || self.dtor_registered.get() {
return
// `try_initialize` is only called once per fast thread local variable,
// except in corner cases where thread_local dtors reference other
// thread_local's, or it is being recursively initialized.
//
// Macos: Inlining this function can cause two `tlv_get_addr` calls to
// be performed for every call to `Key::get`. The #[cold] hint makes
// that less likely.
// LLVM issue: https://bugs.llvm.org/show_bug.cgi?id=41722
#[cold]
unsafe fn try_initialize<F: FnOnce() -> T>(&self, init: F) -> Option<&'static T> {
if !mem::needs_drop::<T>() || self.try_register_dtor() {
Some(self.inner.initialize(init))
} else {
None
}
}
register_dtor(self as *const _ as *mut u8,
destroy_value::<T>);
self.dtor_registered.set(true);
// `try_register_dtor` is only called once per fast thread local
// variable, except in corner cases where thread_local dtors reference
// other thread_local's, or it is being recursively initialized.
unsafe fn try_register_dtor(&self) -> bool {
match self.dtor_state.get() {
DtorState::Unregistered => {
// dtor registration happens before initialization.
register_dtor(self as *const _ as *mut u8,
destroy_value::<T>);
self.dtor_state.set(DtorState::Registered);
true
}
DtorState::Registered => {
// recursively initialized
true
}
DtorState::RunningOrHasRun => {
false
}
}
}
}
unsafe extern fn destroy_value<T>(ptr: *mut u8) {
let ptr = ptr as *mut Key<T>;
// Right before we run the user destructor be sure to flag the
// destructor as running for this thread so calls to `get` will return
// `None`.
(*ptr).dtor_running.set(true);
// Some implementations may require us to move the value before we drop
// it as it could get re-initialized in-place during destruction.
//
// Hence, we use `ptr::read` on those platforms (to move to a "safe"
// location) instead of drop_in_place.
if requires_move_before_drop() {
ptr::read((*ptr).inner.get());
} else {
ptr::drop_in_place((*ptr).inner.get());
}
// Right before we run the user destructor be sure to set the
// `Option<T>` to `None`, and `dtor_state` to `RunningOrHasRun`. This
// causes future calls to `get` to run `try_initialize_drop` again,
// which will now fail, and return `None`.
let value = (*ptr).inner.take();
(*ptr).dtor_state.set(DtorState::RunningOrHasRun);
drop(value);
}
}
#[doc(hidden)]
pub mod os {
use crate::cell::{Cell, UnsafeCell};
use super::lazy::LazyKeyInner;
use crate::cell::Cell;
use crate::fmt;
use crate::marker;
use crate::ptr;
@ -432,8 +489,8 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
unsafe impl<T> Sync for Key<T> { }
struct Value<T: 'static> {
inner: LazyKeyInner<T>,
key: &'static Key<T>,
value: UnsafeCell<Option<T>>,
}
impl<T: 'static> Key<T> {
@ -444,24 +501,43 @@ pub const fn new() -> Key<T> {
}
}
pub unsafe fn get(&'static self) -> Option<&'static UnsafeCell<Option<T>>> {
pub unsafe fn get(&'static self, init: fn() -> T) -> Option<&'static T> {
let ptr = self.os.get() as *mut Value<T>;
if !ptr.is_null() {
if ptr as usize == 1 {
return None
if ptr as usize > 1 {
match (*ptr).inner.get() {
Some(ref value) => return Some(value),
None => {},
}
return Some(&(*ptr).value);
}
self.try_initialize(init)
}
// `try_initialize` is only called once per os thread local variable,
// except in corner cases where thread_local dtors reference other
// thread_local's, or it is being recursively initialized.
unsafe fn try_initialize(&'static self, init: fn() -> T) -> Option<&'static T> {
let ptr = self.os.get() as *mut Value<T>;
if ptr as usize == 1 {
// destructor is running
return None
}
// If the lookup returned null, we haven't initialized our own
// local copy, so do that now.
let ptr: Box<Value<T>> = box Value {
key: self,
value: UnsafeCell::new(None),
let ptr = if ptr.is_null() {
// If the lookup returned null, we haven't initialized our own
// local copy, so do that now.
let ptr: Box<Value<T>> = box Value {
inner: LazyKeyInner::new(),
key: self,
};
let ptr = Box::into_raw(ptr);
self.os.set(ptr as *mut u8);
ptr
} else {
// recursive initialization
ptr
};
let ptr = Box::into_raw(ptr);
self.os.set(ptr as *mut u8);
Some(&(*ptr).value)
Some((*ptr).inner.initialize(init))
}
}

View File

@ -5,7 +5,7 @@
#[cfg(not(target_thread_local))]
struct Key<T> {
_data: std::cell::UnsafeCell<Option<T>>,
_flag: std::cell::Cell<bool>,
_flag: std::cell::Cell<()>,
}
#[cfg(not(target_thread_local))]
@ -13,7 +13,7 @@ impl<T> Key<T> {
const fn new() -> Self {
Key {
_data: std::cell::UnsafeCell::new(None),
_flag: std::cell::Cell::new(false),
_flag: std::cell::Cell::new(()),
}
}
}
@ -23,6 +23,6 @@ const fn new() -> Self {
static __KEY: Key<()> = Key::new();
//~^ ERROR `std::cell::UnsafeCell<std::option::Option<()>>` cannot be shared between threads
//~| ERROR `std::cell::Cell<bool>` cannot be shared between threads safely [E0277]
//~| ERROR cannot be shared between threads safely [E0277]
fn main() {}

View File

@ -13,15 +13,13 @@
static __KEY: std::thread::__OsLocalKeyInner<Foo> =
std::thread::__OsLocalKeyInner::new();
fn __getit() -> std::option::Option<
&'static std::cell::UnsafeCell<
std::option::Option<Foo>>>
fn __getit() -> std::option::Option<&'static Foo>
{
__KEY.get() //~ ERROR call to unsafe function is unsafe
__KEY.get(Default::default) //~ ERROR call to unsafe function is unsafe
}
static FOO: std::thread::LocalKey<Foo> =
std::thread::LocalKey::new(__getit, Default::default);
std::thread::LocalKey::new(__getit);
//~^ ERROR call to unsafe function is unsafe
fn main() {

View File

@ -1,16 +1,16 @@
error[E0133]: call to unsafe function is unsafe and requires unsafe function or block
--> $DIR/issue-43733.rs:20:5
--> $DIR/issue-43733.rs:18:5
|
LL | __KEY.get()
| ^^^^^^^^^^^ call to unsafe function
LL | __KEY.get(Default::default)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^ call to unsafe function
|
= note: consult the function's documentation for information on how to avoid undefined behavior
error[E0133]: call to unsafe function is unsafe and requires unsafe function or block
--> $DIR/issue-43733.rs:24:5
--> $DIR/issue-43733.rs:22:5
|
LL | std::thread::LocalKey::new(__getit, Default::default);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ call to unsafe function
LL | std::thread::LocalKey::new(__getit);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ call to unsafe function
|
= note: consult the function's documentation for information on how to avoid undefined behavior