Auto merge of #2503 - RalfJung:tls-dtor-order, r=RalfJung

notes on TLS dtor order Fixes https://github.com/rust-lang/miri/issues/2486
2022-08-22 16:21:11 +00:00 · 2022-08-22 16:21:11 +00:00 · e0f0e1fce7
commit e0f0e1fce7
parent a109994f22 afacf62cf0
6 changed files with 24 additions and 204 deletions
--- a/src/shims/tls.rs
+++ b/src/shims/tls.rs
@ -165,8 +165,8 @@ impl<'tcx> TlsData<'tcx> {
    /// and the thread has a non-NULL value associated with that key,
    /// the value of the key is set to NULL, and then the function pointed
    /// to is called with the previously associated value as its sole argument.
-    /// The order of destructor calls is unspecified if more than one destructor
-    /// exists for a thread when it exits.
+    /// **The order of destructor calls is unspecified if more than one destructor
+    /// exists for a thread when it exits.**
    ///
    /// If, after all the destructors have been called for all non-NULL values
    /// with associated destructors, there are still some non-NULL values with
@ -188,6 +188,13 @@ impl<'tcx> TlsData<'tcx> {
            Some(key) => Excluded(key),
            None => Unbounded,
        };
+        // We interpret the documentaion above (taken from POSIX) as saying that we need to iterate
+        // over all keys and run each destructor at least once before running any destructor a 2nd
+        // time. That's why we have `key` to indicate how far we got in the current iteration. If we
+        // return `None`, `schedule_next_pthread_tls_dtor` will re-try with `ket` set to `None` to
+        // start the next round.
+        // TODO: In the future, we might consider randomizing destructor order, but we still have to
+        // uphold this requirement.
        for (&key, TlsEntry { data, dtor }) in thread_local.range_mut((start, Unbounded)) {
            match data.entry(thread_id) {
                BTreeEntry::Occupied(entry) => {
--- a/tests/pass/concurrency/tls_lib_drop.rs
+++ b/tests/pass/concurrency/tls_lib_drop.rs
@ -1,5 +1,3 @@
-//@ignore-target-windows: TLS destructor order is different on Windows.
-
 use std::cell::RefCell;
 use std::thread;

@ -24,14 +22,18 @@ thread_local! {
 /// Check that destructors of the library thread locals are executed immediately
 /// after a thread terminates.
 fn check_destructors() {
+    // We use the same value for both of them, since destructor order differs between Miri on Linux
+    // (which uses `register_dtor_fallback`, in the end using a single pthread_key to manage a
+    // thread-local linked list of dtors to call), real Linux rustc (which uses
+    // `__cxa_thread_atexit_impl`), and Miri on Windows.
    thread::spawn(|| {
        A.with(|f| {
            assert_eq!(*f.value.borrow(), 0);
-            *f.value.borrow_mut() = 5;
+            *f.value.borrow_mut() = 8;
        });
        A_CONST.with(|f| {
            assert_eq!(*f.value.borrow(), 10);
-            *f.value.borrow_mut() = 15;
+            *f.value.borrow_mut() = 8;
        });
    })
    .join()
--- a/tests/pass/concurrency/tls_lib_drop.stdout
+++ b/tests/pass/concurrency/tls_lib_drop.stdout
@ -1,5 +1,5 @@
-Dropping: 5 (should be before 'Continue main 1').
-Dropping: 15 (should be before 'Continue main 1').
+Dropping: 8 (should be before 'Continue main 1').
+Dropping: 8 (should be before 'Continue main 1').
 Continue main 1.
 Joining: 7 (should be before 'Continue main 2').
 Continue main 2.
--- a/tests/pass/concurrency/tls_lib_drop_windows.rs
+++ b/tests/pass/concurrency/tls_lib_drop_windows.rs
@ -1,191 +0,0 @@
-//@only-target-windows: TLS destructor order is different on Windows.
-
-use std::cell::RefCell;
-use std::thread;
-
-struct TestCell {
-    value: RefCell<u8>,
-}
-
-impl Drop for TestCell {
-    fn drop(&mut self) {
-        for _ in 0..10 {
-            thread::yield_now();
-        }
-        println!("Dropping: {} (should be before 'Continue main 1').", *self.value.borrow())
-    }
-}
-
-thread_local! {
-    static A: TestCell = TestCell { value: RefCell::new(0) };
-    static A_CONST: TestCell = const { TestCell { value: RefCell::new(10) } };
-}
-
-/// Check that destructors of the library thread locals are executed immediately
-/// after a thread terminates.
-fn check_destructors() {
-    thread::spawn(|| {
-        A.with(|f| {
-            assert_eq!(*f.value.borrow(), 0);
-            *f.value.borrow_mut() = 5;
-        });
-        A_CONST.with(|f| {
-            assert_eq!(*f.value.borrow(), 10);
-            *f.value.borrow_mut() = 15;
-        });
-    })
-    .join()
-    .unwrap();
-    println!("Continue main 1.")
-}
-
-struct JoinCell {
-    value: RefCell<Option<thread::JoinHandle<u8>>>,
-}
-
-impl Drop for JoinCell {
-    fn drop(&mut self) {
-        for _ in 0..10 {
-            thread::yield_now();
-        }
-        let join_handle = self.value.borrow_mut().take().unwrap();
-        println!("Joining: {} (should be before 'Continue main 2').", join_handle.join().unwrap());
-    }
-}
-
-thread_local! {
-    static B: JoinCell = JoinCell { value: RefCell::new(None) };
-}
-
-/// Check that the destructor can be blocked joining another thread.
-fn check_blocking() {
-    thread::spawn(|| {
-        B.with(|f| {
-            assert!(f.value.borrow().is_none());
-            let handle = thread::spawn(|| 7);
-            *f.value.borrow_mut() = Some(handle);
-        });
-    })
-    .join()
-    .unwrap();
-    println!("Continue main 2.");
-    // Preempt the main thread so that the destructor gets executed and can join
-    // the thread.
-    thread::yield_now();
-    thread::yield_now();
-}
-
-// This test tests that TLS destructors have run before the thread joins. The
-// test has no false positives (meaning: if the test fails, there's actually
-// an ordering problem). It may have false negatives, where the test passes but
-// join is not guaranteed to be after the TLS destructors. However, false
-// negatives should be exceedingly rare due to judicious use of
-// thread::yield_now and running the test several times.
-fn join_orders_after_tls_destructors() {
-    use std::sync::atomic::{AtomicU8, Ordering};
-
-    // We emulate a synchronous MPSC rendezvous channel using only atomics and
-    // thread::yield_now. We can't use std::mpsc as the implementation itself
-    // may rely on thread locals.
-    //
-    // The basic state machine for an SPSC rendezvous channel is:
-    //           FRESH -> THREAD1_WAITING -> MAIN_THREAD_RENDEZVOUS
-    // where the first transition is done by the “receiving” thread and the 2nd
-    // transition is done by the “sending” thread.
-    //
-    // We add an additional state `THREAD2_LAUNCHED` between `FRESH` and
-    // `THREAD1_WAITING` to block until all threads are actually running.
-    //
-    // A thread that joins on the “receiving” thread completion should never
-    // observe the channel in the `THREAD1_WAITING` state. If this does occur,
-    // we switch to the “poison” state `THREAD2_JOINED` and panic all around.
-    // (This is equivalent to “sending” from an alternate producer thread.)
-    const FRESH: u8 = 0;
-    const THREAD2_LAUNCHED: u8 = 1;
-    const THREAD1_WAITING: u8 = 2;
-    const MAIN_THREAD_RENDEZVOUS: u8 = 3;
-    const THREAD2_JOINED: u8 = 4;
-    static SYNC_STATE: AtomicU8 = AtomicU8::new(FRESH);
-
-    for _ in 0..10 {
-        SYNC_STATE.store(FRESH, Ordering::SeqCst);
-
-        let jh = thread::Builder::new()
-            .name("thread1".into())
-            .spawn(move || {
-                struct TlDrop;
-
-                impl Drop for TlDrop {
-                    fn drop(&mut self) {
-                        let mut sync_state = SYNC_STATE.swap(THREAD1_WAITING, Ordering::SeqCst);
-                        loop {
-                            match sync_state {
-                                THREAD2_LAUNCHED | THREAD1_WAITING => thread::yield_now(),
-                                MAIN_THREAD_RENDEZVOUS => break,
-                                THREAD2_JOINED =>
-                                    panic!(
-                                        "Thread 1 still running after thread 2 joined on thread 1"
-                                    ),
-                                v => unreachable!("sync state: {}", v),
-                            }
-                            sync_state = SYNC_STATE.load(Ordering::SeqCst);
-                        }
-                    }
-                }
-
-                thread_local! {
-                    static TL_DROP: TlDrop = TlDrop;
-                }
-
-                TL_DROP.with(|_| {});
-
-                loop {
-                    match SYNC_STATE.load(Ordering::SeqCst) {
-                        FRESH => thread::yield_now(),
-                        THREAD2_LAUNCHED => break,
-                        v => unreachable!("sync state: {}", v),
-                    }
-                }
-            })
-            .unwrap();
-
-        let jh2 = thread::Builder::new()
-            .name("thread2".into())
-            .spawn(move || {
-                assert_eq!(SYNC_STATE.swap(THREAD2_LAUNCHED, Ordering::SeqCst), FRESH);
-                jh.join().unwrap();
-                match SYNC_STATE.swap(THREAD2_JOINED, Ordering::SeqCst) {
-                    MAIN_THREAD_RENDEZVOUS => return,
-                    THREAD2_LAUNCHED | THREAD1_WAITING => {
-                        panic!("Thread 2 running after thread 1 join before main thread rendezvous")
-                    }
-                    v => unreachable!("sync state: {:?}", v),
-                }
-            })
-            .unwrap();
-
-        loop {
-            match SYNC_STATE.compare_exchange(
-                THREAD1_WAITING,
-                MAIN_THREAD_RENDEZVOUS,
-                Ordering::SeqCst,
-                Ordering::SeqCst,
-            ) {
-                Ok(_) => break,
-                Err(FRESH) => thread::yield_now(),
-                Err(THREAD2_LAUNCHED) => thread::yield_now(),
-                Err(THREAD2_JOINED) => {
-                    panic!("Main thread rendezvous after thread 2 joined thread 1")
-                }
-                v => unreachable!("sync state: {:?}", v),
-            }
-        }
-        jh2.join().unwrap();
-    }
-}
-
-fn main() {
-    check_destructors();
-    check_blocking();
-    join_orders_after_tls_destructors();
-}
--- a/tests/pass/concurrency/tls_lib_drop_windows.stdout
+++ b/tests/pass/concurrency/tls_lib_drop_windows.stdout
@ -1,5 +0,0 @@
-Dropping: 15 (should be before 'Continue main 1').
-Dropping: 5 (should be before 'Continue main 1').
-Continue main 1.
-Joining: 7 (should be before 'Continue main 2').
-Continue main 2.
--- a/tests/pass/concurrency/tls_pthread_drop_order.rs
+++ b/tests/pass/concurrency/tls_pthread_drop_order.rs
@ -1,4 +1,9 @@
 //@ignore-target-windows: No libc on Windows
+//! Test that pthread_key destructors are run in the right order.
+//! Note that these are *not* used by actual `thread_local!` on Linux! Those use
+//! `thread_local_dtor::register_dtor` from the stdlib instead. In Miri this hits the fallback path
+//! in `register_dtor_fallback`, which uses a *single* pthread_key to manage a thread-local list of
+//! dtors to call.

 use std::mem;
 use std::ptr;
@ -44,6 +49,8 @@ unsafe extern "C" fn dtor(ptr: *mut u64) {
    // If the record is wrong, the cannary will never get cleared, leading to a leak -> test fails.
    // If the record is incomplete (i.e., more dtor calls happen), the check at the beginning of this function will fail -> test fails.
    // The correct sequence is: First key 0, then key 1, then key 0.
+    // Note that this relies on dtor order, which is not specified by POSIX, but seems to be
+    // consistent between Miri and Linux currently (as of Aug 2022).
    if RECORD == 0_1_0 {
        drop(Box::from_raw(CANNARY));
        CANNARY = ptr::null_mut();