From b6bcbf76fdd4da771977c1591eaec3faad05a9f3 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sat, 11 Jun 2022 20:45:45 +0100 Subject: [PATCH 1/4] Prevent futex_wait from reading outdated value --- src/shims/unix/linux/sync.rs | 26 ++++++++++------ tests/pass/concurrency/linux-futex.rs | 44 +++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 10 deletions(-) diff --git a/src/shims/unix/linux/sync.rs b/src/shims/unix/linux/sync.rs index 72898baa4b0..42494da37b7 100644 --- a/src/shims/unix/linux/sync.rs +++ b/src/shims/unix/linux/sync.rs @@ -126,24 +126,26 @@ pub fn futex<'tcx>( Align::from_bytes(4).unwrap(), CheckInAllocMsg::MemoryAccessTest, )?; + // This SeqCst fence is paired with the SeqCst fence in futex_wake. + // Together, they make sure that our read on addr observes the latest + // value in modification order. + // + // If there is another thread which has changed the value of + // addr (to something other than expected) and called futex_wake + // before we get to run, then we must not block our thread + // because there'll be no one to wake us. We must see + // the value changed by the other thread and return without + // actually waiting. + this.atomic_fence(&[], AtomicFenceOp::SeqCst)?; // Read an `i32` through the pointer, regardless of any wrapper types. // It's not uncommon for `addr` to be passed as another type than `*mut i32`, such as `*const AtomicI32`. // FIXME: this fails if `addr` is not a pointer type. - // The atomic ordering for futex(https://man7.org/linux/man-pages/man2/futex.2.html): - // "The load of the value of the futex word is an - // atomic memory access (i.e., using atomic machine instructions - // of the respective architecture). This load, the comparison - // with the expected value, and starting to sleep are performed - // atomically and totally ordered with respect to other futex - // operations on the same futex word." - // SeqCst is total order over all operations. - // FIXME: check if this should be changed when weak memory orders are added. let futex_val = this .read_scalar_at_offset_atomic( &addr.into(), 0, this.machine.layouts.i32, - AtomicReadOp::SeqCst, + AtomicReadOp::Relaxed, )? .to_i32()?; if val == futex_val { @@ -203,6 +205,10 @@ pub fn futex<'tcx>( this.write_scalar(Scalar::from_machine_isize(-1, this), dest)?; return Ok(()); } + // Together with the SeqCst fence in futex_wait, this makes sure that futex_wait + // will see the latest value on addr which could be changed by our caller + // before doing the syscall. + this.atomic_fence(&[], AtomicFenceOp::SeqCst)?; let mut n = 0; for _ in 0..val { if let Some(thread) = this.futex_wake(addr_usize, bitset) { diff --git a/tests/pass/concurrency/linux-futex.rs b/tests/pass/concurrency/linux-futex.rs index b2791a42856..56aba60d534 100644 --- a/tests/pass/concurrency/linux-futex.rs +++ b/tests/pass/concurrency/linux-futex.rs @@ -6,6 +6,8 @@ extern crate libc; use std::mem::MaybeUninit; use std::ptr; +use std::sync::atomic::AtomicI32; +use std::sync::atomic::Ordering; use std::thread; use std::time::{Duration, Instant}; @@ -206,6 +208,47 @@ fn wait_wake_bitset() { t.join().unwrap(); } +const FREE: i32 = 0; +const HELD: i32 = 1; +fn concurrent_wait_wake() { + static FUTEX: AtomicI32 = AtomicI32::new(0); + for _ in 0..100 { + // Suppose the main thread is holding a lock implemented using futex... + FUTEX.store(HELD, Ordering::Relaxed); + + let t = thread::spawn(move || { + // If this syscall runs first, then we'll be woken up by + // the main thread's FUTEX_WAKE, and all is fine. + // + // If this sycall runs after the main thread's store + // and FUTEX_WAKE, the syscall must observe that + // the FUTEX is FREE != HELD and return without waiting + // or we'll deadlock. + unsafe { + libc::syscall( + libc::SYS_futex, + &FUTEX as *const AtomicI32, + libc::FUTEX_WAIT, + HELD, + ptr::null::(), + ); + } + }); + + FUTEX.store(FREE, Ordering::Relaxed); + unsafe { + libc::syscall( + libc::SYS_futex, + &FUTEX as *const AtomicI32, + libc::FUTEX_WAKE, + 1, + ); + } + + t.join().unwrap(); + } +} + fn main() { wake_nobody(); wake_dangling(); @@ -214,4 +257,5 @@ fn main() { wait_absolute_timeout(); wait_wake(); wait_wake_bitset(); + concurrent_wait_wake(); } From 56a4c132b6c8661010b5a176d334fd2a0db7cf53 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Mon, 13 Jun 2022 18:24:19 +0100 Subject: [PATCH 2/4] Reduce the number of iterations --- tests/pass/concurrency/linux-futex.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pass/concurrency/linux-futex.rs b/tests/pass/concurrency/linux-futex.rs index 56aba60d534..0d30ddff480 100644 --- a/tests/pass/concurrency/linux-futex.rs +++ b/tests/pass/concurrency/linux-futex.rs @@ -212,7 +212,7 @@ const FREE: i32 = 0; const HELD: i32 = 1; fn concurrent_wait_wake() { static FUTEX: AtomicI32 = AtomicI32::new(0); - for _ in 0..100 { + for _ in 0..20 { // Suppose the main thread is holding a lock implemented using futex... FUTEX.store(HELD, Ordering::Relaxed); From 807a19a50a663c75874bb79afeecd4b12de583e2 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Wed, 15 Jun 2022 01:44:32 +0100 Subject: [PATCH 3/4] Elaborate correctness comments --- src/shims/unix/linux/sync.rs | 51 +++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/src/shims/unix/linux/sync.rs b/src/shims/unix/linux/sync.rs index 42494da37b7..37d694a32f8 100644 --- a/src/shims/unix/linux/sync.rs +++ b/src/shims/unix/linux/sync.rs @@ -126,16 +126,47 @@ pub fn futex<'tcx>( Align::from_bytes(4).unwrap(), CheckInAllocMsg::MemoryAccessTest, )?; - // This SeqCst fence is paired with the SeqCst fence in futex_wake. - // Together, they make sure that our read on addr observes the latest - // value in modification order. + // There may be a concurrent thread changing the value of addr + // and then invoking the FUTEX_WAKE syscall. It is critical that the + // effects of this and the other thread are correctly observed, + // otherwise we will deadlock. // - // If there is another thread which has changed the value of - // addr (to something other than expected) and called futex_wake - // before we get to run, then we must not block our thread - // because there'll be no one to wake us. We must see - // the value changed by the other thread and return without - // actually waiting. + // There are two scenarios to consider: + // 1. If we (FUTEX_WAIT) executes first, we'll push ourselves into + // the waiters queue and go to sleep. They (addr write & FUTEX_WAKE) + // will see us in the queue and wake us up. + // 2. If they (addr write & FUTEX_WAKE) executes first, we must observe + // addr's new value. If we see an outdated value that happens to equal + // the expected val, then we'll put ourselves to sleep with no one to wake us + // up, so we end up with a deadlock. This is prevented by having a SeqCst + // fence inside FUTEX_WAKE syscall, and another SeqCst fence + // below, the atomic read on addr after the SeqCst fence is guaranteed + // not to see any value older than the addr write immediately before + // calling FUTEX_WAKE. We'll see futex_val != val and return without + // sleeping. + // + // Note that the fences do not create any happens-before relationship. + // The read sees the write immediately before the fence not because + // one happens after the other, but is instead due to a guarantee unique + // to SeqCst fences that restricts what an atomic read placed AFTER the + // fence can see. The read still has to be atomic, otherwise it's a data + // race. This guarantee cannot be achieved with acquire-release fences + // since they only talk about reads placed BEFORE a fence - and places + // no restrictions on what the read itself can see, only that there is + // a happens-before between the fences IF the read happens to see the + // right value. This is useless to us, since we need the read itself + // to see an up-to-date value. + // + // It is also critical that the fence, the atomic load, and the comparison + // altogether happen atomically. If the other thread's fence in FUTEX_WAKE + // gets interleaved after our fence, then we lose the guarantee on the + // atomic load being up-to-date; if the other thread's write on addr and FUTEX_WAKE + // call are interleaved after the load but before the comparison, then we get a TOCTOU + // race condition, and go to sleep thinking the other thread will wake us up, + // even though they have already finished. + // + // Thankfully, preemptions cannot happen inside a Miri shim, so we do not need to + // do anything special to guarantee fence-load-comparison atomicity. this.atomic_fence(&[], AtomicFenceOp::SeqCst)?; // Read an `i32` through the pointer, regardless of any wrapper types. // It's not uncommon for `addr` to be passed as another type than `*mut i32`, such as `*const AtomicI32`. @@ -149,7 +180,7 @@ pub fn futex<'tcx>( )? .to_i32()?; if val == futex_val { - // The value still matches, so we block the trait make it wait for FUTEX_WAKE. + // The value still matches, so we block the thread make it wait for FUTEX_WAKE. this.block_thread(thread); this.futex_wait(addr_usize, thread, bitset); // Succesfully waking up from FUTEX_WAIT always returns zero. From 737a5b3b9814e919d30f60323a7679697a34b153 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 18 Jun 2022 07:59:46 -0700 Subject: [PATCH 4/4] tweak correctness comment --- src/shims/unix/linux/sync.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/shims/unix/linux/sync.rs b/src/shims/unix/linux/sync.rs index 37d694a32f8..0fdbde8d677 100644 --- a/src/shims/unix/linux/sync.rs +++ b/src/shims/unix/linux/sync.rs @@ -132,10 +132,10 @@ pub fn futex<'tcx>( // otherwise we will deadlock. // // There are two scenarios to consider: - // 1. If we (FUTEX_WAIT) executes first, we'll push ourselves into + // 1. If we (FUTEX_WAIT) execute first, we'll push ourselves into // the waiters queue and go to sleep. They (addr write & FUTEX_WAKE) // will see us in the queue and wake us up. - // 2. If they (addr write & FUTEX_WAKE) executes first, we must observe + // 2. If they (addr write & FUTEX_WAKE) execute first, we must observe // addr's new value. If we see an outdated value that happens to equal // the expected val, then we'll put ourselves to sleep with no one to wake us // up, so we end up with a deadlock. This is prevented by having a SeqCst @@ -157,7 +157,9 @@ pub fn futex<'tcx>( // right value. This is useless to us, since we need the read itself // to see an up-to-date value. // - // It is also critical that the fence, the atomic load, and the comparison + // The above case distinction is valid since both FUTEX_WAIT and FUTEX_WAKE + // contain a SeqCst fence, therefore inducting a total order between the operations. + // It is also critical that the fence, the atomic load, and the comparison in FUTEX_WAIT // altogether happen atomically. If the other thread's fence in FUTEX_WAKE // gets interleaved after our fence, then we lose the guarantee on the // atomic load being up-to-date; if the other thread's write on addr and FUTEX_WAKE