From 2ab812c18176dece9058d2dc0639a0eeb5f42c7d Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Wed, 23 Oct 2019 09:30:35 +0200
Subject: [PATCH 01/12] Rename state to state_and_queue

---
 src/libstd/sync/once.rs | 57 +++++++++++++++++++++--------------------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/src/libstd/sync/once.rs b/src/libstd/sync/once.rs
index e28fbca7fa1c2..277ea954c5626 100644
--- a/src/libstd/sync/once.rs
+++ b/src/libstd/sync/once.rs
@@ -78,9 +78,9 @@ use crate::thread::{self, Thread};
 /// ```
 #[stable(feature = "rust1", since = "1.0.0")]
 pub struct Once {
-    // This `state` word is actually an encoded version of just a pointer to a
-    // `Waiter`, so we add the `PhantomData` appropriately.
-    state: AtomicUsize,
+    // `state_and_queue` is actually a pointer to a `Waiter` with extra state
+    // bits, so we add the `PhantomData` appropriately.
+    state_and_queue: AtomicUsize,
     _marker: marker::PhantomData<*mut Waiter>,
 }

@@ -121,8 +121,8 @@ pub struct OnceState {
 )]
 pub const ONCE_INIT: Once = Once::new();

-// Four states that a Once can be in, encoded into the lower bits of `state` in
-// the Once structure.
+// Four states that a Once can be in, encoded into the lower bits of
+// `state_and_queue` in the Once structure.
 const INCOMPLETE: usize = 0x0;
 const POISONED: usize = 0x1;
 const RUNNING: usize = 0x2;
@@ -151,7 +151,7 @@ impl Once {
     #[stable(feature = "once_new", since = "1.2.0")]
     pub const fn new() -> Once {
         Once {
-            state: AtomicUsize::new(INCOMPLETE),
+            state_and_queue: AtomicUsize::new(INCOMPLETE),
             _marker: marker::PhantomData,
         }
     }
@@ -330,7 +330,7 @@ impl Once {
         // operations visible to us, and, this being a fast path, weaker
         // ordering helps with performance. This `Acquire` synchronizes with
         // `SeqCst` operations on the slow path.
-        self.state.load(Ordering::Acquire) == COMPLETE
+        self.state_and_queue.load(Ordering::Acquire) == COMPLETE
     }

     // This is a non-generic function to reduce the monomorphization cost of
@@ -352,10 +352,10 @@ impl Once {
         // This cold path uses SeqCst consistently because the
         // performance difference really does not matter there, and
         // SeqCst minimizes the chances of something going wrong.
-        let mut state = self.state.load(Ordering::SeqCst);
+        let mut state_and_queue = self.state_and_queue.load(Ordering::SeqCst);

         'outer: loop {
-            match state {
+            match state_and_queue {
                 // If we're complete, then there's nothing to do, we just
                 // jettison out as we shouldn't run the closure.
                 COMPLETE => return,
@@ -372,10 +372,11 @@ impl Once {
                 // bits).
                 POISONED |
                 INCOMPLETE => {
-                    let old = self.state.compare_and_swap(state, RUNNING,
-                                                          Ordering::SeqCst);
-                    if old != state {
-                        state = old;
+                    let old = self.state_and_queue.compare_and_swap(state_and_queue,
+                                                                    RUNNING,
+                                                                    Ordering::SeqCst);
+                    if old != state_and_queue {
+                        state_and_queue = old;
                         continue
                     }

@@ -388,7 +389,7 @@ impl Once {
                         panicked: true,
                         me: self,
                     };
-                    init(state == POISONED);
+                    init(state_and_queue == POISONED);
                     complete.panicked = false;
                     return
                 }
@@ -399,7 +400,7 @@ impl Once {
                 // All other values we find should correspond to the RUNNING
                 // state with an encoded waiter list in the more significant
                 // bits. We attempt to enqueue ourselves by moving us to the
                 // head of the list and bail out if we ever see a state that's
                 // not RUNNING.
_ => { - assert!(state & STATE_MASK == RUNNING); + assert!(state_and_queue & STATE_MASK == RUNNING); let mut node = Waiter { thread: Some(thread::current()), signaled: AtomicBool::new(false), @@ -408,13 +409,13 @@ impl Once { let me = &mut node as *mut Waiter as usize; assert!(me & STATE_MASK == 0); - while state & STATE_MASK == RUNNING { - node.next = (state & !STATE_MASK) as *mut Waiter; - let old = self.state.compare_and_swap(state, - me | RUNNING, - Ordering::SeqCst); - if old != state { - state = old; + while state_and_queue & STATE_MASK == RUNNING { + node.next = (state_and_queue & !STATE_MASK) as *mut Waiter; + let old = self.state_and_queue.compare_and_swap(state_and_queue, + me | RUNNING, + Ordering::SeqCst); + if old != state_and_queue { + state_and_queue = old; continue } @@ -424,7 +425,7 @@ impl Once { while !node.signaled.load(Ordering::SeqCst) { thread::park(); } - state = self.state.load(Ordering::SeqCst); + state_and_queue = self.state_and_queue.load(Ordering::SeqCst); continue 'outer } } @@ -444,19 +445,19 @@ impl Drop for Finish<'_> { fn drop(&mut self) { // Swap out our state with however we finished. We should only ever see // an old state which was RUNNING. - let queue = if self.panicked { - self.me.state.swap(POISONED, Ordering::SeqCst) + let state_and_queue = if self.panicked { + self.me.state_and_queue.swap(POISONED, Ordering::SeqCst) } else { - self.me.state.swap(COMPLETE, Ordering::SeqCst) + self.me.state_and_queue.swap(COMPLETE, Ordering::SeqCst) }; - assert_eq!(queue & STATE_MASK, RUNNING); + assert_eq!(state_and_queue & STATE_MASK, RUNNING); // Decode the RUNNING to a list of waiters, then walk that entire list // and wake them up. Note that it is crucial that after we store `true` // in the node it can be free'd! As a result we load the `thread` to // signal ahead of time and then unpark it after the store. unsafe { - let mut queue = (queue & !STATE_MASK) as *mut Waiter; + let mut queue = (state_and_queue & !STATE_MASK) as *mut Waiter; while !queue.is_null() { let next = (*queue).next; let thread = (*queue).thread.take().unwrap(); From 7f1e166899a90226480d564549c36a395e2d8f47 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Wed, 23 Oct 2019 09:50:32 +0200 Subject: [PATCH 02/12] Simplify loop conditions in RUNNING and add comments --- src/libstd/sync/once.rs | 46 ++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/src/libstd/sync/once.rs b/src/libstd/sync/once.rs index 277ea954c5626..7a660daf2cbae 100644 --- a/src/libstd/sync/once.rs +++ b/src/libstd/sync/once.rs @@ -354,7 +354,7 @@ impl Once { // SeqCst minimizes the chances of something going wrong. let mut state_and_queue = self.state_and_queue.load(Ordering::SeqCst); - 'outer: loop { + loop { match state_and_queue { // If we're complete, then there's nothing to do, we just // jettison out as we shouldn't run the closure. @@ -401,33 +401,45 @@ impl Once { // not RUNNING. _ => { assert!(state_and_queue & STATE_MASK == RUNNING); + // Create the node for our current thread that we are going to try to slot + // in at the head of the linked list. let mut node = Waiter { thread: Some(thread::current()), signaled: AtomicBool::new(false), next: ptr::null_mut(), }; let me = &mut node as *mut Waiter as usize; - assert!(me & STATE_MASK == 0); + assert!(me & STATE_MASK == 0); // We assume pointers have 2 free bits that + // we can use for state. + + // Try to slide in the node at the head of the linked list. 
+                    // Run in a loop where we make sure the status is still RUNNING, and that
+                    // another thread did not just replace the head of the linked list.
+                    let mut old_head_and_status = state_and_queue;
+                    loop {
+                        if old_head_and_status & STATE_MASK != RUNNING {
+                            return; // No need anymore to enqueue ourselves.
+                        }

-                    while state_and_queue & STATE_MASK == RUNNING {
-                        node.next = (state_and_queue & !STATE_MASK) as *mut Waiter;
-                        let old = self.state_and_queue.compare_and_swap(state_and_queue,
+                        node.next = (old_head_and_status & !STATE_MASK) as *mut Waiter;
+                        let old = self.state_and_queue.compare_and_swap(old_head_and_status,
                                                                         me | RUNNING,
-                                                                        Ordering::SeqCst);
-                        if old != state_and_queue {
-                            state_and_queue = old;
-                            continue
+                                                                        Ordering::Release);
+                        if old == old_head_and_status {
+                            break; // Success!
                         }
+                        old_head_and_status = old;
+                    }

-                    // Once we've enqueued ourselves, wait in a loop.
-                    // Afterwards reload the state and continue with what we
-                    // were doing from before.
-                    while !node.signaled.load(Ordering::SeqCst) {
-                        thread::park();
-                    }
-                    state_and_queue = self.state_and_queue.load(Ordering::SeqCst);
-                    continue 'outer
+                    // We have enqueued ourselves, now let's wait.
+                    // It is important not to return before being signaled, otherwise we would
+                    // drop our `Waiter` node and leave a hole in the linked list (and a
+                    // dangling reference). Guard against spurious wakeups by reparking
+                    // ourselves until we are signaled.
+                    while !node.signaled.load(Ordering::SeqCst) {
+                        thread::park();
                     }
+                    state_and_queue = self.state_and_queue.load(Ordering::SeqCst);
                 }
             }
         }

From 1479c22a390a6b95706d4280cd7be24e4410dc77 Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Wed, 23 Oct 2019 09:56:41 +0200
Subject: [PATCH 03/12] Don't mutate waiter nodes

---
 src/libstd/sync/once.rs | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/libstd/sync/once.rs b/src/libstd/sync/once.rs
index 7a660daf2cbae..d8565e55ab2fc 100644
--- a/src/libstd/sync/once.rs
+++ b/src/libstd/sync/once.rs
@@ -81,7 +81,7 @@ pub struct Once {
     // `state_and_queue` is actually a pointer to a `Waiter` with extra state
     // bits, so we add the `PhantomData` appropriately.
     state_and_queue: AtomicUsize,
-    _marker: marker::PhantomData<*mut Waiter>,
+    _marker: marker::PhantomData<*const Waiter>,
 }

 // The `PhantomData` of a raw pointer removes these two auto traits, but we
@@ -134,9 +134,9 @@ const STATE_MASK: usize = 0x3;

 // Representation of a node in the linked list of waiters in the RUNNING state.
 struct Waiter {
-    thread: Option<Thread>,
+    thread: Thread,
     signaled: AtomicBool,
-    next: *mut Waiter,
+    next: *const Waiter,
 }

 // Helper struct used to clean up after a closure call with a `Drop`
@@ -404,11 +404,11 @@ impl Once {
                     // Create the node for our current thread that we are going to try to slot
                     // in at the head of the linked list.
                     let mut node = Waiter {
-                        thread: Some(thread::current()),
+                        thread: thread::current(),
                         signaled: AtomicBool::new(false),
-                        next: ptr::null_mut(),
+                        next: ptr::null(),
                     };
-                    let me = &mut node as *mut Waiter as usize;
+                    let me = &node as *const Waiter as usize;
                     assert!(me & STATE_MASK == 0); // We assume pointers have 2 free bits that
                                                    // we can use for state.

@@ -421,7 +421,7 @@ impl Once {
                             return; // No need anymore to enqueue ourselves.
} - node.next = (old_head_and_status & !STATE_MASK) as *mut Waiter; + node.next = (old_head_and_status & !STATE_MASK) as *const Waiter; let old = self.state_and_queue.compare_and_swap(old_head_and_status, me | RUNNING, Ordering::Release); @@ -469,10 +469,10 @@ impl Drop for Finish<'_> { // in the node it can be free'd! As a result we load the `thread` to // signal ahead of time and then unpark it after the store. unsafe { - let mut queue = (state_and_queue & !STATE_MASK) as *mut Waiter; + let mut queue = (state_and_queue & !STATE_MASK) as *const Waiter; while !queue.is_null() { let next = (*queue).next; - let thread = (*queue).thread.take().unwrap(); + let thread = (*queue).thread.clone(); (*queue).signaled.store(true, Ordering::SeqCst); thread.unpark(); queue = next; From fbc242f1ef172f6fa5b7bc837b5c3a78a4c8f850 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Wed, 23 Oct 2019 11:44:31 +0200 Subject: [PATCH 04/12] Turn Finish into WaiterQueue --- src/libstd/sync/once.rs | 44 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/src/libstd/sync/once.rs b/src/libstd/sync/once.rs index d8565e55ab2fc..01cb7582d38dc 100644 --- a/src/libstd/sync/once.rs +++ b/src/libstd/sync/once.rs @@ -139,13 +139,15 @@ struct Waiter { next: *const Waiter, } -// Helper struct used to clean up after a closure call with a `Drop` -// implementation to also run on panic. -struct Finish<'a> { - panicked: bool, - me: &'a Once, +// Head of a linked list of waiters. +// Every node is a struct on the stack of a waiting thread. +// Will wake up the waiters when it gets dropped, i.e. also on panic. +struct WaiterQueue<'a> { + state_and_queue: &'a AtomicUsize, + set_state_on_drop_to: usize, } + impl Once { /// Creates a new `Once` value. #[stable(feature = "once_new", since = "1.2.0")] @@ -379,18 +381,16 @@ impl Once { state_and_queue = old; continue } - - // Run the initialization routine, letting it know if we're - // poisoned or not. The `Finish` struct is then dropped, and - // the `Drop` implementation here is responsible for waking - // up other waiters both in the normal return and panicking - // case. - let mut complete = Finish { - panicked: true, - me: self, + // `waiter_queue` will manage other waiting threads, and + // wake them up on drop. + let mut waiter_queue = WaiterQueue { + state_and_queue: &self.state_and_queue, + set_state_on_drop_to: POISONED, }; + // Run the initialization function, letting it know if we're + // poisoned or not. init(state_and_queue == POISONED); - complete.panicked = false; + waiter_queue.set_state_on_drop_to = COMPLETE; return } @@ -453,15 +453,13 @@ impl fmt::Debug for Once { } } -impl Drop for Finish<'_> { +impl Drop for WaiterQueue<'_> { fn drop(&mut self) { - // Swap out our state with however we finished. We should only ever see - // an old state which was RUNNING. - let state_and_queue = if self.panicked { - self.me.state_and_queue.swap(POISONED, Ordering::SeqCst) - } else { - self.me.state_and_queue.swap(COMPLETE, Ordering::SeqCst) - }; + // Swap out our state with however we finished. + let state_and_queue = self.state_and_queue.swap(self.set_state_on_drop_to, + Ordering::SeqCst); + + // We should only ever see an old state which was RUNNING. 
         assert_eq!(state_and_queue & STATE_MASK, RUNNING);

         // Decode the RUNNING to a list of waiters, then walk that entire list
         // and wake them up. Note that it is crucial that after we store `true`
         // in the node it can be free'd! As a result we load the `thread` to
         // signal ahead of time and then unpark it after the store.
         unsafe {
-            let mut queue = (queue & !STATE_MASK) as *const Waiter;
+            let mut queue = (state_and_queue & !STATE_MASK) as *const Waiter;
             while !queue.is_null() {
                 let next = (*queue).next;
                 let thread = (*queue).thread.clone();

From 2e8eb5f33d55b507da687593bbb7042416d73058 Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Wed, 23 Oct 2019 10:01:22 +0200
Subject: [PATCH 05/12] Move thread parking to a separate function

---
 src/libstd/sync/once.rs | 80 +++++++++++++++++++++--------------------
 1 file changed, 42 insertions(+), 38 deletions(-)

diff --git a/src/libstd/sync/once.rs b/src/libstd/sync/once.rs
index 01cb7582d38dc..2c09fb3318b2b 100644
--- a/src/libstd/sync/once.rs
+++ b/src/libstd/sync/once.rs
@@ -401,44 +401,7 @@ impl Once {
                 // not RUNNING.
                 _ => {
                     assert!(state_and_queue & STATE_MASK == RUNNING);
-                    // Create the node for our current thread that we are going to try to slot
-                    // in at the head of the linked list.
-                    let mut node = Waiter {
-                        thread: thread::current(),
-                        signaled: AtomicBool::new(false),
-                        next: ptr::null(),
-                    };
-                    let me = &node as *const Waiter as usize;
-                    assert!(me & STATE_MASK == 0); // We assume pointers have 2 free bits that
-                                                   // we can use for state.
-
-                    // Try to slide in the node at the head of the linked list.
-                    // Run in a loop where we make sure the status is still RUNNING, and that
-                    // another thread did not just replace the head of the linked list.
-                    let mut old_head_and_status = state_and_queue;
-                    loop {
-                        if old_head_and_status & STATE_MASK != RUNNING {
-                            return; // No need anymore to enqueue ourselves.
-                        }
-
-                        node.next = (old_head_and_status & !STATE_MASK) as *const Waiter;
-                        let old = self.state_and_queue.compare_and_swap(old_head_and_status,
-                                                                        me | RUNNING,
-                                                                        Ordering::Release);
-                        if old == old_head_and_status {
-                            break; // Success!
-                        }
-                        old_head_and_status = old;
-                    }
-
-                    // We have enqueued ourselves, now let's wait.
-                    // It is important not to return before being signaled, otherwise we would
-                    // drop our `Waiter` node and leave a hole in the linked list (and a
-                    // dangling reference). Guard against spurious wakeups by reparking
-                    // ourselves until we are signaled.
-                    while !node.signaled.load(Ordering::SeqCst) {
-                        thread::park();
-                    }
+                    wait(&self.state_and_queue, state_and_queue);
                     state_and_queue = self.state_and_queue.load(Ordering::SeqCst);
                 }
             }
@@ -446,6 +409,47 @@ impl Once {
     }
 }

+fn wait(state_and_queue: &AtomicUsize, current_state: usize) {
+    // Create the node for our current thread that we are going to try to slot
+    // in at the head of the linked list.
+    let mut node = Waiter {
+        thread: thread::current(),
+        signaled: AtomicBool::new(false),
+        next: ptr::null(),
+    };
+    let me = &node as *const Waiter as usize;
+    assert!(me & STATE_MASK == 0); // We assume pointers have 2 free bits that
+                                   // we can use for state.
+
+    // Try to slide in the node at the head of the linked list.
+    // Run in a loop where we make sure the status is still RUNNING, and that
+    // another thread did not just replace the head of the linked list.
+    let mut old_head_and_status = current_state;
+    loop {
+        if old_head_and_status & STATE_MASK != RUNNING {
+            return; // No need anymore to enqueue ourselves.
+        }
+
+        node.next = (old_head_and_status & !STATE_MASK) as *const Waiter;
+        let old = state_and_queue.compare_and_swap(old_head_and_status,
+                                                   me | RUNNING,
+                                                   Ordering::Release);
+        if old == old_head_and_status {
+            break; // Success!
+        }
+        old_head_and_status = old;
+    }
+
+    // We have enqueued ourselves, now let's wait.
+ // It is important not to return before being signaled, otherwise we would + // drop our `Waiter` node and leave a hole in the linked list (and a + // dangling reference). Guard against spurious wakeups by reparking + // ourselves until we are signaled. + while !node.signaled.load(Ordering::SeqCst) { + thread::park(); + } +} + #[stable(feature = "std_debug", since = "1.16.0")] impl fmt::Debug for Once { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { From 4b8da9ccd528d46637c88a40f6cdd0d634c0fb22 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Wed, 23 Oct 2019 11:02:20 +0200 Subject: [PATCH 06/12] Reduce the amount of comments in call_inner --- src/libstd/sync/once.rs | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/src/libstd/sync/once.rs b/src/libstd/sync/once.rs index 2c09fb3318b2b..59cc618804533 100644 --- a/src/libstd/sync/once.rs +++ b/src/libstd/sync/once.rs @@ -355,25 +355,16 @@ impl Once { // performance difference really does not matter there, and // SeqCst minimizes the chances of something going wrong. let mut state_and_queue = self.state_and_queue.load(Ordering::SeqCst); - loop { match state_and_queue { - // If we're complete, then there's nothing to do, we just - // jettison out as we shouldn't run the closure. - COMPLETE => return, - - // If we're poisoned and we're not in a mode to ignore - // poisoning, then we panic here to propagate the poison. + COMPLETE => break, POISONED if !ignore_poisoning => { + // Panic to propagate the poison. panic!("Once instance has previously been poisoned"); } - - // Otherwise if we see a poisoned or otherwise incomplete state - // we will attempt to move ourselves into the RUNNING state. If - // we succeed, then the queue of waiters starts at null (all 0 - // bits). POISONED | INCOMPLETE => { + // Try to register this thread as the one RUNNING. let old = self.state_and_queue.compare_and_swap(state_and_queue, RUNNING, Ordering::SeqCst); @@ -391,15 +382,11 @@ impl Once { // poisoned or not. init(state_and_queue == POISONED); waiter_queue.set_state_on_drop_to = COMPLETE; - return + break } - - // All other values we find should correspond to the RUNNING - // state with an encoded waiter list in the more significant - // bits. We attempt to enqueue ourselves by moving us to the - // head of the list and bail out if we ever see a state that's - // not RUNNING. _ => { + // All other values must be RUNNING with possibly a + // pointer to the waiter queue in the more significant bits. assert!(state_and_queue & STATE_MASK == RUNNING); wait(&self.state_and_queue, state_and_queue); state_and_queue = self.state_and_queue.load(Ordering::SeqCst); From 88c70edef66b6885dec6aa8f7a4e73eff2b745ef Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Wed, 23 Oct 2019 10:10:36 +0200 Subject: [PATCH 07/12] In Waiter use interior mutability for thread --- src/libstd/sync/once.rs | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/libstd/sync/once.rs b/src/libstd/sync/once.rs index 59cc618804533..ef8a95eed272c 100644 --- a/src/libstd/sync/once.rs +++ b/src/libstd/sync/once.rs @@ -52,6 +52,7 @@ // You'll find a few more details in the implementation, but that's the gist of // it! +use crate::cell::Cell; use crate::fmt; use crate::marker; use crate::ptr; @@ -132,9 +133,14 @@ const COMPLETE: usize = 0x3; // this is in the RUNNING state. const STATE_MASK: usize = 0x3; -// Representation of a node in the linked list of waiters in the RUNNING state. 
+// Representation of a node in the linked list of waiters, used while in the
+// RUNNING state.
+// Note: `Waiter` can't hold a mutable pointer to the next thread, because then
+// `wait` would both hand out a mutable reference to its `Waiter` node, and keep
+// a shared reference to check `signaled`. Instead we hold shared references and
+// use interior mutability.
 struct Waiter {
-    thread: Thread,
+    thread: Cell<Option<Thread>>,
     signaled: AtomicBool,
     next: *const Waiter,
 }

@@ -400,7 +406,7 @@ fn wait(state_and_queue: &AtomicUsize, current_state: usize) {
     // Create the node for our current thread that we are going to try to slot
     // in at the head of the linked list.
     let mut node = Waiter {
-        thread: thread::current(),
+        thread: Cell::new(Some(thread::current())),
         signaled: AtomicBool::new(false),
         next: ptr::null(),
     };
@@ -453,18 +459,22 @@ impl Drop for WaiterQueue<'_> {
         // We should only ever see an old state which was RUNNING.
         assert_eq!(state_and_queue & STATE_MASK, RUNNING);

-        // Decode the RUNNING to a list of waiters, then walk that entire list
-        // and wake them up. Note that it is crucial that after we store `true`
-        // in the node it can be free'd! As a result we load the `thread` to
-        // signal ahead of time and then unpark it after the store.
+        // Walk the entire linked list of waiters and wake them up (in LIFO
+        // order, last to register is first to wake up).
         unsafe {
+            // Right after setting `node.signaled = true` the other thread may
+            // free `node` if there happens to be a spurious wakeup.
+            // So we have to take out the `thread` field and copy the pointer to
+            // `next` first.
             let mut queue = (state_and_queue & !STATE_MASK) as *const Waiter;
             while !queue.is_null() {
                 let next = (*queue).next;
-                let thread = (*queue).thread.clone();
+                let thread = (*queue).thread.replace(None).unwrap();
                 (*queue).signaled.store(true, Ordering::SeqCst);
-                thread.unpark();
+                // ^- FIXME (maybe): This is another case of issue #55005
+                //    `store()` has a potentially dangling ref to `signaled`.
                 queue = next;
+                thread.unpark();
             }
         }
     }

From c11a44ab6ce693629a03554b8b35d2218bca83cf Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Wed, 23 Oct 2019 10:01:22 +0200
Subject: [PATCH 08/12] Use more precise atomic orderings

---
 src/libstd/sync/once.rs | 53 +++++++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/src/libstd/sync/once.rs b/src/libstd/sync/once.rs
index ef8a95eed272c..4c14fe75643a8 100644
--- a/src/libstd/sync/once.rs
+++ b/src/libstd/sync/once.rs
@@ -51,6 +51,38 @@
 //
 // You'll find a few more details in the implementation, but that's the gist of
 // it!
+//
+// Atomic orderings:
+// When running `Once` we deal with multiple atomics:
+// `Once.state_and_queue` and an unknown number of `Waiter.signaled`.
+// * `state_and_queue` is used (1) as a state flag, (2) for synchronizing the
+//   result of the `Once`, and (3) for synchronizing `Waiter` nodes.
+//     - At the end of the `call_inner` function we have to make sure the result
+//       of the `Once` is acquired. So every load which can be the only one to
+//       load COMPLETE must have at least Acquire ordering, which means all
+//       three of them.
+//     - `WaiterQueue::Drop` is the only place that may store COMPLETE, and
+//       must do so with Release ordering to make the result available.
+//     - `wait` inserts `Waiter` nodes as a pointer in `state_and_queue`, and
+//       needs to make the nodes available with Release ordering. The load in
+//       its `compare_and_swap` can be Relaxed because it only has to compare
+//       the atomic, not to read other data.
+//     - `WaiterQueue::Drop` must see the `Waiter` nodes, so it must load
+//       `state_and_queue` with Acquire ordering.
+//     - There is just one store where `state_and_queue` is used only as a
+//       state flag, without having to synchronize data: switching the state
+//       from INCOMPLETE to RUNNING in `call_inner`. This store can be Relaxed,
+//       but the read has to be Acquire because of the requirements mentioned
+//       above.
+// * `Waiter.signaled` is both used as a flag, and to protect a field with
+//   interior mutability in `Waiter`. `Waiter.thread` is changed in
+//   `WaiterQueue::Drop` which then sets `signaled` with Release ordering.
+//   After `wait` loads `signaled` with Acquire and sees it is true, it needs to
+//   see the changes to drop the `Waiter` struct correctly.
+// * There is one place where the two atomics `Once.state_and_queue` and
+//   `Waiter.signaled` come together, and might be reordered by the compiler or
+//   processor. Because both use Acquire ordering such a reordering is not
+//   allowed, so no need for SeqCst.

 use crate::cell::Cell;
 use crate::fmt;
@@ -337,7 +369,7 @@ impl Once {
         // An `Acquire` load is enough because that makes all the initialization
         // operations visible to us, and, this being a fast path, weaker
         // ordering helps with performance. This `Acquire` synchronizes with
-        // `SeqCst` operations on the slow path.
+        // `Release` operations on the slow path.
         self.state_and_queue.load(Ordering::Acquire) == COMPLETE
     }

@@ -355,12 +387,9 @@ impl Once {
     #[cold]
     fn call_inner(&self,
                   ignore_poisoning: bool,
-                  init: &mut dyn FnMut(bool)) {
-
-        // This cold path uses SeqCst consistently because the
-        // performance difference really does not matter there, and
-        // SeqCst minimizes the chances of something going wrong.
-        let mut state_and_queue = self.state_and_queue.load(Ordering::SeqCst);
+                  init: &mut dyn FnMut(bool))
+    {
+        let mut state_and_queue = self.state_and_queue.load(Ordering::Acquire);
         loop {
             match state_and_queue {
                 COMPLETE => break,
@@ -373,7 +402,7 @@ impl Once {
                     // Try to register this thread as the one RUNNING.
                     let old = self.state_and_queue.compare_and_swap(state_and_queue,
                                                                     RUNNING,
-                                                                    Ordering::SeqCst);
+                                                                    Ordering::Acquire);
                     if old != state_and_queue {
                         state_and_queue = old;
                         continue
@@ -395,7 +424,7 @@ impl Once {
                     // pointer to the waiter queue in the more significant bits.
                     assert!(state_and_queue & STATE_MASK == RUNNING);
                     wait(&self.state_and_queue, state_and_queue);
-                    state_and_queue = self.state_and_queue.load(Ordering::SeqCst);
+                    state_and_queue = self.state_and_queue.load(Ordering::Acquire);
                 }
             }
         }
@@ -438,7 +467,7 @@ fn wait(state_and_queue: &AtomicUsize, current_state: usize) {
     // drop our `Waiter` node and leave a hole in the linked list (and a
     // dangling reference). Guard against spurious wakeups by reparking
     // ourselves until we are signaled.
-    while !node.signaled.load(Ordering::SeqCst) {
+    while !node.signaled.load(Ordering::Acquire) {
         thread::park();
     }
 }
@@ -454,7 +483,7 @@ impl Drop for WaiterQueue<'_> {
     fn drop(&mut self) {
         // Swap out our state with however we finished.
         let state_and_queue = self.state_and_queue.swap(self.set_state_on_drop_to,
-                                                        Ordering::SeqCst);
+                                                        Ordering::AcqRel);

         // We should only ever see an old state which was RUNNING.
         assert_eq!(state_and_queue & STATE_MASK, RUNNING);

@@ -470,7 +499,7 @@ impl Drop for WaiterQueue<'_> {
             while !queue.is_null() {
                 let next = (*queue).next;
                 let thread = (*queue).thread.replace(None).unwrap();
-                (*queue).signaled.store(true, Ordering::SeqCst);
+                (*queue).signaled.store(true, Ordering::Release);
                 // ^- FIXME (maybe): This is another case of issue #55005
                 //    `store()` has a potentially dangling ref to `signaled`.
                 queue = next;

From c2bbfeadcce08a4b8ce02b66906ecc542cc9df39 Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Thu, 24 Oct 2019 17:08:23 +0200
Subject: [PATCH 09/12] Always align Waiter to 4 bytes

---
 src/libstd/sync/once.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/libstd/sync/once.rs b/src/libstd/sync/once.rs
index 4c14fe75643a8..c135471e2f2ea 100644
--- a/src/libstd/sync/once.rs
+++ b/src/libstd/sync/once.rs
@@ -171,6 +171,7 @@ const STATE_MASK: usize = 0x3;
 // `wait` would both hand out a mutable reference to its `Waiter` node, and keep
 // a shared reference to check `signaled`. Instead we hold shared references and
 // use interior mutability.
+#[repr(align(4))] // Ensure the two lower bits are free to use as state bits.
 struct Waiter {
     thread: Cell<Option<Thread>>,
     signaled: AtomicBool,

From 3712bb68c4f76161b54dcade7c1497b3ffc32e11 Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Fri, 25 Oct 2019 10:01:27 +0200
Subject: [PATCH 10/12] Mention park guarantee

---
 src/libstd/sync/once.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/libstd/sync/once.rs b/src/libstd/sync/once.rs
index c135471e2f2ea..bdb941cff5219 100644
--- a/src/libstd/sync/once.rs
+++ b/src/libstd/sync/once.rs
@@ -469,6 +469,10 @@ fn wait(state_and_queue: &AtomicUsize, current_state: usize) {
     // dangling reference). Guard against spurious wakeups by reparking
     // ourselves until we are signaled.
     while !node.signaled.load(Ordering::Acquire) {
+        // If the managing thread happens to signal and unpark us before we can
+        // park ourselves, the result could be this thread never gets unparked.
+        // Luckily `park` comes with the guarantee that if it got an `unpark`
+        // just before on an unparked thread it does not park.
         thread::park();
     }
 }

From 4c66658f2c51df8d7d97c975395cc161b8df2f98 Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Mon, 4 Nov 2019 20:49:47 +0100
Subject: [PATCH 11/12] Don't mutate node.next

---
 src/libstd/sync/once.rs | 70 ++++++++++++++++++++---------------------
 1 file changed, 34 insertions(+), 36 deletions(-)

diff --git a/src/libstd/sync/once.rs b/src/libstd/sync/once.rs
index bdb941cff5219..252a2d4319f34 100644
--- a/src/libstd/sync/once.rs
+++ b/src/libstd/sync/once.rs
@@ -87,7 +87,6 @@
 use crate::cell::Cell;
 use crate::fmt;
 use crate::marker;
-use crate::ptr;
 use crate::sync::atomic::{AtomicUsize, AtomicBool, Ordering};
 use crate::thread::{self, Thread};

@@ -432,48 +431,47 @@ impl Once {
     }
 }

-fn wait(state_and_queue: &AtomicUsize, current_state: usize) {
-    // Create the node for our current thread that we are going to try to slot
-    // in at the head of the linked list.
-    let mut node = Waiter {
-        thread: Cell::new(Some(thread::current())),
-        signaled: AtomicBool::new(false),
-        next: ptr::null(),
-    };
-    let me = &node as *const Waiter as usize;
-    assert!(me & STATE_MASK == 0); // We assume pointers have 2 free bits that
-                                   // we can use for state.
-
-    // Try to slide in the node at the head of the linked list.
-    // Run in a loop where we make sure the status is still RUNNING, and that
-    // another thread did not just replace the head of the linked list.
-    let mut old_head_and_status = current_state;
+fn wait(state_and_queue: &AtomicUsize, mut current_state: usize) {
+    // Note: the following code was carefully written to avoid creating a
+    // mutable reference to `node` that gets aliased.
     loop {
-        if old_head_and_status & STATE_MASK != RUNNING {
-            return; // No need anymore to enqueue ourselves.
+        // Don't queue this thread if the status is no longer running,
+        // otherwise we will not be woken up.
+        if current_state & STATE_MASK != RUNNING {
+            return;
         }

-        node.next = (old_head_and_status & !STATE_MASK) as *const Waiter;
-        let old = state_and_queue.compare_and_swap(old_head_and_status,
+        // Create the node for our current thread.
+        let node = Waiter {
+            thread: Cell::new(Some(thread::current())),
+            signaled: AtomicBool::new(false),
+            next: (current_state & !STATE_MASK) as *const Waiter,
+        };
+        let me = &node as *const Waiter as usize;
+
+        // Try to slide in the node at the head of the linked list, making sure
+        // that another thread didn't just replace the head of the linked list.
+        let old = state_and_queue.compare_and_swap(current_state,
                                                    me | RUNNING,
                                                    Ordering::Release);
-        if old == old_head_and_status {
-            break; // Success!
+        if old != current_state {
+            current_state = old;
+            continue;
         }
-        old_head_and_status = old;
-    }

-    // We have enqueued ourselves, now let's wait.
-    // It is important not to return before being signaled, otherwise we would
-    // drop our `Waiter` node and leave a hole in the linked list (and a
-    // dangling reference). Guard against spurious wakeups by reparking
-    // ourselves until we are signaled.
-    while !node.signaled.load(Ordering::Acquire) {
-        // If the managing thread happens to signal and unpark us before we can
-        // park ourselves, the result could be this thread never gets unparked.
-        // Luckily `park` comes with the guarantee that if it got an `unpark`
-        // just before on an unparked thread it does not park.
+        // We have enqueued ourselves, now let's wait.
+        // It is important not to return before being signaled, otherwise we
+        // would drop our `Waiter` node and leave a hole in the linked list
+        // (and a dangling reference). Guard against spurious wakeups by
+        // reparking ourselves until we are signaled.
+        while !node.signaled.load(Ordering::Acquire) {
+            // If the managing thread happens to signal and unpark us before we
+            // can park ourselves, the result could be this thread never gets
+            // unparked. Luckily `park` comes with the guarantee that if it got
+            // an `unpark` just before on an unparked thread it does not park.
+            thread::park();
+        }
+        break;
     }
 }

From b05e200867ce633848d34d8a184bf45c7fa905a4 Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Sat, 9 Nov 2019 12:46:17 +0100
Subject: [PATCH 12/12] Run rustfmt on libstd/sync/once.rs

---
 src/libstd/sync/once.rs | 61 ++++++++++++++++++++---------------------
 1 file changed, 29 insertions(+), 32 deletions(-)

diff --git a/src/libstd/sync/once.rs b/src/libstd/sync/once.rs
index 252a2d4319f34..e8e395247f9c1 100644
--- a/src/libstd/sync/once.rs
+++ b/src/libstd/sync/once.rs
@@ -87,7 +87,7 @@
 use crate::cell::Cell;
 use crate::fmt;
 use crate::marker;
-use crate::sync::atomic::{AtomicUsize, AtomicBool, Ordering};
+use crate::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use crate::thread::{self, Thread};

 /// A synchronization primitive which can be used to run a one-time global
@@ -149,7 +149,7 @@ pub struct OnceState {
 #[rustc_deprecated(
     since = "1.38.0",
     reason = "the `new` function is now preferred",
-    suggestion = "Once::new()",
+    suggestion = "Once::new()"
 )]
 pub const ONCE_INIT: Once = Once::new();

@@ -185,15 +185,11 @@ struct WaiterQueue<'a> {
     set_state_on_drop_to: usize,
 }

-
 impl Once {
     /// Creates a new `Once` value.
     #[stable(feature = "once_new", since = "1.2.0")]
     pub const fn new() -> Once {
-        Once {
-            state_and_queue: AtomicUsize::new(INCOMPLETE),
-            _marker: marker::PhantomData,
-        }
+        Once { state_and_queue: AtomicUsize::new(INCOMPLETE), _marker: marker::PhantomData }
     }

     /// Performs an initialization routine once and only once. The given closure
@@ -254,7 +250,10 @@ impl Once {
     ///
     /// [poison]: struct.Mutex.html#poisoning
     #[stable(feature = "rust1", since = "1.0.0")]
-    pub fn call_once<F>(&self, f: F) where F: FnOnce() {
+    pub fn call_once<F>(&self, f: F)
+    where
+        F: FnOnce(),
+    {
         // Fast path check
         if self.is_completed() {
             return;
@@ -311,16 +310,17 @@ impl Once {
     /// INIT.call_once(|| {});
     /// ```
     #[unstable(feature = "once_poison", issue = "33577")]
-    pub fn call_once_force<F>(&self, f: F) where F: FnOnce(&OnceState) {
+    pub fn call_once_force<F>(&self, f: F)
+    where
+        F: FnOnce(&OnceState),
+    {
         // Fast path check
         if self.is_completed() {
             return;
         }

         let mut f = Some(f);
-        self.call_inner(true, &mut |p| {
-            f.take().unwrap()(&OnceState { poisoned: p })
-        });
+        self.call_inner(true, &mut |p| f.take().unwrap()(&OnceState { poisoned: p }));
     }

     /// Returns `true` if some `call_once` call has completed
@@ -385,10 +385,7 @@ impl Once {
     // currently no way to take an `FnOnce` and call it via virtual dispatch
     // without some allocation overhead.
     #[cold]
-    fn call_inner(&self,
-                  ignore_poisoning: bool,
-                  init: &mut dyn FnMut(bool))
-    {
+    fn call_inner(&self, ignore_poisoning: bool, init: &mut dyn FnMut(bool)) {
         let mut state_and_queue = self.state_and_queue.load(Ordering::Acquire);
         loop {
             match state_and_queue {
@@ -397,15 +394,16 @@ impl Once {
                     // Panic to propagate the poison.
                     panic!("Once instance has previously been poisoned");
                 }
-                POISONED |
-                INCOMPLETE => {
+                POISONED | INCOMPLETE => {
                     // Try to register this thread as the one RUNNING.
-                    let old = self.state_and_queue.compare_and_swap(state_and_queue,
-                                                                    RUNNING,
-                                                                    Ordering::Acquire);
+                    let old = self.state_and_queue.compare_and_swap(
+                        state_and_queue,
+                        RUNNING,
+                        Ordering::Acquire,
+                    );
                     if old != state_and_queue {
                         state_and_queue = old;
-                        continue
+                        continue;
                     }
                     // `waiter_queue` will manage other waiting threads, and
                     // wake them up on drop.
@@ -417,7 +415,7 @@ impl Once {
                     // Run the initialization function, letting it know if we're
                     // poisoned or not.
init(state_and_queue == POISONED); waiter_queue.set_state_on_drop_to = COMPLETE; - break + break; } _ => { // All other values must be RUNNING with possibly a @@ -451,9 +449,7 @@ fn wait(state_and_queue: &AtomicUsize, mut current_state: usize) { // Try to slide in the node at the head of the linked list, making sure // that another thread didn't just replace the head of the linked list. - let old = state_and_queue.compare_and_swap(current_state, - me | RUNNING, - Ordering::Release); + let old = state_and_queue.compare_and_swap(current_state, me | RUNNING, Ordering::Release); if old != current_state { current_state = old; continue; @@ -485,8 +481,8 @@ impl fmt::Debug for Once { impl Drop for WaiterQueue<'_> { fn drop(&mut self) { // Swap out our state with however we finished. - let state_and_queue = self.state_and_queue.swap(self.set_state_on_drop_to, - Ordering::AcqRel); + let state_and_queue = + self.state_and_queue.swap(self.set_state_on_drop_to, Ordering::AcqRel); // We should only ever see an old state which was RUNNING. assert_eq!(state_and_queue & STATE_MASK, RUNNING); @@ -562,10 +558,10 @@ impl OnceState { #[cfg(all(test, not(target_os = "emscripten")))] mod tests { + use super::Once; use crate::panic; use crate::sync::mpsc::channel; use crate::thread; - use super::Once; #[test] fn smoke_once() { @@ -585,8 +581,10 @@ mod tests { let (tx, rx) = channel(); for _ in 0..10 { let tx = tx.clone(); - thread::spawn(move|| { - for _ in 0..4 { thread::yield_now() } + thread::spawn(move || { + for _ in 0..4 { + thread::yield_now() + } unsafe { O.call_once(|| { assert!(!RUN); @@ -675,6 +673,5 @@ mod tests { assert!(t1.join().is_ok()); assert!(t2.join().is_ok()); - } }
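
A note on the pointer-tagging scheme the series above relies on (patches 01, 03 and 09): because `Waiter` is aligned to at least 4 bytes, the two lower bits of a waiter's address are always zero, so they can carry the INCOMPLETE/POISONED/RUNNING/COMPLETE state while the upper bits hold the head of the waiter queue. A minimal standalone sketch of that encoding follows; the `Node` and `payload` names are illustrative only and not part of the patches:

use std::sync::atomic::{AtomicUsize, Ordering};

const STATE_MASK: usize = 0b11; // the two low bits hold the state
const RUNNING: usize = 0x2;

#[repr(align(4))] // alignment >= 4 keeps the two low bits of any address zero
struct Node {
    payload: u32,
}

fn main() {
    let node = Node { payload: 7 };
    let ptr = &node as *const Node as usize;
    assert!(ptr & STATE_MASK == 0); // the same assumption `wait` asserts

    // Pack the pointer and the state into one word, as `state_and_queue` does.
    let state_and_queue = AtomicUsize::new(ptr | RUNNING);

    // Unpack: mask out the low bits to recover the pointer, and keep only the
    // low bits to recover the state.
    let value = state_and_queue.load(Ordering::Relaxed);
    assert_eq!(value & STATE_MASK, RUNNING);
    let head = (value & !STATE_MASK) as *const Node;
    assert_eq!(unsafe { (*head).payload }, 7); // `node` is still alive here
}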
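
The ordering argument in patch 08 boils down to a Release/Acquire pair: the thread that finishes the `Once` publishes its result with a Release store, and any thread that observes COMPLETE with an Acquire load is guaranteed to also see the initialization it protects. A self-contained demonstration of that handshake with plain atomics (not the `Once` internals themselves):

use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::thread;

static DATA: AtomicUsize = AtomicUsize::new(0);
static READY: AtomicBool = AtomicBool::new(false);

fn main() {
    let t = thread::spawn(|| {
        DATA.store(42, Ordering::Relaxed); // produce the "result"
        READY.store(true, Ordering::Release); // publish it
    });

    // An Acquire load that observes `true` synchronizes-with the Release
    // store above, so the earlier write to DATA is guaranteed to be visible.
    while !READY.load(Ordering::Acquire) {
        thread::yield_now();
    }
    assert_eq!(DATA.load(Ordering::Relaxed), 42);
    t.join().unwrap();
}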