@@ -129,8 +129,8 @@ static __always_inline void release_held_lock_entry(void)
 	 * <error> for lock B
 	 * release_held_lock_entry
 	 *
-	 * try_cmpxchg_acquire for lock A
 	 * grab_held_lock_entry
+	 * try_cmpxchg_acquire for lock A
 	 *
 	 * Lack of any ordering means reordering may occur such that dec, inc
 	 * are done before entry is overwritten. This permits a remote lock
@@ -139,13 +139,8 @@ static __always_inline void release_held_lock_entry(void)
 	 * CPU holds a lock it is attempting to acquire, leading to false ABBA
 	 * diagnosis).
 	 *
-	 * In case of unlock, we will always do a release on the lock word after
-	 * releasing the entry, ensuring that other CPUs cannot hold the lock
-	 * (and make conclusions about deadlocks) until the entry has been
-	 * cleared on the local CPU, preventing any anomalies. Reordering is
-	 * still possible there, but a remote CPU cannot observe a lock in our
-	 * table which it is already holding, since visibility entails our
-	 * release store for the said lock has not retired.
+	 * The case of unlock is treated differently due to NMI reentrancy, see
+	 * comments in res_spin_unlock.
 	 *
 	 * In theory we don't have a problem if the dec and WRITE_ONCE above get
 	 * reordered with each other, we either notice an empty NULL entry on
@@ -175,10 +170,22 @@ static __always_inline int res_spin_lock(rqspinlock_t *lock)
 {
 	int val = 0;
 
-	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) {
-		grab_held_lock_entry(lock);
+	/*
+	 * Grab the deadlock detection entry before doing the cmpxchg, so that
+	 * reentrancy due to NMIs between the succeeding cmpxchg and creation of
+	 * held lock entry can correctly detect an acquisition attempt in the
+	 * interrupted context.
+	 *
+	 * cmpxchg lock A
+	 * <NMI>
+	 * res_spin_lock(A) --> missed AA, leads to timeout
+	 * </NMI>
+	 * grab_held_lock_entry(A)
+	 */
+	grab_held_lock_entry(lock);
+
+	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
 		return 0;
-	}
 	return resilient_queued_spin_lock_slowpath(lock, val);
 }
 
@@ -192,28 +199,25 @@ static __always_inline void res_spin_unlock(rqspinlock_t *lock)
 {
 	struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
 
-	if (unlikely(rqh->cnt > RES_NR_HELD))
-		goto unlock;
-	WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL);
-unlock:
 	/*
-	 * Release barrier, ensures correct ordering. See release_held_lock_entry
-	 * for details. Perform release store instead of queued_spin_unlock,
-	 * since we use this function for test-and-set fallback as well. When we
-	 * have CONFIG_QUEUED_SPINLOCKS=n, we clear the full 4-byte lockword.
+	 * Release barrier, ensures correct ordering. Perform release store
+	 * instead of queued_spin_unlock, since we use this function for the TAS
+	 * fallback as well. When we have CONFIG_QUEUED_SPINLOCKS=n, we clear
+	 * the full 4-byte lockword.
 	 *
-	 * Like release_held_lock_entry, we can do the release before the dec.
-	 * We simply care about not seeing the 'lock' in our table from a remote
-	 * CPU once the lock has been released, which doesn't rely on the dec.
+	 * Perform the smp_store_release before clearing the lock entry so that
+	 * NMIs landing in the unlock path can correctly detect AA issues. The
+	 * opposite order shown below may lead to missed AA checks:
 	 *
-	 * Unlike smp_wmb(), release is not a two way fence, hence it is
-	 * possible for a inc to move up and reorder with our clearing of the
-	 * entry. This isn't a problem however, as for a misdiagnosis of ABBA,
-	 * the remote CPU needs to hold this lock, which won't be released until
-	 * the store below is done, which would ensure the entry is overwritten
-	 * to NULL, etc.
+	 * WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL)
+	 * <NMI>
+	 * res_spin_lock(A) --> missed AA, leads to timeout
+	 * </NMI>
+	 * smp_store_release(A->locked, 0)
 	 */
 	smp_store_release(&lock->locked, 0);
+	if (likely(rqh->cnt <= RES_NR_HELD))
+		WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL);
 	this_cpu_dec(rqspinlock_held_locks.cnt);
 }
 
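To make the ordering argument above concrete, here is a minimal user-space sketch of the invariant the patch establishes: the held-lock table entry is published before the lock word is acquired, and on unlock the lock word is released before the entry is cleared, so a reentrant acquisition (as an NMI would perform) always finds the lock in the table and reports AA instead of timing out. Everything here (struct toy_lock, toy_lock(), toy_unlock(), NR_HELD, the held table, the reduced slowpath) is an illustrative stand-in under those assumptions, not the rqspinlock implementation or its API.

/*
 * Toy single-"CPU" user-space model; identifiers are illustrative only.
 */
#include <stdatomic.h>
#include <stdio.h>

#define NR_HELD 16	/* stands in for RES_NR_HELD */

struct toy_lock {
	atomic_int val;	/* 0 = free, 1 = locked */
};

/* Stand-in for the per-CPU rqspinlock_held table. */
static struct {
	int cnt;
	struct toy_lock *locks[NR_HELD];
} held;

static int toy_lock(struct toy_lock *lock)
{
	int expected = 0;

	/* Publish the held-lock entry first, as the patched fastpath does. */
	held.locks[held.cnt++] = lock;

	/*
	 * A reentrant caller (think NMI) landing between the line above and
	 * the cmpxchg below already sees 'lock' in the table, so an AA
	 * attempt is detected instead of spinning until timeout.
	 */
	if (atomic_compare_exchange_strong_explicit(&lock->val, &expected, 1,
						    memory_order_acquire,
						    memory_order_relaxed))
		return 0;

	/* Reduced slowpath: scan earlier entries for an AA acquisition. */
	for (int i = 0; i < held.cnt - 1; i++) {
		if (held.locks[i] == lock)
			fprintf(stderr, "AA deadlock on %p\n", (void *)lock);
	}
	/* Unwind our entry, mirroring release_held_lock_entry(). */
	held.locks[--held.cnt] = NULL;
	return -1;
}

static void toy_unlock(struct toy_lock *lock)
{
	/* Release the lock word before clearing the table entry ... */
	atomic_store_explicit(&lock->val, 0, memory_order_release);
	/* ... so a reentrant toy_lock() here still finds the entry. */
	held.locks[held.cnt - 1] = NULL;
	held.cnt--;
}

int main(void)
{
	struct toy_lock a = { 0 };

	toy_lock(&a);
	toy_lock(&a);	/* reentrant attempt: reported as AA, not a hang */
	toy_unlock(&a);
	return 0;
}

The memory_order_acquire/memory_order_release arguments mirror atomic_try_cmpxchg_acquire() and smp_store_release() in the patch; in this single-threaded demo they have no observable effect, and the point is purely the program order of the table update relative to the lock-word update.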