Re: [PATCH tip/locking/core v10 6/7] locking/pvqspinlock: Allow limited lock stealing

From: Peter Zijlstra
Date: Tue Nov 10 2015 - 11:03:59 EST


On Mon, Nov 09, 2015 at 07:09:26PM -0500, Waiman Long wrote:
> @@ -291,7 +292,7 @@ static __always_inline void __pv_wait_head(struct qspinlock *lock,
> void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
> {
> struct mcs_spinlock *prev, *next, *node;
> - u32 new, old, tail;
> + u32 new, old, tail, locked;
> int idx;
>
> BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
> @@ -431,11 +432,25 @@ queue:
> * sequentiality; this is because the set_locked() function below
> * does not imply a full barrier.
> *
> + * The PV pv_wait_head_or_lock function, if active, will acquire
> + * the lock and return a non-zero value. So we have to skip the
> + * smp_load_acquire() call. As the next PV queue head hasn't been
> + * designated yet, there is no way for the locked value to become
> + * _Q_SLOW_VAL. So both the set_locked() and the
> + * atomic_cmpxchg_relaxed() calls will be safe.
> + *
> + * If PV isn't active, 0 will be returned instead.
> + *
> */
> - pv_wait_head(lock, node);
> - while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
> + locked = val = pv_wait_head_or_lock(lock, node);
> + if (locked)
> + goto reset_tail_or_wait_next;
> +
> + while ((val = smp_load_acquire(&lock->val.counter))
> + & _Q_LOCKED_PENDING_MASK)
> cpu_relax();
>
> +reset_tail_or_wait_next:
> /*
> * claim the lock:
> *
> @@ -447,8 +462,12 @@ queue:
> * to grab the lock.
> */
> for (;;) {
> - if (val != tail) {
> - set_locked(lock);
> + /*
> + * The lock value may or may not have the _Q_LOCKED_VAL bit set.
> + */
> + if ((val & _Q_TAIL_MASK) != tail) {
> + if (!locked)
> + set_locked(lock);
> break;
> }
> /*

How about this instead? If we've already got _Q_LOCKED_VAL set, issuing
that store again isn't much of a problem: the cacheline is already hot
and we own it, and it's a regular store, not an atomic.
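
For reference, the "regular store" here is set_locked(), which is just a
byte-wide plain store; roughly (sketch from memory of
kernel/locking/qspinlock.c around that time, the exact form may differ):

static __always_inline void set_locked(struct qspinlock *lock)
{
	struct __qspinlock *l = (void *)lock;

	/* Plain byte store; callers provide any ordering they need. */
	WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
}

So repeating it on a line we already own costs next to nothing.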

@@ -432,10 +433,13 @@ void queued_spin_lock_slowpath(struct qs
* does not imply a full barrier.
*
*/
- pv_wait_head(lock, node);
+ if ((val = pv_wait_head_or_lock(lock, node)))
+ goto locked;
+
while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
cpu_relax();

+locked:
/*
* claim the lock:
*
@@ -447,7 +451,8 @@ void queued_spin_lock_slowpath(struct qs
* to grab the lock.
*/
for (;;) {
- if (val != tail) {
+ /* In the PV case we might already have _Q_LOCKED_VAL set */
+ if ((val & _Q_TAIL_MASK) != tail) {
set_locked(lock);
break;
}
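
In the !PARAVIRT build pv_wait_head_or_lock() would presumably stay a
stub returning 0, following the existing pv_wait_head() stub pattern,
something like the below, so the goto is never taken and the native path
is unchanged:

/* hypothetical native stub, mirroring the other pv_*() stubs */
static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
						  struct mcs_spinlock *node)
{
	return 0;
}

#define pv_wait_head_or_lock	__pv_wait_head_or_lock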