[PATCH 2/9] locking/pvqspinlock: Introduce CONFIG_PARAVIRT_QSPINLOCKS_LITE

From: Waiman Long
Date: Tue Jul 07 2020 - 22:29:16 EST


Add a new PARAVIRT_QSPINLOCKS_LITE config option that allows
architectures to use the PV qspinlock code without having to implement
or use a pv_kick() function, thus eliminating the atomic unlock
overhead. The non-atomic queued_spin_unlock() can be used instead.
A pv_wait() function is still needed, but it can be a dummy function.

With that option set, the hybrid PV queued/unfair locking code should
still deliver adequate performance in a paravirtualized environment.
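
As a rough sketch (not part of this patch; all names below are
hypothetical), an architecture opting into the lite mode would select
the new option from its Kconfig and only has to supply a pv_wait()
implementation, which may even be a no-op:

    /*
     * Hypothetical arch glue, assuming the arch Kconfig does
     * "select PARAVIRT_QSPINLOCKS_LITE".  With the lite option,
     * pv_kick() is never invoked by the slowpath, so only
     * pv_wait() needs backing and it may simply return.
     */
    static inline void pv_wait(u8 *ptr, u8 val)
    {
            /*
             * No hypervisor wait facility assumed here: return
             * immediately and let the caller re-check *ptr.
             */
    }

Whether a pv_kick() stub is still needed to satisfy the build is
architecture-specific and not shown here.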

Signed-off-by: Waiman Long <longman@xxxxxxxxxx>
---
 kernel/Kconfig.locks                |  4 +++
 kernel/locking/lock_events_list.h   |  3 ++
 kernel/locking/qspinlock_paravirt.h | 49 ++++++++++++++++++++++++-----
 kernel/locking/qspinlock_stat.h     |  5 +--
 4 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 3de8fd11873b..1824ba8c44a9 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -243,6 +243,10 @@ config QUEUED_SPINLOCKS
def_bool y if ARCH_USE_QUEUED_SPINLOCKS
depends on SMP

+config PARAVIRT_QSPINLOCKS_LITE
+ bool
+ depends on QUEUED_SPINLOCKS && PARAVIRT_SPINLOCKS
+
config BPF_ARCH_SPINLOCK
bool

diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
index 239039d0ce21..9ae07a7148e8 100644
--- a/kernel/locking/lock_events_list.h
+++ b/kernel/locking/lock_events_list.h
@@ -22,11 +22,14 @@
/*
* Locking events for PV qspinlock.
*/
+#ifndef CONFIG_PARAVIRT_QSPINLOCKS_LITE
LOCK_EVENT(pv_hash_hops) /* Average # of hops per hashing operation */
LOCK_EVENT(pv_kick_unlock) /* # of vCPU kicks issued at unlock time */
LOCK_EVENT(pv_kick_wake) /* # of vCPU kicks for pv_latency_wake */
LOCK_EVENT(pv_latency_kick) /* Average latency (ns) of vCPU kick */
LOCK_EVENT(pv_latency_wake) /* Average latency (ns) of kick-to-wakeup */
+#endif
+
LOCK_EVENT(pv_lock_stealing) /* # of lock stealing operations */
LOCK_EVENT(pv_spurious_wakeup) /* # of spurious wakeups in non-head vCPUs */
LOCK_EVENT(pv_wait_again) /* # of waits after queue head vCPU kick */
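
For context on the hunk above (not part of the patch): each
LOCK_EVENT() line expands into an enum identifier plus a matching
debugfs counter, so compiling the kick/wake events out under
CONFIG_PARAVIRT_QSPINLOCKS_LITE removes those counters entirely.
A simplified view of the expansion, following
kernel/locking/lock_events.h:

    /* Each LOCK_EVENT(name) line becomes one enum entry. */
    #define LOCK_EVENT(name)        LOCKEVENT_ ## name,

    enum lock_events {
    #include "lock_events_list.h"   /* one ID per LOCK_EVENT() line */
            lockevent_num,          /* total number of lock events */
    };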
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 8eec58320b85..2d24563aa9b9 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -77,6 +77,23 @@ struct pv_node {
* This hybrid PV queued/unfair lock combines the best attributes of a
* queued lock (no lock starvation) and an unfair lock (good performance
* on not heavily contended locks).
+ *
+ * PV lock lite
+ * ------------
+ *
+ * By default, the PV lock uses two hypervisor-specific functions,
+ * pv_wait() and pv_kick(), to yield the vcpu back to the hypervisor
+ * and to request that it put the given vcpu online again, respectively.
+ *
+ * The pv_kick() function is called at unlock time and requires an
+ * atomic instruction to prevent a missed wakeup. This unlock overhead
+ * is a major reason why the PV lock is slightly slower than the native
+ * lock. Not all hypervisors actually need both pv_wait() and pv_kick().
+ * The PARAVIRT_QSPINLOCKS_LITE config option enables a lighter version
+ * of the PV lock that relies mainly on the hybrid queued/unfair lock.
+ * The pv_wait() function is still used if provided, but pv_kick() is
+ * never called; this eliminates the unlock overhead and allows the
+ * non-atomic queued_spin_unlock() to be used.
*/
#define queued_spin_trylock(l) pv_hybrid_queued_unfair_trylock(l)
static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
@@ -153,6 +170,7 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
}
#endif /* _Q_PENDING_BITS == 8 */

+#ifndef CONFIG_PARAVIRT_QSPINLOCKS_LITE
/*
* Lock and MCS node addresses hash table for fast lookup
*
@@ -410,6 +428,29 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
}
#endif /* __pv_queued_spin_unlock */

+static inline void set_pv_node_running(struct pv_node *pn)
+{
+ /*
+ * If pv_kick_node() changed us to vcpu_hashed, retain that value so
+ * that pv_wait_head_or_lock() will not try to hash this lock.
+ */
+ cmpxchg(&pn->state, vcpu_halted, vcpu_running);
+}
+#else
+static inline bool pv_hash_lock(struct qspinlock *lock, struct pv_node *node)
+{
+ return false;
+}
+
+static inline void pv_kick_node(struct qspinlock *lock,
+ struct mcs_spinlock *node) { }
+
+static inline void set_pv_node_running(struct pv_node *pn)
+{
+ pn->state = vcpu_running;
+}
+#endif /* CONFIG_PARAVIRT_QSPINLOCKS_LITE */
+
/*
* Return true when it is time to check the previous node, which is not
* in a running state.
@@ -475,13 +516,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
lockevent_cond_inc(pv_wait_early, wait_early);
pv_wait(&pn->state, vcpu_halted);
}
-
- /*
- * If pv_kick_node() changed us to vcpu_hashed, retain that
- * value so that pv_wait_head_or_lock() knows to not also try
- * to hash this lock.
- */
- cmpxchg(&pn->state, vcpu_halted, vcpu_running);
+ set_pv_node_running(pn);

/*
* If the locked flag is still not set after wakeup, it is a
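
For reference (not part of this diff), the non-atomic unlock that the
lite mode permits is just the generic queued_spin_unlock(), a plain
store-release, whereas the PV slowpath unlock has to use an atomic
cmpxchg on the locked byte so that a halted waiter needing a kick is
not missed:

    /*
     * Generic non-atomic unlock, as in include/asm-generic/qspinlock.h.
     * With PARAVIRT_QSPINLOCKS_LITE no waiter ever needs a kick, so a
     * store-release of the locked byte is sufficient.
     */
    static __always_inline void queued_spin_unlock(struct qspinlock *lock)
    {
            smp_store_release(&lock->locked, 0);
    }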
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index e625bb410aa2..e9f63240785b 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -7,7 +7,8 @@
#include "lock_events.h"

#ifdef CONFIG_LOCK_EVENT_COUNTS
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#if defined(CONFIG_PARAVIRT_SPINLOCKS) && \
+ !defined(CONFIG_PARAVIRT_QSPINLOCKS_LITE)
/*
* Collect pvqspinlock locking event counts
*/
@@ -133,7 +134,7 @@ static inline void __pv_wait(u8 *ptr, u8 val)
#define pv_kick(c) __pv_kick(c)
#define pv_wait(p, v) __pv_wait(p, v)

-#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+#endif /* CONFIG_PARAVIRT_SPINLOCKS && !CONFIG_PARAVIRT_QSPINLOCKS_LITE */

#else /* CONFIG_LOCK_EVENT_COUNTS */

--
2.18.1

