[PATCH 6/6] locking/qspinlock: Enable lock events tracking for CNA qspinlock code

From: Waiman Long
Date: Thu Jan 23 2020 - 13:53:12 EST


Add lock events for tracking the behavior of the CNA qspinlock code.
A new lockevent_max() helper is added to record the maximum value that
the CNA intra_count field can reach.
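For reference, the intended semantics can be modelled in plain user-space
C as in the sketch below. This is only an illustration; record_max(),
read_max(), NR_CPUS and the flat array are made-up stand-ins for the
kernel's per-CPU lockevents[] machinery, not code from this patch:

/*
 * Illustrative user-space model of the lockevent_max() semantics.
 */
#include <stdio.h>

#define NR_CPUS	4

static unsigned long cna_intra_max[NR_CPUS];

/* writer side: keep a per-CPU high-water mark, as __lockevent_max() does */
static void record_max(int cpu, unsigned long val)
{
	if (val > cna_intra_max[cpu])
		cna_intra_max[cpu] = val;
}

/* read side: a "max" event reports the largest per-CPU value, not the sum */
static unsigned long read_max(void)
{
	unsigned long max = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		if (cna_intra_max[cpu] > max)
			max = cna_intra_max[cpu];
	return max;
}

int main(void)
{
	record_max(0, 3);
	record_max(1, 7);
	record_max(1, 5);	/* a lower value does not overwrite the mark */

	printf("cna_intra_max = %lu\n", read_max());	/* prints 7 */
	return 0;
}

The read side above mirrors the lockevent_read() change in this patch: for
a *_max event the per-CPU values are combined with max() rather than summed.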

Signed-off-by: Waiman Long <longman@xxxxxxxxxx>
---
kernel/locking/lock_events.c | 23 +++++++++++++++++++----
kernel/locking/lock_events.h | 11 +++++++++++
kernel/locking/lock_events_list.h | 13 +++++++++++++
kernel/locking/qspinlock_cna.h | 21 ++++++++++++++++-----
kernel/locking/qspinlock_stat.h | 23 ++++++++++++++++++++++-
5 files changed, 81 insertions(+), 10 deletions(-)

diff --git a/kernel/locking/lock_events.c b/kernel/locking/lock_events.c
index fa2c2f951c6b..0237cbbc94a2 100644
--- a/kernel/locking/lock_events.c
+++ b/kernel/locking/lock_events.c
@@ -120,14 +120,29 @@ static const struct file_operations fops_lockevent = {

static bool __init skip_lockevent(const char *name)
{
- static int pv_on __initdata = -1;
+ static enum {
+ LOCK_UNKNOWN,
+ LOCK_NATIVE,
+ LOCK_PV,
+ LOCK_CNA,
+ } state __initdata = LOCK_UNKNOWN;
+
+ if (state == LOCK_UNKNOWN) {
+ if (pv_ops.lock.queued_spin_lock_slowpath ==
+ native_queued_spin_lock_slowpath)
+ state = LOCK_NATIVE;
+ else if (pv_ops.lock.queued_spin_lock_slowpath ==
+ __pv_queued_spin_lock_slowpath)
+ state = LOCK_PV;
+ else
+ state = LOCK_CNA;
+ }

- if (pv_on < 0)
- pv_on = !pv_is_native_spin_unlock();
/*
* Skip PV qspinlock events on bare metal.
*/
- if (!pv_on && !memcmp(name, "pv_", 3))
+ if (((state != LOCK_PV) && !memcmp(name, "pv_", 3)) ||
+ ((state != LOCK_CNA) && !memcmp(name, "cna_", 4)))
return true;
return false;
}
diff --git a/kernel/locking/lock_events.h b/kernel/locking/lock_events.h
index 8c7e7d25f09c..d8528725324c 100644
--- a/kernel/locking/lock_events.h
+++ b/kernel/locking/lock_events.h
@@ -50,11 +50,22 @@ static inline void __lockevent_add(enum lock_events event, int inc)

#define lockevent_add(ev, c) __lockevent_add(LOCKEVENT_ ##ev, c)

+static inline void __lockevent_max(enum lock_events event, unsigned long val)
+{
+ unsigned long max = raw_cpu_read(lockevents[event]);
+
+ if (val > max)
+ raw_cpu_write(lockevents[event], val);
+}
+
+#define lockevent_max(ev, v) __lockevent_max(LOCKEVENT_ ##ev, v)
+
#else /* CONFIG_LOCK_EVENT_COUNTS */

#define lockevent_inc(ev)
#define lockevent_add(ev, c)
#define lockevent_cond_inc(ev, c)
+#define lockevent_max(ev, v)

#endif /* CONFIG_LOCK_EVENT_COUNTS */
#endif /* __LOCKING_LOCK_EVENTS_H */
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
index 239039d0ce21..df1042bb19e9 100644
--- a/kernel/locking/lock_events_list.h
+++ b/kernel/locking/lock_events_list.h
@@ -35,6 +35,19 @@ LOCK_EVENT(pv_wait_head) /* # of vCPU wait's at the queue head */
LOCK_EVENT(pv_wait_node) /* # of vCPU wait's at non-head queue node */
#endif /* CONFIG_PARAVIRT_SPINLOCKS */

+#ifdef CONFIG_NUMA_AWARE_SPINLOCKS
+/*
+ * Locking events for CNA qspinlock
+ */
+LOCK_EVENT(cna_prescan_hit) /* # of pre-scans that found a same-node waiter */
+LOCK_EVENT(cna_prescan_miss) /* # of pre-scans that found no same-node waiter */
+LOCK_EVENT(cna_mainscan_hit) /* # of main scans that found a same-node waiter */
+LOCK_EVENT(cna_merge_queue) /* # of queue merges (secondary -> primary) */
+LOCK_EVENT(cna_splice_new) /* # of splices to new secondary queue */
+LOCK_EVENT(cna_splice_old) /* # of splices to existing secondary queue */
+LOCK_EVENT(cna_intra_max) /* Maximum intra_count value */
+#endif
+
/*
* Locking events for qspinlock
*
diff --git a/kernel/locking/qspinlock_cna.h b/kernel/locking/qspinlock_cna.h
index f0b0c15dcf9d..2c410d67e094 100644
--- a/kernel/locking/qspinlock_cna.h
+++ b/kernel/locking/qspinlock_cna.h
@@ -193,6 +193,7 @@ static void cna_splice_tail(struct mcs_spinlock *node,
if (node->locked <= 1) { /* if secondary queue is empty */
/* create secondary queue */
last->next = first;
+ lockevent_inc(cna_splice_new);
} else {
/* add to the tail of the secondary queue */
struct mcs_spinlock *tail_2nd = decode_tail(node->locked);
@@ -200,6 +201,7 @@ static void cna_splice_tail(struct mcs_spinlock *node,

tail_2nd->next = first;
last->next = head_2nd;
+ lockevent_inc(cna_splice_old);
}

node->locked = ((struct cna_node *)last)->encoded_tail;
@@ -285,14 +287,15 @@ __always_inline u32 cna_pre_scan(struct qspinlock *lock,
cn->intra_count == intra_node_handoff_threshold ?
FLUSH_SECONDARY_QUEUE :
cna_scan_main_queue(node, node);
-
+ lockevent_cond_inc(cna_prescan_hit,
+ cn->pre_scan_result == LOCAL_WAITER_FOUND);
return 0;
}

static inline void cna_pass_lock(struct mcs_spinlock *node,
struct mcs_spinlock *next)
{
- struct cna_node *cn = (struct cna_node *)node;
+ struct cna_node *cn = (struct cna_node *)node, *next_cn;
struct mcs_spinlock *next_holder = next, *tail_2nd;
u32 val = 1;

@@ -311,20 +314,27 @@ static inline void cna_pass_lock(struct mcs_spinlock *node,
* pre-scan, and if so, try to find it in post-scan starting from the
* node where pre-scan stopped (stored in @pre_scan_result)
*/
- if (scan >= MIN_ENCODED_TAIL)
+ if (scan >= MIN_ENCODED_TAIL) {
scan = cna_scan_main_queue(node, decode_tail(scan));
+ lockevent_inc(cna_prescan_miss);
+ lockevent_cond_inc(cna_mainscan_hit,
+ scan == LOCAL_WAITER_FOUND);
+ }

if (scan == LOCAL_WAITER_FOUND) {
next_holder = node->next;
+ next_cn = (struct cna_node *)next_holder;
+
/*
* we unlock successor by passing a non-zero value,
* so set @val to 1 iff @locked is 0, which will happen
* if we acquired the MCS lock when its queue was empty
*/
val = node->locked ? node->locked : 1;
+
/* inc @intra_count if the secondary queue is not empty */
- ((struct cna_node *)next_holder)->intra_count =
- cn->intra_count + (node->locked > 1);
+ next_cn->intra_count = cn->intra_count + (node->locked > 1);
+ lockevent_max(cna_intra_max, next_cn->intra_count);
} else if (node->locked > 1) { /* if secondary queue is not empty */
/* next holder will be the first node in the secondary queue */
tail_2nd = decode_tail(node->locked);
@@ -332,6 +342,7 @@ static inline void cna_pass_lock(struct mcs_spinlock *node,
next_holder = tail_2nd->next;
/* splice the secondary queue onto the head of the main queue */
tail_2nd->next = next;
+ lockevent_inc(cna_merge_queue);
}

pass_lock:
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index e625bb410aa2..530f86477e0f 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -22,6 +22,18 @@
*/
static DEFINE_PER_CPU(u64, pv_kick_time);

+#ifdef CONFIG_NUMA_AWARE_SPINLOCKS
+static inline bool lock_event_return_max(int id)
+{
+ return id == LOCKEVENT_cna_intra_max;
+}
+#else
+static inline bool lock_event_return_max(int id)
+{
+ return false;
+}
+#endif
+
/*
* Function to read and return the PV qspinlock counts.
*
@@ -38,7 +50,7 @@ ssize_t lockevent_read(struct file *file, char __user *user_buf,
{
char buf[64];
int cpu, id, len;
- u64 sum = 0, kicks = 0;
+ u64 sum = 0, kicks = 0, val;

/*
* Get the counter ID stored in file->f_inode->i_private
@@ -49,6 +61,15 @@ ssize_t lockevent_read(struct file *file, char __user *user_buf,
return -EBADF;

for_each_possible_cpu(cpu) {
+ val = per_cpu(lockevents[id], cpu);
+ if (lock_event_return_max(id)) {
+ /*
+ * Find the maximum of all per-cpu values
+ */
+ if (val > sum)
+ sum = val;
+ continue;
+ }
sum += per_cpu(lockevents[id], cpu);
/*
* Need to sum additional counters for some of them
--
2.18.1
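
For illustration, the prefix filtering added to skip_lockevent() behaves
like the small user-space sketch below. skip_event() and the hard-coded
lock type are stand-ins; the kernel derives the type from pv_ops at init:

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

enum lock_type { LOCK_NATIVE, LOCK_PV, LOCK_CNA };

/*
 * Mirror of the new skip_lockevent() test: hide pv_* events unless the
 * PV slowpath is in use, and cna_* events unless the CNA slowpath is.
 */
static bool skip_event(enum lock_type state, const char *name)
{
	return (state != LOCK_PV && !memcmp(name, "pv_", 3)) ||
	       (state != LOCK_CNA && !memcmp(name, "cna_", 4));
}

int main(void)
{
	const char *events[] = { "lock_pending", "pv_kick_wake", "cna_intra_max" };

	for (int i = 0; i < 3; i++)
		printf("%-14s %s\n", events[i],
		       skip_event(LOCK_CNA, events[i]) ? "skipped" : "kept");
	return 0;
}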

