Re: [RFC] locking/rwsem: Avoid issuing wakeup before setting the reader waiter to nil

From: Peter Zijlstra
Date: Tue Dec 18 2018 - 08:10:45 EST


On Mon, Dec 17, 2018 at 12:53:10PM -0800, Davidlohr Bueso wrote:
> On Mon, 17 Dec 2018, Peter Zijlstra wrote:
> >
> > I've put some patches here:
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git locking/core
> >
> > Could you have a look?
>
> So how about the following to reduce some of the performance penalty (at
> the cost of more complexity)?

I'd rather do it like so, except I'm still conflicted on the naming.

diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
index 545f37138057..ad826d2a4557 100644
--- a/include/linux/sched/wake_q.h
+++ b/include/linux/sched/wake_q.h
@@ -51,8 +51,8 @@ static inline void wake_q_init(struct wake_q_head *head)
head->lastp = &head->first;
}

-extern void wake_q_add(struct wake_q_head *head,
- struct task_struct *task);
+extern void wake_q_add(struct wake_q_head *head, struct task_struct *task);
+extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task);
extern void wake_up_q(struct wake_q_head *head);

#endif /* _LINUX_SCHED_WAKE_Q_H */
diff --git a/kernel/futex.c b/kernel/futex.c
index d14971f6ed3d..6218d98f649b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1402,8 +1402,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
* Queue the task for later wakeup for after we've released
* the hb->lock. wake_q_add() grabs reference to p.
*/
- wake_q_add(wake_q, p);
- put_task_struct(p);
+ wake_q_add_safe(wake_q, p);
}

/*
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 50d9af615dc4..fbe96341beee 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -211,9 +211,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
* Ensure issuing the wakeup (either by us or someone else)
* after setting the reader waiter to nil.
*/
- wake_q_add(wake_q, tsk);
- /* wake_q_add() already take the task ref */
- put_task_struct(tsk);
+ wake_q_add_safe(wake_q, tsk);
}

adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d740d7a3608d..72d82ce73714 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -408,7 +408,7 @@ static bool set_nr_if_polling(struct task_struct *p)
* This function must be used as-if it were wake_up_process(); IOW the task
* must be ready to be woken at this location.
*/
-void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
{
struct wake_q_node *node = &task->wake_q;

@@ -422,15 +422,27 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
*/
smp_mb__before_atomic();
if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)))
- return;
-
- get_task_struct(task);
+ return false;

/*
* The head is context local, there can be no concurrency.
*/
*head->lastp = node;
head->lastp = &node->next;
+
+ return true;
+}
+
+void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+{
+ if (__wake_q_add(head, task))
+ get_task_struct(task);
+}
+
+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
+{
+ if (!__wake_a_add(head, task))
+ put_task_struct(task);
}

void wake_up_q(struct wake_q_head *head)