Re: [PATCH RT 7/8] kernel/locking: use an exclusive wait_q for sleepers
From: Steven Rostedt
Date: Tue Nov 21 2017 - 11:20:39 EST
On Tue, 21 Nov 2017 11:07:01 -0500
Steven Rostedt <rostedt@xxxxxxxxxxx> wrote:
> 4.4.97-rt111-rc1 stable review patch.
> If anyone has any objections, please let me know.
>
> ------------------
>
> From: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
>
> If a task is queued as a sleeper for a wakeup and never goes to
> schedule() (because it just obtained the lock) then it will receive a
> spurious wake up. This is not "bad"; it is anticipated. Until that wake
> up happens, the task cannot be enqueued for any wake ups handled by the
> WAKE_Q infrastructure (because a task can only be enqueued once). This
> wouldn't be bad if we used the same wakeup mechanism for the wake up of
> sleepers as we do for "normal" wake ups. But we don't…
Strange characters in the change log made quilt mail fail to send to the
list (bad encoding; quilt mail can't handle it).
-- Steve
>
> So.
>   T1                              T2                              T3
>   spin_lock(x)                                                    spin_unlock(x);
>                                                                   wake_q_add_sleeper(q1, T1)
>   spin_unlock(x)
>   set_state(TASK_INTERRUPTIBLE)
>   if (!condition)
>           schedule()
>                                   condition = true
>                                   wake_q_add(q2, T1)
>                                   // T1 not added, still enqueued
>                                   wake_up_q(q2)
>                                                                   wake_up_q_sleeper(q1)
>                                                                   // T1 not woken up, wrong task state
>
> In order to solve this race, this patch adds a dedicated wake_q_node
> for the sleeper case.
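
[ For anyone following along: the "enqueued once" constraint exists
  because wake_q_add() claims the task's embedded wake_q node with a
  cmpxchg() on its ->next pointer, so a second add while the node is
  still pending gets silently skipped. Below is a rough userspace model
  of the collision and of the fix. It is illustrative only; struct task,
  task_queue_once() and TAIL are made up for the demo, not kernel API:

        #include <stdbool.h>
        #include <stddef.h>
        #include <stdio.h>

        struct wake_node { struct wake_node *next; };
        #define TAIL ((struct wake_node *)0x1) /* stands in for WAKE_Q_TAIL */

        struct task {
                struct wake_node wake_q;         /* node for "normal" wake ups */
                struct wake_node wake_q_sleeper; /* the fix: a second node for sleepers */
        };

        /* Models the claim wake_q_add() makes via cmpxchg(&node->next, ...):
         * returns false if the node is already on a queue, i.e. the add is lost. */
        static bool task_queue_once(struct wake_node *node)
        {
                return __sync_bool_compare_and_swap(&node->next, NULL, TAIL);
        }

        int main(void)
        {
                struct task t1 = { { NULL }, { NULL } };

                /* Without the patch both paths share one node, so T2's
                 * normal add is dropped while T3's sleeper add is pending: */
                printf("sleeper add: %d\n", task_queue_once(&t1.wake_q)); /* 1 */
                printf("normal add:  %d\n", task_queue_once(&t1.wake_q)); /* 0 */

                /* With the patch each path has its own node, no collision: */
                t1.wake_q.next = NULL; /* reset for the second half of the demo */
                printf("sleeper add: %d\n", task_queue_once(&t1.wake_q_sleeper)); /* 1 */
                printf("normal add:  %d\n", task_queue_once(&t1.wake_q));         /* 1 */
                return 0;
        }
]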
>
> Reported-by: Mike Galbraith <efault@xxxxxx>
> Cc: stable-rt@xxxxxxxxxxxxxxx
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
> Signed-off-by: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>
> ---
>  include/linux/sched.h    | 17 +++++++++++++++--
>  kernel/fork.c            |  1 +
>  kernel/locking/rtmutex.c |  2 +-
>  kernel/sched/core.c      | 20 ++++++++++++++++----
>  4 files changed, 33 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 331cdbfc6431..f37654adf12a 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -979,8 +979,20 @@ struct wake_q_head {
>  #define WAKE_Q(name)                                        \
>          struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
>
> -extern void wake_q_add(struct wake_q_head *head,
> -                       struct task_struct *task);
> +extern void __wake_q_add(struct wake_q_head *head,
> +                         struct task_struct *task, bool sleeper);
> +static inline void wake_q_add(struct wake_q_head *head,
> +                              struct task_struct *task)
> +{
> +        __wake_q_add(head, task, false);
> +}
> +
> +static inline void wake_q_add_sleeper(struct wake_q_head *head,
> +                                      struct task_struct *task)
> +{
> +        __wake_q_add(head, task, true);
> +}
> +
>  extern void __wake_up_q(struct wake_q_head *head, bool sleeper);
>
>  static inline void wake_up_q(struct wake_q_head *head)
> @@ -1640,6 +1652,7 @@ struct task_struct {
>          raw_spinlock_t pi_lock;
>
>          struct wake_q_node wake_q;
> +        struct wake_q_node wake_q_sleeper;
>
>  #ifdef CONFIG_RT_MUTEXES
>          /* PI waiters blocked on a rt_mutex held by this task */
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 0a873f52999f..368e770abee6 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -395,6 +395,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
>          tsk->splice_pipe = NULL;
>          tsk->task_frag.page = NULL;
>          tsk->wake_q.next = NULL;
> +        tsk->wake_q_sleeper.next = NULL;
>
>          account_kernel_stack(ti, 1);
>
> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 552dc6dd3a79..b5b89c51f27e 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -1557,7 +1557,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
>          raw_spin_unlock(&current->pi_lock);
>
>          if (waiter->savestate)
> -                wake_q_add(wake_sleeper_q, waiter->task);
> +                wake_q_add_sleeper(wake_sleeper_q, waiter->task);
>          else
>                  wake_q_add(wake_q, waiter->task);
>  }
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index bea476417297..ed0f841d4d5c 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -523,9 +523,15 @@ static bool set_nr_if_polling(struct task_struct *p)
>  #endif
>  #endif
>
> -void wake_q_add(struct wake_q_head *head, struct task_struct *task)
> +void __wake_q_add(struct wake_q_head *head, struct task_struct *task,
> +                  bool sleeper)
>  {
> -        struct wake_q_node *node = &task->wake_q;
> +        struct wake_q_node *node;
> +
> +        if (sleeper)
> +                node = &task->wake_q_sleeper;
> +        else
> +                node = &task->wake_q;
>
>          /*
>           * Atomically grab the task, if ->wake_q is !nil already it means
> @@ -554,11 +560,17 @@ void __wake_up_q(struct wake_q_head *head, bool sleeper)
>          while (node != WAKE_Q_TAIL) {
>                  struct task_struct *task;
>
> -                task = container_of(node, struct task_struct, wake_q);
> +                if (sleeper)
> +                        task = container_of(node, struct task_struct, wake_q_sleeper);
> +                else
> +                        task = container_of(node, struct task_struct, wake_q);
>                  BUG_ON(!task);
>                  /* task can safely be re-inserted now */
>                  node = node->next;
> -                task->wake_q.next = NULL;
> +                if (sleeper)
> +                        task->wake_q_sleeper.next = NULL;
> +                else
> +                        task->wake_q.next = NULL;
>
>                  /*
>                   * wake_up_process() implies a wmb() to pair with the queueing