Re: [RFC PATCH 07/13] sched: Add core wide task selection and scheduling.

From: Joel Fernandes
Date: Thu May 21 2020 - 19:14:31 EST


On Wed, Mar 04, 2020 at 04:59:57PM +0000, vpillai wrote:
> From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
>
> Instead of only selecting a local task, select a task for all SMT
> siblings for every reschedule on the core (irrespective which logical
> CPU does the reschedule).
>
> There could be races in core scheduler where a CPU is trying to pick
> a task for its sibling in core scheduler, when that CPU has just been
> offlined. We should not schedule any tasks on the CPU in this case.
> Return an idle task in pick_next_task for this situation.
>
> NOTE: there is still potential for siblings rivalry.
> NOTE: this is far too complicated; but thus far I've failed to
> simplify it further.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> Signed-off-by: Julien Desfossez <jdesfossez@xxxxxxxxxxxxxxxx>
> Signed-off-by: Vineeth Remanan Pillai <vpillai@xxxxxxxxxxxxxxxx>
> Signed-off-by: Aaron Lu <aaron.lu@xxxxxxxxxxxxxxxxx>
> Signed-off-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>
> ---
> kernel/sched/core.c | 274 ++++++++++++++++++++++++++++++++++++++++++-
> kernel/sched/fair.c | 40 +++++++
> kernel/sched/sched.h | 6 +-
> 3 files changed, 318 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 445f0d519336..9a1bd236044e 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -4253,7 +4253,7 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
> * Pick up the highest-prio task:
> */
> static inline struct task_struct *
> -pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
> +__pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
> {
> const struct sched_class *class;
> struct task_struct *p;
> @@ -4309,6 +4309,273 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
> BUG();
> }
>
> +#ifdef CONFIG_SCHED_CORE
> +
> +static inline bool cookie_equals(struct task_struct *a, unsigned long cookie)
> +{
> + return is_idle_task(a) || (a->core_cookie == cookie);
> +}
> +
> +static inline bool cookie_match(struct task_struct *a, struct task_struct *b)
> +{
> + if (is_idle_task(a) || is_idle_task(b))
> + return true;
> +
> + return a->core_cookie == b->core_cookie;
> +}
> +
> +// XXX fairness/fwd progress conditions
> +/*
> + * Returns
> + * - NULL if there is no runnable task for this class.
> + * - the highest priority task for this runqueue if it matches
> + * rq->core->core_cookie or its priority is greater than max.
> + * - Else returns idle_task.
> + */
> +static struct task_struct *
> +pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *max)
> +{
> + struct task_struct *class_pick, *cookie_pick;
> + unsigned long cookie = rq->core->core_cookie;
> +
> + class_pick = class->pick_task(rq);
> + if (!class_pick)
> + return NULL;
> +
> + if (!cookie) {
> + /*
> + * If class_pick is tagged, return it only if it has
> + * higher priority than max.
> + */
> + if (max && class_pick->core_cookie &&
> + prio_less(class_pick, max))
> + return idle_sched_class.pick_task(rq);
> +
> + return class_pick;
> + }
> +
> + /*
> + * If class_pick is idle or matches cookie, return early.
> + */
> + if (cookie_equals(class_pick, cookie))
> + return class_pick;
> +
> + cookie_pick = sched_core_find(rq, cookie);
> +
> + /*
> + * If class > max && class > cookie, it is the highest priority task on
> + * the core (so far) and it must be selected, otherwise we must go with
> + * the cookie pick in order to satisfy the constraint.
> + */
> + if (prio_less(cookie_pick, class_pick) &&
> + (!max || prio_less(max, class_pick)))
> + return class_pick;
> +
> + return cookie_pick;
> +}

I've been hating on this pick_task() routine for a while now :-). If we add
the task to the tag tree as Peter suggested at OSPM for that other issue
Vineeth found, it seems it could be simpler.

This has just been near a compiler so far but how about:

---8<-----------------------

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 005d7f7323e2d..81e23252b6c99 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -182,9 +182,6 @@ static void sched_core_enqueue(struct rq *rq, struct task_struct *p)

rq->core->core_task_seq++;

- if (!p->core_cookie)
- return;
-
node = &rq->core_tree.rb_node;
parent = *node;

@@ -215,7 +212,7 @@ static void sched_core_dequeue(struct rq *rq, struct task_struct *p)

void sched_core_add(struct rq *rq, struct task_struct *p)
{
- if (p->core_cookie && task_on_rq_queued(p))
+ if (task_on_rq_queued(p))
sched_core_enqueue(rq, p);
}

@@ -4563,36 +4560,32 @@ pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *ma
if (!class_pick)
return NULL;

- if (!cookie) {
- /*
- * If class_pick is tagged, return it only if it has
- * higher priority than max.
- */
- if (max && class_pick->core_cookie &&
- prio_less(class_pick, max))
- return idle_sched_class.pick_task(rq);
-
+ if (!max)
return class_pick;
- }

- /*
- * If class_pick is idle or matches cookie, return early.
- */
+ /* Make sure the current max's cookie is core->core_cookie */
+ WARN_ON_ONCE(max->core_cookie != cookie);
+
+ /* If class_pick is idle or matches cookie, play nice. */
if (cookie_equals(class_pick, cookie))
return class_pick;

- cookie_pick = sched_core_find(rq, cookie);
+ /* If class_pick is highest prio, trump max. */
+ if (prio_less(max, class_pick)) {
+
+ /* .. but not before checking if cookie trumps class. */
+ cookie_pick = sched_core_find(rq, cookie);
+ if (prio_less(class_pick, cookie_pick))
+ return cookie_pick;

- /*
- * If class > max && class > cookie, it is the highest priority task on
- * the core (so far) and it must be selected, otherwise we must go with
- * the cookie pick in order to satisfy the constraint.
- */
- if (prio_less(cookie_pick, class_pick) &&
- (!max || prio_less(max, class_pick)))
return class_pick;
+ }

- return cookie_pick;
+ /*
+ * We get here if class_pick was incompatible with max
+ * and lower prio than max. So we have nothing.
+ */
+ return idle_sched_class.pick_task(rq);
}

static struct task_struct *