[PATCH 08/10] sched/fair: Don't iterate if no idle CPUs

From: Srikar Dronamraju
Date: Thu Apr 22 2021 - 06:24:47 EST


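Now that nr_busy_cpus for an LLC is updated in the idle callbacks, the
scheduler can detect when all threads of an LLC are busy. In such cases,
it can skip searching the LLC for an idle CPU to run the wakee thread.

To do so, wake_affine_idler_llc() reports through a new 'idle' flag when
both the previous and the waking LLC are fully busy, and
select_idle_sibling() returns the target early when that flag is false.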

Cc: LKML <linux-kernel@xxxxxxxxxxxxxxx>
Cc: Gautham R Shenoy <ego@xxxxxxxxxxxxxxxxxx>
Cc: Parth Shah <parth@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Valentin Schneider <valentin.schneider@xxxxxxx>
Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxxx>
Signed-off-by: Srikar Dronamraju <srikar@xxxxxxxxxxxxxxxxxx>
---
kernel/sched/fair.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8f752f77b76f..db5dc9875e4c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -715,7 +715,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
#include "pelt.h"
#ifdef CONFIG_SMP

-static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
+static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu, bool idle);
static unsigned long task_h_load(struct task_struct *p);
static unsigned long capacity_of(int cpu);

@@ -5868,7 +5868,8 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits;
}

-static int wake_affine_idler_llc(struct task_struct *p, int this_cpu, int prev_cpu, int sync)
+static int wake_affine_idler_llc(struct task_struct *p, int this_cpu, int prev_cpu,
+ int sync, bool *idle)
{
int pnr_busy, pllc_size, tnr_busy, tllc_size;
struct sched_domain_shared *tsds, *psds;
@@ -5913,8 +5914,10 @@ static int wake_affine_idler_llc(struct task_struct *p, int this_cpu, int prev_c
tllc_size = per_cpu(sd_llc_size, this_cpu);
pllc_size = per_cpu(sd_llc_size, prev_cpu);

- if (pnr_busy == pllc_size && tnr_busy == tllc_size)
+ if (pnr_busy == pllc_size && tnr_busy == tllc_size) {
+ *idle = false;
return nr_cpumask_bits;
+ }

diff = pnr_busy * tllc_size - tnr_busy * pllc_size;
if (diff > 0)
@@ -5926,7 +5929,7 @@ static int wake_affine_idler_llc(struct task_struct *p, int this_cpu, int prev_c
}

static int wake_affine(struct sched_domain *sd, struct task_struct *p,
- int this_cpu, int prev_cpu, int sync)
+ int this_cpu, int prev_cpu, int sync, bool *idle)
{
bool share_caches = cpus_share_cache(prev_cpu, this_cpu);
int target = nr_cpumask_bits;
@@ -5935,7 +5938,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
target = wake_affine_idle(this_cpu, prev_cpu);

else if (sched_feat(WA_IDLER_LLC) && !share_caches)
- target = wake_affine_idler_llc(p, this_cpu, prev_cpu, sync);
+ target = wake_affine_idler_llc(p, this_cpu, prev_cpu, sync, idle);

if (sched_feat(WA_WEIGHT) && target == nr_cpumask_bits)
target = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
@@ -6333,7 +6336,7 @@ static inline bool asym_fits_capacity(int task_util, int cpu)
/*
* Try and locate an idle core/thread in the LLC cache domain.
*/
-static int select_idle_sibling(struct task_struct *p, int prev, int target)
+static int select_idle_sibling(struct task_struct *p, int prev, int target, bool idle)
{
struct sched_domain *sd;
unsigned long task_util;
@@ -6410,6 +6413,9 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
}
}

+ if (!idle)
+ return target;
+
sd = rcu_dereference(per_cpu(sd_llc, target));
if (!sd)
return target;
@@ -6818,6 +6824,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
int want_affine = 0;
/* SD_flags and WF_flags share the first nibble */
int sd_flag = wake_flags & 0xF;
+ bool idle = true;

if (wake_flags & WF_TTWU) {
record_wakee(p);
@@ -6841,7 +6848,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
if (cpu != prev_cpu)
- new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);
+ new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync, &idle);

sd = NULL; /* Prefer wake_affine over balance flags */
break;
@@ -6858,7 +6865,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
} else if (wake_flags & WF_TTWU) { /* XXX always ? */
/* Fast path */
- new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
+ new_cpu = select_idle_sibling(p, prev_cpu, new_cpu, idle);

if (want_affine)
current->recent_used_cpu = cpu;
--
2.18.2