[RFC V2 2/2] sched/fair: Fallback to sched-idle CPU if idle CPU isn't found

From: Viresh Kumar
Date: Thu Apr 25 2019 - 05:37:56 EST


We look for an idle CPU in select_idle_sibling() to run the next task,
but if no idle CPU is found it is better to pick a CPU which will run
the task the soonest, for performance reasons. A CPU which isn't idle
but has only SCHED_IDLE activity queued on it should be a good target
by this criterion, as any normal fair task will most likely preempt the
currently running SCHED_IDLE task immediately. In fact, choosing a
SCHED_IDLE CPU should give better results, as it should be able to run
the task sooner than an idle CPU (which needs to be woken up from an
idle state).

This patch updates the fast path to fall back to a sched-idle CPU if no
idle CPU is found; the slow path can be updated separately later.

Following is the order in which select_idle_sibling() now picks the next
CPU to run the task:

1. idle_cpu(target) OR sched_idle_cpu(target)
2. idle_cpu(prev) OR sched_idle_cpu(prev)
3. idle_cpu(recent_used_cpu) OR sched_idle_cpu(recent_used_cpu)
4. idle core(sd)
5. idle_cpu(sd)
6. sched_idle_cpu(sd)
7. idle_cpu(p) - smt
8. sched_idle_cpu(p) - smt

The policy can be tweaked a bit if we want to have different
priorities.

Signed-off-by: Viresh Kumar <viresh.kumar@xxxxxxxxxx>
---
kernel/sched/fair.c | 28 +++++++++++++++++++++-------
1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6511cb57acdd..fbaefb9a9296 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6057,6 +6057,15 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
return new_cpu;
}

+/* Nonzero when @cpu's runqueue is non-empty and holds only SCHED_IDLE tasks */
+static int sched_idle_cpu(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+
+ return unlikely(rq->nr_running == rq->cfs.idle_h_nr_running &&
+ rq->nr_running); /* nr_running != 0: an empty runqueue does not qualify */
+}
+
#ifdef CONFIG_SCHED_SMT
DEFINE_STATIC_KEY_FALSE(sched_smt_present);
EXPORT_SYMBOL_GPL(sched_smt_present);
@@ -6154,7 +6163,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
*/
static int select_idle_smt(struct task_struct *p, int target)
{
- int cpu;
+ int cpu, si_cpu = -1;

if (!static_branch_likely(&sched_smt_present))
return -1;
@@ -6164,9 +6173,11 @@ static int select_idle_smt(struct task_struct *p, int target)
continue;
if (available_idle_cpu(cpu))
return cpu;
+ if (si_cpu == -1 && sched_idle_cpu(cpu))
+ si_cpu = cpu;
}

- return -1;
+ return si_cpu;
}

#else /* CONFIG_SCHED_SMT */
@@ -6194,7 +6205,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
u64 avg_cost, avg_idle;
u64 time, cost;
s64 delta;
- int cpu, nr = INT_MAX;
+ int cpu, nr = INT_MAX, si_cpu = -1;

this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
if (!this_sd)
@@ -6222,11 +6233,13 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t

for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
if (!--nr)
- return -1;
+ return si_cpu;
if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
continue;
if (available_idle_cpu(cpu))
break;
+ if (si_cpu == -1 && sched_idle_cpu(cpu))
+ si_cpu = cpu;
}

time = local_clock() - time;
@@ -6245,13 +6258,14 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
struct sched_domain *sd;
int i, recent_used_cpu;

- if (available_idle_cpu(target))
+ if (available_idle_cpu(target) || sched_idle_cpu(target))
return target;

/*
* If the previous CPU is cache affine and idle, don't be stupid:
*/
- if (prev != target && cpus_share_cache(prev, target) && available_idle_cpu(prev))
+ if (prev != target && cpus_share_cache(prev, target) &&
+ (available_idle_cpu(prev) || sched_idle_cpu(prev)))
return prev;

/* Check a recently used CPU as a potential idle candidate: */
@@ -6259,7 +6273,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
if (recent_used_cpu != prev &&
recent_used_cpu != target &&
cpus_share_cache(recent_used_cpu, target) &&
- available_idle_cpu(recent_used_cpu) &&
+ (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
/*
* Replace recent_used_cpu with prev as it is a potential
--
2.21.0.rc0.269.g1a574e7a288b