[tip:sched/core] sched/fair: Make the use of prev_cpu consistent in the wakeup path

From: tip-bot for Morten Rasmussen
Date: Wed Aug 10 2016 - 17:22:27 EST


Commit-ID: 772bd008cd9a1d4e8ce566f2edcc61d1c28fcbe5
Gitweb: http://git.kernel.org/tip/772bd008cd9a1d4e8ce566f2edcc61d1c28fcbe5
Author: Morten Rasmussen <morten.rasmussen@xxxxxxx>
AuthorDate: Wed, 22 Jun 2016 18:03:13 +0100
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Wed, 10 Aug 2016 14:03:32 +0200

sched/fair: Make the use of prev_cpu consistent in the wakeup path

In commit:

ac66f5477239 ("sched/numa: Introduce migrate_swap()")

select_task_rq() got a 'cpu' argument to enable overriding of prev_cpu
in special cases (NUMA task swapping).

However, the select_task_rq_fair() helper functions wake_affine() and
select_idle_sibling() still use task_cpu(p) directly to work out
prev_cpu, which leads to inconsistencies whenever the caller has
overridden prev_cpu.

This patch passes prev_cpu (potentially overridden by NUMA code) into
the helper functions to ensure prev_cpu is indeed the same CPU
everywhere in the wakeup path.

cc: Ingo Molnar <mingo@xxxxxxxxxx>
cc: Rik van Riel <riel@xxxxxxxxxx>
Signed-off-by: Morten Rasmussen <morten.rasmussen@xxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Mike Galbraith <efault@xxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: dietmar.eggemann@xxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Cc: mgalbraith@xxxxxxx
Cc: vincent.guittot@xxxxxxxxxx
Cc: yuyang.du@xxxxxxxxx
Link: http://lkml.kernel.org/r/1466615004-3503-3-git-send-email-morten.rasmussen@xxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
kernel/sched/fair.c | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9f9a4e5..d819da6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -656,7 +656,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
}

#ifdef CONFIG_SMP
-static int select_idle_sibling(struct task_struct *p, int cpu);
+static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
static unsigned long task_h_load(struct task_struct *p);

/*
@@ -1512,7 +1512,8 @@ balance:
* Call select_idle_sibling to maybe find a better one.
*/
if (!cur)
- env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
+ env->dst_cpu = select_idle_sibling(env->p, env->src_cpu,
+ env->dst_cpu);

assign:
task_numa_assign(env, cur, imp);
@@ -5101,18 +5102,18 @@ static int wake_wide(struct task_struct *p)
return 1;
}

-static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
+static int wake_affine(struct sched_domain *sd, struct task_struct *p,
+ int prev_cpu, int sync)
{
s64 this_load, load;
s64 this_eff_load, prev_eff_load;
- int idx, this_cpu, prev_cpu;
+ int idx, this_cpu;
struct task_group *tg;
unsigned long weight;
int balanced;

idx = sd->wake_idx;
this_cpu = smp_processor_id();
- prev_cpu = task_cpu(p);
load = source_load(prev_cpu, idx);
this_load = target_load(this_cpu, idx);

@@ -5277,11 +5278,10 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
/*
* Try and locate an idle CPU in the sched_domain.
*/
-static int select_idle_sibling(struct task_struct *p, int target)
+static int select_idle_sibling(struct task_struct *p, int prev, int target)
{
struct sched_domain *sd;
struct sched_group *sg;
- int i = task_cpu(p);

if (idle_cpu(target))
return target;
@@ -5289,8 +5289,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
/*
* If the prevous cpu is cache affine and idle, don't be stupid.
*/
- if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
- return i;
+ if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+ return prev;

/*
* Otherwise, iterate the domains and find an eligible idle cpu.
@@ -5311,6 +5311,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
for_each_lower_domain(sd) {
sg = sd->groups;
do {
+ int i;
+
if (!cpumask_intersects(sched_group_cpus(sg),
tsk_cpus_allowed(p)))
goto next;
@@ -5419,13 +5421,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f

if (affine_sd) {
sd = NULL; /* Prefer wake_affine over balance flags */
- if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
+ if (cpu != prev_cpu && wake_affine(affine_sd, p, prev_cpu, sync))
new_cpu = cpu;
}

if (!sd) {
if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
- new_cpu = select_idle_sibling(p, new_cpu);
+ new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);

} else while (sd) {
struct sched_group *group;