[PATCH v2 10/13] sched: Add per-cpu max capacity to sched_group_capacity

From: Morten Rasmussen
Date: Wed Jun 22 2016 - 13:04:49 EST


struct sched_group_capacity currently represents the sum of the compute
capacities of all cpus in the sched_group. Unless that sum is divided by
group_weight to get the average capacity per cpu, it hides differences
in cpu capacity on mixed-capacity systems (e.g. high RT/IRQ utilization
or ARM big.LITTLE). Even the average may not be sufficient if the group
covers cpus of different capacities. Instead, extend struct
sched_group_capacity to record the maximum per-cpu capacity in the
group. This makes it easy to find a suitable group for a given task
utilization, so that cpus with reduced capacity can be avoided for tasks
with high utilization (not implemented by this patch).

The capacity field is also changed from unsigned int to unsigned long,
and the sched-domain debug printk format is updated to match.

cc: Ingo Molnar <mingo@xxxxxxxxxx>
cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>

Signed-off-by: Morten Rasmussen <morten.rasmussen@xxxxxxx>
---
kernel/sched/core.c | 3 ++-
kernel/sched/fair.c | 17 ++++++++++++-----
kernel/sched/sched.h | 3 ++-
3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5093765e9930..06cd7e4a81a2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5617,7 +5617,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
printk(KERN_CONT " %*pbl",
cpumask_pr_args(sched_group_cpus(group)));
if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
- printk(KERN_CONT " (cpu_capacity = %d)",
+ printk(KERN_CONT " (cpu_capacity = %lu)",
group->sgc->capacity);
}

@@ -6086,6 +6086,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
* die on a /0 trap.
*/
sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
+ sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;

/*
* Make sure the first group of this domain contains the
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9c9b837742f6..4d10d022006d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6625,13 +6625,14 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)

cpu_rq(cpu)->cpu_capacity = capacity;
sdg->sgc->capacity = capacity;
+ sdg->sgc->max_capacity = capacity;
}

void update_group_capacity(struct sched_domain *sd, int cpu)
{
struct sched_domain *child = sd->child;
struct sched_group *group, *sdg = sd->groups;
- unsigned long capacity;
+ unsigned long capacity, max_capacity;
unsigned long interval;

interval = msecs_to_jiffies(sd->balance_interval);
@@ -6644,6 +6645,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
}

capacity = 0;
+ max_capacity = 0;

if (child->flags & SD_OVERLAP) {
/*
@@ -6668,11 +6670,12 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
*/
if (unlikely(!rq->sd)) {
capacity += capacity_of(cpu);
- continue;
+ } else {
+ sgc = rq->sd->groups->sgc;
+ capacity += sgc->capacity;
}

- sgc = rq->sd->groups->sgc;
- capacity += sgc->capacity;
+ max_capacity = max(capacity, max_capacity);
}
} else {
/*
@@ -6682,12 +6685,16 @@ void update_group_capacity(struct sched_domain *sd, int cpu)

group = child->groups;
do {
- capacity += group->sgc->capacity;
+ struct sched_group_capacity *sgc = group->sgc;
+
+ capacity += sgc->capacity;
+ max_capacity = max(sgc->max_capacity, max_capacity);
group = group->next;
} while (group != child->groups);
}

sdg->sgc->capacity = capacity;
+ sdg->sgc->max_capacity = max_capacity;
}

/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3e9904ef224f..0cdb52168984 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -868,7 +868,8 @@ struct sched_group_capacity {
* CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
* for a single CPU.
*/
- unsigned int capacity;
+ unsigned long capacity;
+ unsigned long max_capacity; /* Max per-cpu capacity in group */
unsigned long next_update;
int imbalance; /* XXX unrelated to capacity but shared group state */
/*
--
1.9.1