[RFC][PATCH 1/6] sched: restore __cpu_power to a straight sum of power

From: Peter Zijlstra
Date: Thu Aug 27 2009 - 11:10:14 EST


cpu_power is supposed to be a representation of the process capacity
of the cpu, not a value to randomly tweak in order to affect
placement.

Remove the placement hacks and add SMT gain.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
LKML-Reference: <new-submission>
---
include/linux/sched.h | 1 +
include/linux/topology.h | 1 +
kernel/sched.c | 34 ++++++++++++++++++----------------
3 files changed, 20 insertions(+), 16 deletions(-)

Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -930,6 +930,7 @@ struct sched_domain {
unsigned int newidle_idx;
unsigned int wake_idx;
unsigned int forkexec_idx;
+ unsigned int smt_gain;
int flags; /* See SD_* */
enum sched_domain_level level;

Index: linux-2.6/include/linux/topology.h
===================================================================
--- linux-2.6.orig/include/linux/topology.h
+++ linux-2.6/include/linux/topology.h
@@ -99,6 +99,7 @@ int arch_update_cpu_topology(void);
| SD_SHARE_CPUPOWER, \
.last_balance = jiffies, \
.balance_interval = 1, \
+ .smt_gain = 1178, /* 15% */ \
}
#endif
#endif /* CONFIG_SCHED_SMT */
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -8468,15 +8468,13 @@ static void free_sched_groups(const stru
* there are asymmetries in the topology. If there are asymmetries, group
* having more cpu_power will pickup more load compared to the group having
* less cpu_power.
- *
- * cpu_power will be a multiple of SCHED_LOAD_SCALE. This multiple represents
- * the maximum number of tasks a group can handle in the presence of other idle
- * or lightly loaded groups in the same sched domain.
*/
static void init_sched_groups_power(int cpu, struct sched_domain *sd)
{
struct sched_domain *child;
struct sched_group *group;
+ long power;
+ int weight;

WARN_ON(!sd || !sd->groups);

@@ -8487,22 +8485,26 @@ static void init_sched_groups_power(int

sd->groups->__cpu_power = 0;

- /*
- * For perf policy, if the groups in child domain share resources
- * (for example cores sharing some portions of the cache hierarchy
- * or SMT), then set this domain groups cpu_power such that each group
- * can handle only one task, when there are other idle groups in the
- * same sched domain.
- */
- if (!child || (!(sd->flags & SD_POWERSAVINGS_BALANCE) &&
- (child->flags &
- (SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES)))) {
- sg_inc_cpu_power(sd->groups, SCHED_LOAD_SCALE);
+ if (!child) {
+ power = SCHED_LOAD_SCALE;
+ weight = cpumask_weight(sched_domain_span(sd));
+ /*
+ * SMT siblings share the power of a single core.
+ * Usually multiple threads get a better yield out of
+ * that one core than a single thread would have,
+ * reflect that in sd->smt_gain.
+ */
+ if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1)
+ power *= sd->smt_gain;
+ power /= weight;
+ power >>= SCHED_LOAD_SHIFT;
+ }
+ sg_inc_cpu_power(sd->groups, power);
return;
}

/*
- * add cpu_power of each child group to this groups cpu_power
+ * Add cpu_power of each child group to this groups cpu_power.
*/
group = child->groups;
do {

--

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/