[PATCH 21/30] sched: fix task_h_load()

From: Peter Zijlstra
Date: Fri Jun 27 2008 - 08:01:22 EST


Currently task_h_load() computes the load of a task and uses that to either
subtract it from, or add it to, the total load.

However, removing or adding a task need not have any effect on the total load
at all. Imagine adding a task to a group that runs entirely on one cpu - the
group already contributes its full weight to that cpu, so the cpu's total load
is unaffected; only the distribution within the group changes.

So properly compute addition/removal:

s_i = S * rw_i / \Sum_j rw_j
s'_i = S * (rw_i + wl) / (\Sum_j rw_j + wg)

then s'_i - s_i gives the change in load.

Where s_i is the shares for cpu i, S the group weight, rw_i the runqueue weight
for that cpu, wl the weight we add (or subtract) and wg the weight contribution
to the runqueue.
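
For illustration only (not part of the patch), a tiny user-space program that
plugs numbers into the two formulas; the helper name delta_load() and the
weights are made up. For a group whose runqueue weight sits entirely on one
cpu it prints a delta of 0 - matching the example above - while for a group
spread over two cpus the delta is positive:

/* Illustrative sketch, not kernel code.  Evaluates
 *   s_i  = S *  rw_i       /  \Sum_j rw_j
 *   s'_i = S * (rw_i + wl) / (\Sum_j rw_j + wg)
 * and prints s'_i - s_i, the change in the cpu's load.
 */
#include <stdio.h>

static long delta_load(long S, long rw_i, long sum_rw, long wl, long wg)
{
        long s  = S *  rw_i       /  sum_rw;
        long sn = S * (rw_i + wl) / (sum_rw + wg);

        return sn - s;
}

int main(void)
{
        long S  = 1024;         /* group weight */
        long wl = 1024;         /* weight of the task we add */
        long wg = 1024;         /* its contribution to the runqueue */

        /* group entirely on cpu0: rw = {2048, 0}, task added on cpu0 */
        printf("local group:  delta = %ld\n",
               delta_load(S, 2048, 2048, wl, wg));

        /* group spread over two cpus: rw = {2048, 2048}, task added on cpu0 */
        printf("spread group: delta = %ld\n",
               delta_load(S, 2048, 4096, wl, wg));

        return 0;
}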

Signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
---
kernel/sched_fair.c | 49 ++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 40 insertions(+), 9 deletions(-)

Index: linux-2.6/kernel/sched_fair.c
===================================================================
--- linux-2.6.orig/kernel/sched_fair.c
+++ linux-2.6/kernel/sched_fair.c
@@ -1071,22 +1071,53 @@ static inline int wake_idle(int cpu, str
static const struct sched_class fair_sched_class;

#ifdef CONFIG_FAIR_GROUP_SCHED
-static unsigned long task_h_load(struct task_struct *p)
+static unsigned long effective_load(struct task_group *tg, long wl, int cpu)
{
- unsigned long h_load = p->se.load.weight;
- struct cfs_rq *cfs_rq = cfs_rq_of(&p->se);
+ struct sched_entity *se = tg->se[cpu];
+ long wg = wl;

- update_h_load(task_cpu(p));
+ for_each_sched_entity(se) {
+#define D(n) (likely(n) ? (n) : 1)
+
+ long S, Srw, rw, s, sn;
+
+ S = se->my_q->tg->shares;
+ s = se->my_q->shares;
+ rw = se->my_q->load.weight;

- h_load = calc_delta_mine(h_load, cfs_rq->h_load, &cfs_rq->load);
+ Srw = S * rw / D(s);
+ sn = S * (rw + wl) / D(Srw + wg);
+
+ wl = sn - s;
+ wg = 0;
+#undef D
+ }

- return h_load;
+ return wl;
}
+
+static unsigned long task_load_sub(struct task_struct *p)
+{
+ return effective_load(task_group(p), -(long)p->se.load.weight, task_cpu(p));
+}
+
+static unsigned long task_load_add(struct task_struct *p, int cpu)
+{
+ return effective_load(task_group(p), p->se.load.weight, cpu);
+}
+
#else
-static unsigned long task_h_load(struct task_struct *p)
+
+static unsigned long task_load_sub(struct task_struct *p)
+{
+ return -p->se.load.weight;
+}
+
+static unsigned long task_load_add(struct task_struct *p, int cpu)
{
return p->se.load.weight;
}
+
#endif

static int
@@ -1109,9 +1140,9 @@ wake_affine(struct rq *rq, struct sched_
* of the current CPU:
*/
if (sync)
- tl -= task_h_load(current);
+ tl += task_load_sub(current);

- balanced = 100*(tl + task_h_load(p)) <= imbalance*load;
+ balanced = 100*(tl + task_load_add(p, this_cpu)) <= imbalance*load;

/*
* If the currently running task will sleep within

--
