Re: sched: per-entity load-tracking

From: Ingo Molnar
Date: Sat Oct 06 2012 - 03:39:55 EST



* Paul Turner <pjt@xxxxxxxxxx> wrote:

> Peter:
> The rebase to tip/master made interdiff angry enough that it wasn't
> producing the right relative diffs for your stack. Sorry :(

Find below the diff between the two series, generated via 'quilt
snapshot' and 'quilt diff --snapshot'.

One quick stylistic note: instead of adding an
update_cfs_rq_blocked_load() prototype in the middle of the file,
order the functions naturally so that no forward declarations are
needed.
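
To illustrate with a standalone sketch (made-up names, not the actual
kernel code): define helpers bottom-up, before their first caller,
and the forward declaration becomes unnecessary:

	/* Helper defined before its first use: no prototype needed. */
	static void update_blocked(int *sum)
	{
		*sum /= 2;		/* stand-in for the real decay */
	}

	static void add_to_list(int *sum)
	{
		update_blocked(sum);	/* already in scope here */
	}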

Thanks,

Ingo
--
tip/kernel/sched/fair.c | 28 ++++++++++++++++++----------
1 file changed, 18 insertions(+), 10 deletions(-)

Index: tip/kernel/sched/fair.c
===================================================================
--- tip.orig/kernel/sched/fair.c
+++ tip/kernel/sched/fair.c
@@ -262,6 +262,9 @@ static inline struct cfs_rq *group_cfs_r
return grp->my_q;
}

+static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq,
+ int force_update);
+
static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
if (!cfs_rq->on_list) {
@@ -281,6 +284,8 @@ static inline void list_add_leaf_cfs_rq(
}

cfs_rq->on_list = 1;
+ /* We should have no load, but we need to update last_decay. */
+ update_cfs_rq_blocked_load(cfs_rq, 0);
}
}

@@ -1151,7 +1156,7 @@ static inline void update_cfs_shares(str
* Note: The tables below are dependent on this value.
*/
#define LOAD_AVG_PERIOD 32
-#define LOAD_AVG_MAX 47765 /* maximum possible load avg */
+#define LOAD_AVG_MAX 47742 /* maximum possible load avg */
#define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_AVG_MAX */

/* Precomputed fixed inverse multiplies for multiplication by y^n */
@@ -1203,7 +1208,8 @@ static __always_inline u64 decay_load(u6
}

val *= runnable_avg_yN_inv[local_n];
- return SRR(val, 32);
+ /* We don't use SRR here since we always want to round down. */
+ return val >> 32;
}

/*
@@ -4236,13 +4242,15 @@ static void __update_blocked_averages_cp
if (se) {
update_entity_load_avg(se, 1);
/*
- * We can pivot on the runnable average decaying to zero for
- * list removal since the parent average will always be >=
- * child.
+ * We pivot on our runnable average having decayed to zero for
+ * list removal. This generally implies that all our children
+ * have also been removed (modulo rounding error or bandwidth
+ * control); however, such cases are rare and we can fix these
+ * at enqueue.
+ *
+ * TODO: fix up out-of-order children on enqueue.
*/
- if (se->avg.runnable_avg_sum)
- update_cfs_shares(cfs_rq);
- else
+ if (!se->avg.runnable_avg_sum && !cfs_rq->nr_running)
list_del_leaf_cfs_rq(cfs_rq);
} else {
struct rq *rq = rq_of(cfs_rq);
@@ -6013,10 +6021,10 @@ static void task_tick_fair(struct rq *rq
entity_tick(cfs_rq, se, queued);
}

- update_rq_runnable_avg(rq, 1);
-
if (sched_feat_numa(NUMA))
task_tick_numa(rq, curr);
+
+ update_rq_runnable_avg(rq, 1);
}

/*
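
A side note on the LOAD_AVG_MAX hunk above: the corrected constant is
easy to sanity-check with a few lines of userspace C (a sketch of
mine, not from the series; 0xfa83b2da is runnable_avg_yN_inv[1], i.e.
y = 2^(-1/32) in 32.32 fixed point). Iterating the per-period update
sum = decay(sum) + 1024, with the round-down decay from the
decay_load() hunk, converges to exactly 47742:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t sum = 0, prev;
		int n = 0;

		do {	/* one iteration per 1024us period */
			prev = sum;
			sum = ((sum * 0xfa83b2daULL) >> 32) + 1024;
			n++;
		} while (sum != prev);

		printf("LOAD_AVG_MAX = %llu after %d periods\n",
		       (unsigned long long)sum, n);
		return 0;
	}

The iteration count it reports also lands in the neighbourhood of
LOAD_AVG_MAX_N (345 full periods).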
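Similarly, the decay_load() change can be demonstrated in a couple of
lines: SRR() is the usual round-to-nearest shift, and rounding to
nearest means a leftover sum of 1, decayed one period at a time,
never reaches zero, whereas a plain shift rounds down and lets it
decay away. That matters because the leaf-cfs_rq removal hunk above
pivots on the runnable average decaying to zero. A minimal sketch
(again mine, same y constant as above):

	#include <stdio.h>
	#include <stdint.h>

	#define SRR(x, y) (((x) + (1ULL << ((y) - 1))) >> (y))

	int main(void)
	{
		/* decay a leftover sum of 1 by one period: */
		uint64_t v = 1 * 0xfa83b2daULL;

		printf("SRR(v, 32) = %llu\n",	/* 1: stuck forever  */
		       (unsigned long long)SRR(v, 32));
		printf("v >> 32    = %llu\n",	/* 0: decays to zero */
		       (unsigned long long)(v >> 32));
		return 0;
	}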