[RFC][PATCH 04/14] sched/fair: More accurate reweight_entity()

From: Peter Zijlstra
Date: Fri May 12 2017 - 13:24:20 EST


When a (group) entity changes its weight we should instantly change its
load_avg and propagate that change into the sums it is part of, because
we use these values to predict future behaviour and are not interested
in their historical value.
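
As a rough userspace illustration of that idea (not part of the patch;
the toy_* names, the add_clamped() helper and the values in main() are
invented for the example, while LOAD_AVG_MAX and the divider follow the
PELT convention used in the diff below), the reweight amounts to
rebuilding the entity's load_avg from its weight-independent load_sum
and pushing only the delta into the parent:

	#include <stdio.h>

	#define LOAD_AVG_MAX	47742	/* maximum attainable PELT sum */

	struct toy_entity {
		unsigned long weight;
		unsigned long load_avg;		/* weight * load_sum / divider */
		unsigned long long load_sum;	/* weight-independent runnable sum */
		unsigned int period_contrib;	/* remainder of the last 1024us period */
	};

	struct toy_cfs_rq {
		unsigned long load_avg;
	};

	/* clamp-on-underflow add, in the spirit of add_positive() below */
	static void add_clamped(unsigned long *ptr, long delta)
	{
		unsigned long res = *ptr + delta;

		if (delta < 0 && res > *ptr)	/* wrapped below zero */
			res = 0;
		*ptr = res;
	}

	static void toy_reweight(struct toy_cfs_rq *cfs_rq, struct toy_entity *se,
				 unsigned long new_weight)
	{
		unsigned long old_avg = se->load_avg;
		unsigned int divider = LOAD_AVG_MAX - 1024 + se->period_contrib;

		/* rebuild the average with the new weight, no decay involved */
		se->load_avg = (unsigned long)(new_weight * se->load_sum / divider);
		se->weight = new_weight;

		/*
		 * Push only the delta into the parent sum; the real patch does
		 * the same for load_sum and the runnable_* counterparts.
		 */
		add_clamped(&cfs_rq->load_avg, (long)(se->load_avg - old_avg));
	}

	int main(void)
	{
		struct toy_entity se = {
			.weight = 1024, .load_avg = 512,
			.load_sum = 23871,	/* roughly half of LOAD_AVG_MAX */
			.period_contrib = 512,
		};
		struct toy_cfs_rq rq = { .load_avg = 512 };

		toy_reweight(&rq, &se, 2048);	/* doubling the weight... */
		/* ...roughly doubles load_avg, instantly, for both se and rq */
		printf("se %lu rq %lu\n", se.load_avg, rq.load_avg);
		return 0;
	}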

Without this change, the change in load would need to propagate through
the average, by which time the load could have changed yet again, always
chasing itself.

With this change, the cfs_rq load_avg sum more accurately reflects the
currently runnable load and the expected return of blocked load.
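
For reference, the clamp semantics of the add_positive() helper added by
the patch can be exercised in isolation (a standalone sketch only;
READ_ONCE()/WRITE_ONCE() are replaced here by plain accesses, which drops
the lockless-observer guarantee the kernel version provides):

	#include <stdio.h>

	/* Same shape as the kernel macro, minus READ_ONCE()/WRITE_ONCE(). */
	#define add_positive(_ptr, _val) do {				\
		typeof(_ptr) ptr = (_ptr);				\
		typeof(_val) val = (_val);				\
		typeof(*ptr) res, var = *ptr;				\
									\
		res = var + val;					\
									\
		/* unsigned underflow shows up as wraparound: res > var */ \
		if (val < 0 && res > var)				\
			res = 0;					\
									\
		*ptr = res;						\
	} while (0)

	int main(void)
	{
		unsigned long load = 100;

		add_positive(&load, -40L);	/* ordinary signed add: 100 - 40 = 60 */
		printf("%lu\n", load);

		add_positive(&load, -200L);	/* would go below zero: clamped to 0 */
		printf("%lu\n", load);

		return 0;
	}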

Reported-by: Paul Turner <pjt@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/sched/fair.c | 75 +++++++++++++++++++++++++++++++---------------------
1 file changed, 46 insertions(+), 29 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2673,9 +2673,41 @@ static long calc_cfs_shares(struct cfs_r
}
# endif /* CONFIG_SMP */

+/*
+ * Signed add and clamp on underflow.
+ *
+ * Explicitly do a load-store to ensure the intermediate value never hits
+ * memory. This allows lockless observations without ever seeing the negative
+ * values.
+ */
+#define add_positive(_ptr, _val) do { \
+ typeof(_ptr) ptr = (_ptr); \
+ typeof(_val) val = (_val); \
+ typeof(*ptr) res, var = READ_ONCE(*ptr); \
+ \
+ res = var + val; \
+ \
+ if (val < 0 && res > var) \
+ res = 0; \
+ \
+ WRITE_ONCE(*ptr, res); \
+} while (0)
+
+/*
+ * XXX we want to get rid of this helper and use the full load resolution.
+ */
+static inline long se_weight(struct sched_entity *se)
+{
+ return scale_load_down(se->load.weight);
+}
+
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
unsigned long weight)
{
+ unsigned long se_load_avg = se->avg.load_avg;
+ u64 se_load_sum = se_weight(se) * se->avg.load_sum;
+ u64 new_load_sum = scale_load_down(weight) * se->avg.load_sum;
+
if (se->on_rq) {
/* commit outstanding execution time */
if (cfs_rq->curr == se)
@@ -2683,10 +2715,23 @@ static void reweight_entity(struct cfs_r
account_entity_dequeue(cfs_rq, se);
}

+ se->avg.load_avg = div_u64(new_load_sum,
+ LOAD_AVG_MAX - 1024 + se->avg.period_contrib);
+
update_load_set(&se->load, weight);

- if (se->on_rq)
+ if (se->on_rq) {
account_entity_enqueue(cfs_rq, se);
+ add_positive(&cfs_rq->runnable_load_avg,
+ (long)(se->avg.load_avg - se_load_avg));
+ add_positive(&cfs_rq->runnable_load_sum,
+ (s64)(new_load_sum - se_load_sum));
+ }
+
+ add_positive(&cfs_rq->avg.load_avg,
+ (long)(se->avg.load_avg - se_load_avg));
+ add_positive(&cfs_rq->avg.load_sum,
+ (s64)(new_load_sum - se_load_sum));
}

static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
@@ -2927,14 +2972,6 @@ ___update_load_avg(struct sched_avg *sa,
}

/*
- * XXX we want to get rid of this helper and use the full load resolution.
- */
-static inline long se_weight(struct sched_entity *se)
-{
- return scale_load_down(se->load.weight);
-}
-
-/*
* sched_entity:
*
* load_sum := runnable_sum
@@ -2983,26 +3020,6 @@ __update_load_avg_cfs_rq(u64 now, int cp
return 0;
}

-/*
- * Signed add and clamp on underflow.
- *
- * Explicitly do a load-store to ensure the intermediate value never hits
- * memory. This allows lockless observations without ever seeing the negative
- * values.
- */
-#define add_positive(_ptr, _val) do { \
- typeof(_ptr) ptr = (_ptr); \
- typeof(_val) val = (_val); \
- typeof(*ptr) res, var = READ_ONCE(*ptr); \
- \
- res = var + val; \
- \
- if (val < 0 && res > var) \
- res = 0; \
- \
- WRITE_ONCE(*ptr, res); \
-} while (0)
-
#ifdef CONFIG_FAIR_GROUP_SCHED
/**
* update_tg_load_avg - update the tg's load avg