[PATCH v2 3/3] sched/fair: add lsub_positive and use it consistently

From: Patrick Bellasi
Date: Mon Nov 05 2018 - 09:54:32 EST


The following pattern:

var -= min_t(typeof(var), var, val);

is used multiple times in fair.c.

The existing sub_positive() already capture that pattern but it adds
also explicit load-sotre to properly support lockless observations.
In other cases, the patter above is used to update local, and/or not
concurrently accessed, variables.

Let's add a simpler version of sub_positive, targeted to local variables
updates, which gives the same readability benefits at calling sites
without enforcing {READ,WRITE}_ONCE barriers.

Signed-off-by: Patrick Bellasi <patrick.bellasi@xxxxxxx>
Link: https://lore.kernel.org/lkml/20181031184527.GA3178@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
kernel/sched/fair.c | 24 +++++++++++++++++-------
1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index aeb37fe4dbb1..d50c739127d6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2734,6 +2734,17 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
WRITE_ONCE(*ptr, res); \
} while (0)

+/*
+ * Remove and clamp on negative, from a local variable.
+ *
+ * A variant of sub_positive which do not use explicit load-store
+ * and thus optimized for local variable updates.
+ */
+#define lsub_positive(_ptr, _val) do { \
+ typeof(_ptr) ptr = (_ptr); \
+ *ptr -= min_t(typeof(*ptr), *ptr, _val); \
+} while (0)
+
#ifdef CONFIG_SMP
static inline void
enqueue_runnable_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -4639,7 +4650,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
cfs_b->distribute_running = 0;
throttled = !list_empty(&cfs_b->throttled_cfs_rq);

- cfs_b->runtime -= min(runtime, cfs_b->runtime);
+ lsub_positive(&cfs_b->runtime, runtime);
}

/*
@@ -4773,7 +4784,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)

raw_spin_lock(&cfs_b->lock);
if (expires == cfs_b->runtime_expires)
- cfs_b->runtime -= min(runtime, cfs_b->runtime);
+ lsub_positive(&cfs_b->runtime, runtime);
cfs_b->distribute_running = 0;
raw_spin_unlock(&cfs_b->lock);
}
@@ -6240,7 +6251,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
util = READ_ONCE(cfs_rq->avg.util_avg);

/* Discount task's util from CPU's util */
- util -= min_t(unsigned int, util, task_util(p));
+ lsub_positive(&util, task_util(p));

/*
* Covered cases:
@@ -6289,10 +6300,9 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
* properly fix the execl regression and it helps in further
* reducing the chances for the above race.
*/
- if (unlikely(task_on_rq_queued(p) || current == p)) {
- estimated -= min_t(unsigned int, estimated,
- _task_util_est(p));
- }
+ if (unlikely(task_on_rq_queued(p) || current == p))
+ lsub_positive(&estimated, _task_util_est(p));
+
util = max(util, estimated);
}

--
2.18.0