[tip: sched/core] sched/fair: Add NOHZ balancer flag for nohz.next_balance updates

From: tip-bot2 for Valentin Schneider
Date: Thu Sep 09 2021 - 07:19:43 EST


The following commit has been merged into the sched/core branch of tip:

Commit-ID: 013ce5ed58f799a2f035b732f904f6ebd8e8d881
Gitweb: https://git.kernel.org/tip/013ce5ed58f799a2f035b732f904f6ebd8e8d881
Author: Valentin Schneider <valentin.schneider@xxxxxxx>
AuthorDate: Mon, 23 Aug 2021 12:16:59 +01:00
Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Thu, 09 Sep 2021 11:27:29 +02:00

sched/fair: Add NOHZ balancer flag for nohz.next_balance updates

A following patch will trigger NOHZ idle balances as a means to update
nohz.next_balance. Vincent noted that blocked load updates can have
non-negligible overhead, which should be avoided if the intent is to only
update nohz.next_balance.

Add a new NOHZ balance kick flag, NOHZ_NEXT_KICK. Gate NOHZ blocked load
update by the presence of NOHZ_STATS_KICK - currently all NOHZ balance
kicks will have the NOHZ_STATS_KICK flag set, so no change in behaviour is
expected.

Suggested-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Signed-off-by: Valentin Schneider <valentin.schneider@xxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Reviewed-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Link: https://lkml.kernel.org/r/20210823111700.2842997-2-valentin.schneider@xxxxxxx
---
kernel/sched/fair.c | 24 ++++++++++++++----------
kernel/sched/sched.h | 8 +++++++-
2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7b3e859..48ce754 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10342,7 +10342,7 @@ static void nohz_balancer_kick(struct rq *rq)
goto out;

if (rq->nr_running >= 2) {
- flags = NOHZ_KICK_MASK;
+ flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
goto out;
}

@@ -10356,7 +10356,7 @@ static void nohz_balancer_kick(struct rq *rq)
* on.
*/
if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
- flags = NOHZ_KICK_MASK;
+ flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
goto unlock;
}
}
@@ -10370,7 +10370,7 @@ static void nohz_balancer_kick(struct rq *rq)
*/
for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
if (sched_asym_prefer(i, cpu)) {
- flags = NOHZ_KICK_MASK;
+ flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
goto unlock;
}
}
@@ -10383,7 +10383,7 @@ static void nohz_balancer_kick(struct rq *rq)
* to run the misfit task on.
*/
if (check_misfit_status(rq, sd)) {
- flags = NOHZ_KICK_MASK;
+ flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
goto unlock;
}

@@ -10410,7 +10410,7 @@ static void nohz_balancer_kick(struct rq *rq)
*/
nr_busy = atomic_read(&sds->nr_busy_cpus);
if (nr_busy > 1) {
- flags = NOHZ_KICK_MASK;
+ flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
goto unlock;
}
}
@@ -10572,7 +10572,8 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
* setting the flag, we are sure to not clear the state and not
* check the load of an idle cpu.
*/
- WRITE_ONCE(nohz.has_blocked, 0);
+ if (flags & NOHZ_STATS_KICK)
+ WRITE_ONCE(nohz.has_blocked, 0);

/*
* Ensures that if we miss the CPU, we must see the has_blocked
@@ -10594,13 +10595,15 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
* balancing owner will pick it up.
*/
if (need_resched()) {
- has_blocked_load = true;
+ if (flags & NOHZ_STATS_KICK)
+ has_blocked_load = true;
goto abort;
}

rq = cpu_rq(balance_cpu);

- has_blocked_load |= update_nohz_stats(rq);
+ if (flags & NOHZ_STATS_KICK)
+ has_blocked_load |= update_nohz_stats(rq);

/*
* If time for next balance is due,
@@ -10631,8 +10634,9 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
if (likely(update_next_balance))
nohz.next_balance = next_balance;

- WRITE_ONCE(nohz.next_blocked,
- now + msecs_to_jiffies(LOAD_AVG_PERIOD));
+ if (flags & NOHZ_STATS_KICK)
+ WRITE_ONCE(nohz.next_blocked,
+ now + msecs_to_jiffies(LOAD_AVG_PERIOD));

abort:
/* There is still blocked load, enable periodic update */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e7e2bba..30b7bd2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2706,12 +2706,18 @@ extern void cfs_bandwidth_usage_dec(void);
#define NOHZ_BALANCE_KICK_BIT 0
#define NOHZ_STATS_KICK_BIT 1
#define NOHZ_NEWILB_KICK_BIT 2
+#define NOHZ_NEXT_KICK_BIT 3

+/* Run rebalance_domains() */
#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT)
+/* Update blocked load */
#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT)
+/* Update blocked load when entering idle */
#define NOHZ_NEWILB_KICK BIT(NOHZ_NEWILB_KICK_BIT)
+/* Update nohz.next_balance */
+#define NOHZ_NEXT_KICK BIT(NOHZ_NEXT_KICK_BIT)

-#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
+#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK | NOHZ_NEXT_KICK)

#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)