[PATCH 15/15] tick: Assume timekeeping is correctly handed over upon last offline idle call

From: Frederic Weisbecker
Date: Wed Jan 31 2024 - 18:31:31 EST


The timekeeping duty is handed over from the outgoing CPU on stop
machine, then the oneshot tick is stopped right after. Therefore it's
guaranteed that the current CPU isn't the timekeeper upon its last call
to idle.

Besides, calling tick_nohz_idle_stop_tick() while the dying CPU goes
into idle suggests that the tick is going to be stopped while it is
actually stopped already from the appropriate CPU hotplug state.

Remove the confusing call and the obsolete case handling and convert it
to a sanity check that verifies the above assumption.

Reviewed-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Frederic Weisbecker <frederic@xxxxxxxxxx>
---
include/linux/tick.h | 2 ++
kernel/cpu.c | 1 +
kernel/sched/idle.c | 1 -
kernel/time/tick-common.c | 4 ++++
kernel/time/tick-sched.c | 13 +------------
5 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index c7840ae8ebaf..44fddfa93e18 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -29,8 +29,10 @@ static inline void tick_cleanup_dead_cpu(int cpu) { }

#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU)
extern int tick_cpu_dying(unsigned int cpu);
+extern void tick_assert_timekeeping_handover(void);
#else
#define tick_cpu_dying NULL
+static inline void tick_assert_timekeeping_handover(void) { }
#endif

#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5a8ad4f5ccf3..7e84a7b0675e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1399,6 +1399,7 @@ void cpuhp_report_idle_dead(void)
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

BUG_ON(st->state != CPUHP_AP_OFFLINE);
+ tick_assert_timekeeping_handover();
rcutree_report_cpu_dead();
st->state = CPUHP_AP_IDLE_DEAD;
/*
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 31231925f1ec..b15d40cad7ea 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -291,7 +291,6 @@ static void do_idle(void)
local_irq_disable();

if (cpu_is_offline(cpu)) {
- tick_nohz_idle_stop_tick();
cpuhp_report_idle_dead();
arch_cpu_idle_dead();
}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 9cd09eea06d6..fb0fdec8719a 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -396,6 +396,10 @@ int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);

#ifdef CONFIG_HOTPLUG_CPU
+void tick_assert_timekeeping_handover(void)
+{
+ WARN_ON_ONCE(tick_do_timer_cpu == smp_processor_id());
+}
/*
* Stop the tick and transfer the timekeeping job away from a dying cpu.
*/
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index cb8e4a171288..0dcd1c0e0a4e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1117,18 +1117,7 @@ static bool report_idle_softirq(void)

static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
{
- /*
- * If this CPU is offline and it is the one which updates
- * jiffies, then give up the assignment and let it be taken by
- * the CPU which runs the tick timer next. If we don't drop
- * this here, the jiffies might be stale and do_timer() never
- * gets invoked.
- */
- if (unlikely(!cpu_online(cpu))) {
- if (cpu == tick_do_timer_cpu)
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
- return false;
- }
+ WARN_ON_ONCE(cpu_is_offline(cpu));

if (unlikely(!tick_sched_flag_test(ts, TS_FLAG_NOHZ)))
return false;
--
2.43.0