[PATCH v4] hrtimer: avoid retrigger_next_event IPI

From: Marcelo Tosatti
Date: Thu Apr 15 2021 - 16:40:53 EST


Setting the realtime clock triggers an IPI to all CPUs to reprogram
the clock event device.

However, only realtime and TAI clocks have their offsets updated
(and therefore potentially require a reprogram).

Instead of sending an IPI unconditionally, check whether each per-CPU hrtimer
base has active timers in the CLOCK_REALTIME and CLOCK_TAI bases. If it does
not, update the realtime and TAI base offsets remotely and skip the IPI. This
ensures that any subsequently armed timers on CLOCK_REALTIME and CLOCK_TAI
are evaluated with the correct offsets.

Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

---

v4:
- Drop unused code (Thomas).

v3:
- Nicer changelog (Thomas).
- Code style fixes (Thomas).
- Compilation warning with CONFIG_HIGH_RES_TIMERS=n (Thomas).
- Shrink preemption disabled section (Thomas).

v2:
- Only REALTIME and TAI bases are affected by offset-to-monotonic changes (Thomas).
- Don't special case nohz_full CPUs (Thomas).

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 5c9d968187ae..e228c0a0c98f 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -871,6 +871,19 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
 	tick_program_event(expires, 1);
 }

+/* Clock bases whose offsets are updated when the realtime clock is set. */
+#define CLOCK_SET_BASES	((1U << HRTIMER_BASE_REALTIME)		| \
+			 (1U << HRTIMER_BASE_REALTIME_SOFT)	| \
+			 (1U << HRTIMER_BASE_TAI)		| \
+			 (1U << HRTIMER_BASE_TAI_SOFT))
+
+/* Must this CPU be interrupted after a clock set, or can it be skipped? */
+static bool need_reprogram_timer(struct hrtimer_cpu_base *cpu_base)
+{
+	return cpu_base->softirq_activated ||
+	       (cpu_base->active_bases & CLOCK_SET_BASES);
+}
+
 /*
  * Clock realtime was set
  *
@@ -885,8 +898,45 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
 void clock_was_set(void)
 {
 #ifdef CONFIG_HIGH_RES_TIMERS
-	/* Retrigger the CPU local events everywhere */
-	on_each_cpu(retrigger_next_event, NULL, 1);
+	cpumask_var_t mask;
+	int cpu;
+
+	/* Without a scratch cpumask, fall back to interrupting every CPU. */
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
+		on_each_cpu(retrigger_next_event, NULL, 1);
+		goto set_timerfd;
+	}
+
+	/* Avoid interrupting CPUs if possible */
+	cpus_read_lock();
+	for_each_online_cpu(cpu) {
+		unsigned long flags;
+		struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
+
+		raw_spin_lock_irqsave(&cpu_base->lock, flags);
+		if (need_reprogram_timer(cpu_base)) {
+			cpumask_set_cpu(cpu, mask);
+		} else {
+			/*
+			 * No IPI for this CPU: refresh its base offsets
+			 * remotely so that timers armed later on
+			 * CLOCK_REALTIME/CLOCK_TAI use the new offsets.
+			 */
+			hrtimer_update_base(cpu_base);
+		}
+		raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+	}
+
+	/*
+	 * NOTE(review): smp_call_function_many() skips the calling CPU;
+	 * confirm the local CPU never needs retrigger_next_event() here.
+	 */
+	preempt_disable();
+	smp_call_function_many(mask, retrigger_next_event, NULL, 1);
+	preempt_enable();
+	cpus_read_unlock();
+	free_cpumask_var(mask);
+set_timerfd:
 #endif
 	timerfd_clock_was_set();
 }