This patch adds a mechanism to detect and warn about long-running IRQ
handlers exceeding a user-defined duration threshold in microseconds.
The feature is enabled via the kernel boot parameter:
"irqhandler.duration_warn_us=<threshold_in_us>"
For example, passing irqhandler.duration_warn_us=1000 will warn if an
IRQ handler takes more than 1000 microseconds.
The implementation uses local_clock() to measure how long each IRQ
handler runs. When the threshold is exceeded, a ratelimited warning is
printed:
"[CPU14] long duration on IRQ[159:bad_irq_handler [long_irq]], took: 1330 us"
Signed-off-by: Wladislav Wiebe <wladislav.wiebe@xxxxxxxxx>
---
V1 -> V2: refactor to use local_clock() instead of jiffies and replace
the Kconfig knobs with a new command-line parameter.
V1 link: https://lore.kernel.org/lkml/20250630124721.18232-1-wladislav.wiebe@xxxxxxxxx/
---
.../admin-guide/kernel-parameters.txt | 5 ++
kernel/irq/handle.c | 48 ++++++++++++++++++-
2 files changed, 52 insertions(+), 1 deletion(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1f2c0874da9..fa89f21ea1e6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2543,6 +2543,11 @@
for it. Intended to get systems with badly broken
firmware running.
+	irqhandler.duration_warn_us= [KNL,EARLY]
+			Format: <integer>
+			Warn if an IRQ handler runs longer than the given
+			number of microseconds; the value must be greater
+			than 0. Useful for identifying long-running IRQs
+			in the system.
+
irqpoll [HW]
When an interrupt is not handled search all handlers
for it. Also check all handlers each timer
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 9489f93b3db3..eab8fdfab8d8 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -136,6 +136,44 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
wake_up_process(action->thread);
}
+/*
+ * Optional detection of long-running interrupt handlers.
+ *
+ * The static key starts out disabled and the threshold starts out as zero.
+ * Both are only written by irqhandler_duration_check_setup() below.
+ */
+static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled);
+static u64 irqhandler_duration_threshold_us __ro_after_init;
+
+/*
+ * Parse the "irqhandler.duration_warn_us" early parameter.
+ *
+ * A non-zero value is stored as the threshold (in microseconds) and the
+ * static key is enabled. A missing argument is ignored and returns 0.
+ * Unparsable input returns the kstrtoul() error; a value of zero is
+ * reported with pr_err() and returns -EINVAL.
+ */
+static int __init irqhandler_duration_check_setup(char *arg)
+{
+	unsigned long threshold;
+	int err;
+
+	if (!arg)
+		return 0;
+
+	/* Base 0: kstrtoul() picks the radix from the string's prefix. */
+	err = kstrtoul(arg, 0, &threshold);
+	if (err)
+		return err;
+
+	/* A threshold of zero is rejected rather than enabling the check. */
+	if (!threshold) {
+		pr_err("Invalid irqhandler.duration_warn_us setting (%lu)\n", threshold);
+		return -EINVAL;
+	}
+
+	/* Publish the threshold before switching the check on. */
+	irqhandler_duration_threshold_us = threshold;
+	static_branch_enable(&irqhandler_duration_check_enabled);
+
+	return 0;
+}
+early_param("irqhandler.duration_warn_us", irqhandler_duration_check_setup);
+
+/*
+ * irqhandler_duration_check - warn when an interrupt handler ran too long
+ * @ts_start:	timestamp to measure from; presumably a local_clock() reading
+ *		taken by the caller right before the handler was invoked
+ *		(the call site is not part of this hunk - confirm there)
+ * @irq:	interrupt number, only used in the warning message
+ * @action:	action whose ->handler is named in the warning message
+ *
+ * local_clock() counts nanoseconds, while the threshold is configured in
+ * microseconds. The comparison is done in nanoseconds and the elapsed time
+ * is converted with a real division by NSEC_PER_USEC only when the warning
+ * is printed. Shifting by 10 instead would divide by 1024, under-report
+ * every duration by roughly 2.3% and shift the effective threshold by the
+ * same factor.
+ */
+static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq,
+					     struct irqaction *action)
+{
+	/*
+	 * A multiplication is cheap enough for the hot path. It can only
+	 * wrap for thresholds beyond ~584 years, which is not a sane setting.
+	 */
+	u64 threshold_ns = irqhandler_duration_threshold_us * NSEC_PER_USEC;
+	u64 delta_ns = local_clock() - ts_start;
+
+	if (likely(delta_ns <= threshold_ns))
+		return;
+
+	/* div_u64() keeps the 64-bit division buildable on 32-bit kernels. */
+	pr_warn_ratelimited("[CPU%d] long duration on IRQ[%u:%ps], took: %llu us\n",
+			    smp_processor_id(), irq, action->handler,
+			    div_u64(delta_ns, NSEC_PER_USEC));
+}
+
irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
{
irqreturn_t retval = IRQ_NONE;