Add a mechanism to detect and warn about long-running IRQ handlers that
exceed a user-defined duration threshold in microseconds.

The feature is enabled via the kernel boot parameter:

  irqhandler.duration_warn_us=<threshold_in_us>

For example, passing irqhandler.duration_warn_us=1000 will warn if an IRQ
handler takes more than 1000 microseconds.

The implementation uses local_clock() to measure the execution duration of
IRQ handlers. The threshold is converted to nanoseconds once at boot, so
the measured delta is compared without any scaling and is only converted
to microseconds when a warning is printed.

When the threshold is exceeded, a ratelimited warning is printed:

  [CPU14] long duration on IRQ[159:bad_irq_handler [long_irq]], took: 1330 us

Signed-off-by: Wladislav Wiebe <wladislav.wiebe@xxxxxxxxx>
---
V1 -> V2: refactor to use local_clock() instead of jiffies and replace
          Kconfig knobs by a new command-line parameter.
V1 link: https://lore.kernel.org/lkml/20250630124721.18232-1-wladislav.wiebe@xxxxxxxxx/

Review fixups on top of V2:
 - compare in nanoseconds and convert with div_u64() for printing, instead
   of approximating microseconds with ">> 10" (a division by 1024)
 - widen the threshold to u64 before scaling it to nanoseconds
 - limit ts_start to the branch that uses it and brace both branches
 - diffstat and hunk headers recomputed by hand; index hashes are V2's
---
 .../admin-guide/kernel-parameters.txt |  5 ++
 kernel/irq/handle.c                   | 60 ++++++++++++++++++-
 2 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1f2c0874da9..fa89f21ea1e6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2543,6 +2543,11 @@
 			for it. Intended to get systems with badly broken
 			firmware running.
 
+	irqhandler.duration_warn_us= [KNL,EARLY]
+			Warn if an IRQ handler exceeds the specified duration
+			threshold in microseconds. Useful for identifying
+			long-running IRQs in the system.
+
 	irqpoll		[HW]
 			When an interrupt is not handled search all handlers
 			for it. Also check all handlers each timer
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 9489f93b3db3..eab8fdfab8d8 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -136,6 +136,55 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
 	wake_up_process(action->thread);
 }
 
+static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled);
+/* Kept in nanoseconds to match local_clock() deltas */
+static u64 irqhandler_duration_threshold_ns __ro_after_init;
+
+/*
+ * Parse "irqhandler.duration_warn_us=<threshold_in_us>" and, for a non-zero
+ * value, enable the duration check in __handle_irq_event_percpu().
+ */
+static int __init irqhandler_duration_check_setup(char *arg)
+{
+	unsigned long val;
+	int ret;
+
+	if (!arg)
+		return 0;
+
+	ret = kstrtoul(arg, 0, &val);
+	if (ret)
+		return ret;
+
+	if (!val) {
+		pr_err("Invalid irqhandler.duration_warn_us setting (%lu)\n", val);
+		return -EINVAL;
+	}
+
+	/* Widen first: unsigned long may be 32 bit and val * 1000 could wrap */
+	irqhandler_duration_threshold_ns = (u64)val * NSEC_PER_USEC;
+	static_branch_enable(&irqhandler_duration_check_enabled);
+
+	return 0;
+}
+early_param("irqhandler.duration_warn_us", irqhandler_duration_check_setup);
+
+/*
+ * Warn (ratelimited) when the handler of @action, started at local_clock()
+ * time @ts_start, ran for longer than the configured threshold.
+ */
+static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq,
+					     const struct irqaction *action)
+{
+	u64 delta_ns = local_clock() - ts_start;
+
+	if (unlikely(delta_ns > irqhandler_duration_threshold_ns)) {
+		pr_warn_ratelimited("[CPU%d] long duration on IRQ[%u:%ps], took: %llu us\n",
+				    smp_processor_id(), irq, action->handler,
+				    div_u64(delta_ns, NSEC_PER_USEC));
+	}
+}
+
 irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
 {
 	irqreturn_t retval = IRQ_NONE;
@@ -155,7 +204,16 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
 			lockdep_hardirq_threaded();
 
 		trace_irq_handler_entry(irq, action);
-		res = action->handler(irq, action->dev_id);
+
+		if (static_branch_unlikely(&irqhandler_duration_check_enabled)) {
+			u64 ts_start = local_clock();
+
+			res = action->handler(irq, action->dev_id);
+			irqhandler_duration_check(ts_start, irq, action);
+		} else {
+			res = action->handler(irq, action->dev_id);
+		}
+
 		trace_irq_handler_exit(irq, action, res);
 
 		if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n",
-- 
2.39.3.dirty