Introduce a mechanism to detect and warn about prolonged IRQ handlers. With a new command-line parameter (irqhandler.duration_warn_us=), users can configure the duration threshold in microseconds when a warning in such format should be emitted: "[CPU14] long duration of IRQ[159:bad_irq_handler [long_irq]], took: 1330 us" The implementation uses local_clock() to measure the execution duration of the generic IRQ per-CPU event handler. Signed-off-by: Wladislav Wiebe <wladislav.wiebe@xxxxxxxxx> --- V2 -> V3: Addressed review comments based on v2: https://lore.kernel.org/lkml/20250714084209.918-1-wladislav.wiebe@xxxxxxxxx/ - refactor commit message - switch from early_param() to __setup() - comment on approximation of nano to microseconds conversion - move ts_start to if() branch - align pr_warn arguments - surround else block with brackets as well - invert the condition and drop the "else {}" in cmdline arg. check - make struct irqaction *action function param. const in irqhandler_duration_check() - print smp_processor_id() return as unsigned int - fix warning text "on IRQ[...]" -> "of IRQ[...]" V1 -> V2: refactor to use local_clock() instead of jiffies and replace Kconfig knobs by a new command-line parameter. V1 link: https://lore.kernel.org/lkml/20250630124721.18232-1-wladislav.wiebe@xxxxxxxxx/ --- .../admin-guide/kernel-parameters.txt | 5 ++ kernel/irq/handle.c | 49 ++++++++++++++++++- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 07e22ba5bfe3..7a2d0338ee91 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2543,6 +2543,11 @@ for it. Intended to get systems with badly broken firmware running. + irqhandler.duration_warn_us= [KNL] + Warn if an IRQ handler exceeds the specified duration + threshold in microseconds. Useful for identifying + long-running IRQs in the system. + irqpoll [HW] When an interrupt is not handled search all handlers for it. Also check all handlers each timer diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 9489f93b3db3..258f40ad8cb1 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -136,6 +136,44 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action) wake_up_process(action->thread); } +static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled); +static u64 irqhandler_duration_threshold_us __ro_after_init; + +static int __init irqhandler_duration_check_setup(char *arg) +{ + unsigned long val; + int ret; + + ret = kstrtoul(arg, 0, &val); + if (ret) { + pr_err("Unable to parse irqhandler.duration_warn_us setting: ret=%d\n", ret); + return 0; + } + + if (!val) { + pr_err("Invalid irqhandler.duration_warn_us setting, must be > 0\n"); + return 0; + } + + irqhandler_duration_threshold_us = val; + static_branch_enable(&irqhandler_duration_check_enabled); + + return 1; +} +__setup("irqhandler.duration_warn_us=", irqhandler_duration_check_setup); + +static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq, + const struct irqaction *action) +{ + /* Approx. conversion to microseconds */ + u64 delta_us = (local_clock() - ts_start) >> 10; + + if (unlikely(delta_us > irqhandler_duration_threshold_us)) { + pr_warn_ratelimited("[CPU%u] long duration of IRQ[%u:%ps], took: %llu us\n", + smp_processor_id(), irq, action->handler, delta_us); + } +} + irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) { irqreturn_t retval = IRQ_NONE; @@ -155,7 +193,16 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) lockdep_hardirq_threaded(); trace_irq_handler_entry(irq, action); - res = action->handler(irq, action->dev_id); + + if (static_branch_unlikely(&irqhandler_duration_check_enabled)) { + u64 ts_start = local_clock(); + + res = action->handler(irq, action->dev_id); + irqhandler_duration_check(ts_start, irq, action); + } else { + res = action->handler(irq, action->dev_id); + } + trace_irq_handler_exit(irq, action, res); if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n", -- 2.39.3.dirty