Something like the completely untested patch below should work independently
of config options.
Thanks,
tglx
---
include/linux/irqdesc.h | 4 ++
kernel/irq/manage.c | 3 +
kernel/irq/spurious.c | 74 +++++++++++++++++++++++++++++++++++-------------
3 files changed, 61 insertions(+), 20 deletions(-)
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -30,6 +30,8 @@ struct pt_regs;
* @tot_count: stats field for non-percpu irqs
* @irq_count: stats field to detect stalled irqs
* @last_unhandled: aging timer for unhandled count
+ * @storm_count: Counter for irq storm detection
+ * @storm_checked: Timestamp for irq storm detection
* @irqs_unhandled: stats field for spurious unhandled interrupts
* @threads_handled: stats field for deferred spurious detection of
threaded handlers
* @threads_handled_last: comparator field for deferred spurious
detection of theraded handlers
@@ -65,6 +67,8 @@ struct irq_desc {
unsigned int tot_count;
unsigned int irq_count; /* For detecting broken IRQs */
unsigned long last_unhandled; /* Aging timer for unhandled count */
+ unsigned long storm_count;
+ unsigned long storm_checked;
unsigned int irqs_unhandled;
atomic_t threads_handled;
int threads_handled_last;
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1581,6 +1581,9 @@ static int
if (!shared) {
init_waitqueue_head(&desc->wait_for_threads);
+ /* Take a timestamp for interrupt storm detection */
+ desc->storm_checked = jiffies;
+
/* Setup the type (level, edge polarity) if configured: */
if (new->flags & IRQF_TRIGGER_MASK) {
ret = __irq_set_trigger(desc,
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -21,6 +21,7 @@ static void poll_spurious_irqs(struct ti
static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs);
static int irq_poll_cpu;
static atomic_t irq_poll_active;
+static unsigned long irqstorm_limit __ro_after_init;
/*
* We wait here for a poller to finish.
@@ -189,18 +190,21 @@ static inline int bad_action_ret(irqretu
* (The other 100-of-100,000 interrupts may have been a correctly
* functioning device sharing an IRQ with the failing one)
*/
-static void __report_bad_irq(struct irq_desc *desc, irqreturn_t
action_ret)
+static void __report_bad_irq(struct irq_desc *desc, irqreturn_t
action_ret,
+ bool storm)
{
unsigned int irq = irq_desc_get_irq(desc);
struct irqaction *action;
unsigned long flags;
- if (bad_action_ret(action_ret)) {
- printk(KERN_ERR "irq event %d: bogus return value %x\n",
- irq, action_ret);
- } else {
- printk(KERN_ERR "irq %d: nobody cared (try booting with "
+ if (!storm) {
+ if (bad_action_ret(action_ret)) {
+ pr_err("irq event %d: bogus return value %x\n",
+ irq, action_ret);
+ } else {
+ pr_err("irq %d: nobody cared (try booting with "
"the \"irqpoll\" option)\n", irq);
+ }
}
dump_stack();
printk(KERN_ERR "handlers:\n");
@@ -228,7 +232,7 @@ static void report_bad_irq(struct irq_de
if (count > 0) {
count--;
- __report_bad_irq(desc, action_ret);
+ __report_bad_irq(desc, action_ret, false);
}
}
@@ -267,6 +271,33 @@ try_misrouted_irq(unsigned int irq, stru
return action && (action->flags & IRQF_IRQPOLL);
}
+/*
+ * Report a misbehaving interrupt line and take it out of service.
+ *
+ * @desc:	descriptor of the offending interrupt
+ * @action_ret:	handler return value, forwarded to __report_bad_irq()
+ * @reason:	short word inserted into the "Disabling ... IRQ" message
+ * @storm:	forwarded to __report_bad_irq(); true for the storm detector
+ */
+static void disable_stuck_irq(struct irq_desc *desc, irqreturn_t action_ret,
+			      const char *reason, bool storm)
+{
+	unsigned int irq = irq_desc_get_irq(desc);
+
+	__report_bad_irq(desc, action_ret, storm);
+	pr_emerg("Disabling %s IRQ #%d\n", reason, irq);
+
+	/* Account the disable in the descriptor state before irq_disable() */
+	desc->depth++;
+	desc->istate |= IRQS_SPURIOUS_DISABLED;
+	irq_disable(desc);
+}
+
+/*
+ * Interrupt storm detector for runaway interrupts (handled or not).
+ *
+ * Counts interrupts on @desc in windows of HZ jiffies. When the count
+ * within one window reaches irqstorm_limit, the line is reported and
+ * disabled via disable_stuck_irq().
+ *
+ * Returns true if the interrupt was disabled, false otherwise.
+ */
+static bool irqstorm_detected(struct irq_desc *desc)
+{
+	unsigned long now = jiffies;
+
+	/*
+	 * Age the window before counting. Otherwise a count accumulated in
+	 * an expired window could trip the limit on a single late interrupt.
+	 */
+	if (time_after(now, desc->storm_checked + HZ)) {
+		desc->storm_count = 0;
+		desc->storm_checked = now;
+	}
+
+	if (++desc->storm_count < irqstorm_limit)
+		return false;
+
+	disable_stuck_irq(desc, IRQ_NONE, "runaway", true);
+	return true;
+}
+
#define SPURIOUS_DEFERRED 0x80000000
void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
@@ -403,24 +434,16 @@ void note_interrupt(struct irq_desc *des
desc->irqs_unhandled -= ok;
}
+ if (unlikely(irqstorm_limit && irqstorm_detected(desc)))
+ return;
+
desc->irq_count++;
if (likely(desc->irq_count < 100000))
return;
desc->irq_count = 0;
if (unlikely(desc->irqs_unhandled > 99900)) {
- /*
- * The interrupt is stuck
- */
- __report_bad_irq(desc, action_ret);
- /*
- * Now kill the IRQ
- */
- printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
- desc->istate |= IRQS_SPURIOUS_DISABLED;
- desc->depth++;
- irq_disable(desc);
-
+ disable_stuck_irq(desc, action_ret, "unhandled", false);
mod_timer(&poll_spurious_irq_timer,
jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
}
@@ -462,5 +485,16 @@ static int __init irqpoll_setup(char *st
"performance\n");
return 1;
}
-
__setup("irqpoll", irqpoll_setup);
+
+/*
+ * Parse the "irqstorm_limit=<n>" command line option.
+ *
+ * __setup() hands the handler the text following the registered prefix,
+ * so the prefix must include the '=' for kstrtoul() to see a plain number.
+ * Returns 1 to mark the option as consumed; invalid values are reported
+ * and leave irqstorm_limit untouched.
+ */
+static int __init irqstorm_setup(char *arg)
+{
+	if (kstrtoul(arg, 0, &irqstorm_limit)) {
+		pr_warn("Invalid irqstorm_limit value: %s\n", arg);
+	} else {
+		pr_info("Interrupt storm detector enabled. Limit=%lu / s\n",
+			irqstorm_limit);
+	}
+	return 1;
+}
+__setup("irqstorm_limit=", irqstorm_setup);