The capture kernel should try its best to save the crash info. Normally, an irq flood is caused by some trivial devices, which have no impact on saving the vmcore. Introduce a parameter "irqflood_suppress" to enable suppression of an irq flood by disabling the irq that was triggered most often on the current CPU. Signed-off-by: Pingfan Liu <kernelfans@xxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Jisheng Zhang <Jisheng.Zhang@xxxxxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: "Guilherme G. Piccoli" <gpiccoli@xxxxxxxxxxxxx> Cc: Petr Mladek <pmladek@xxxxxxxx> Cc: Marc Zyngier <maz@xxxxxxxxxx> Cc: Linus Walleij <linus.walleij@xxxxxxxxxx> Cc: afzal mohammed <afzal.mohd.ma@xxxxxxxxx> Cc: Lina Iyer <ilina@xxxxxxxxxxxxxx> Cc: "Gustavo A. R. Silva" <gustavo@xxxxxxxxxxxxxx> Cc: Maulik Shah <mkshah@xxxxxxxxxxxxxx> Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx> Cc: Jonathan Corbet <corbet@xxxxxxx> Cc: Pawan Gupta <pawan.kumar.gupta@xxxxxxxxxxxxxxx> Cc: Mike Kravetz <mike.kravetz@xxxxxxxxxx> Cc: Oliver Neukum <oneukum@xxxxxxxx> To: linux-kernel@xxxxxxxxxxxxxxx Cc: linux-doc@xxxxxxxxxxxxxxx Cc: kexec@xxxxxxxxxxxxxxxxxxx --- include/linux/irq.h | 2 ++ kernel/irq/spurious.c | 32 ++++++++++++++++++++++++++++++++ kernel/watchdog.c | 9 ++++++++- 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index 1b7f4df..140cb61 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -684,6 +684,8 @@ extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); /* Enable/disable irq debugging output: */ extern int noirqdebug_setup(char *str); +void suppress_max_irq(void); + /* Checks whether the interrupt can be requested by request_irq(): */ extern int can_request_irq(unsigned int irq, unsigned long irqflags); diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index f865e5f..d3d94d6 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -464,3 +464,35 @@ static int __init irqpoll_setup(char *str) } __setup("irqpoll", 
irqpoll_setup); + +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + +static bool irqflood_suppress; + +static int __init irqflood_suppress_setup(char *str) +{ + irqflood_suppress = true; + pr_info("enable auto suppress irqflood\n"); + return 1; +} +__setup("irqflood_suppress", irqflood_suppress_setup); + +void suppress_max_irq(void) +{ + unsigned int tmp, maxirq = 0, max = 0; + int irq; + + if (!irqflood_suppress) + return; + for_each_active_irq(irq) { + tmp = kstat_irqs_cpu(irq, smp_processor_id()); + if (max < tmp) { + maxirq = irq; + max = tmp; + } + } + pr_warn("Suppress irq:%u, which is triggered %u times\n", + maxirq, max); + disable_irq_nosync(maxirq); +} +#endif diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 230ac38..28a74e5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -24,6 +24,7 @@ #include <linux/sched/isolation.h> #include <linux/stop_machine.h> #include <linux/kernel_stat.h> +#include <linux/irq.h> #include <asm/irq_regs.h> #include <linux/kvm_para.h> @@ -364,9 +365,15 @@ static void check_irq_flood(void) percent = irqts * 100 / totalts; percent = percent < 100 ? percent : 100; __this_cpu_write(check_hint, -1); - if (percent >= 98) + if (percent >= 98) { pr_info("Irq flood occupies more than %lu%% of the past %lu seconds\n", percent, totalts >> 30); + /* + * Suppress top irq when scheduler does not work for long time and irq + * occupies too much time. + */ + suppress_max_irq(); + } } else if (cnt == 0) { __this_cpu_write(last_total_ts, totalts); __this_cpu_write(last_irq_ts, irqts); -- 2.7.5