For some high performance IO devices, interrupt may come very frequently, meantime IO request completion may take a bit time. Especially on some devices(SCSI or NVMe), IO requests can be submitted concurrently from multiple CPU cores, however IO completion is only done on one of these submission CPU cores. Then IRQ flood can be easily triggered, and CPU lockup. However, we can't simply use threaded IRQ for avoiding IRQ flood and CPU lockup, because thread wakup introduces extra latency, which does affect IO latency and throughput a lot. Add IRQF_RESCUE_THREAD to create one interrupt thread handler for avoiding irq flood, and the thread will be waken up for handling interrupt in case that IRQ flood is going to be triggered Cc: Long Li <longli@xxxxxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx>, Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Keith Busch <keith.busch@xxxxxxxxx> Cc: Jens Axboe <axboe@xxxxxx> Cc: Christoph Hellwig <hch@xxxxxx> Cc: Sagi Grimberg <sagi@xxxxxxxxxxx> Cc: John Garry <john.garry@xxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Hannes Reinecke <hare@xxxxxxxx> Cc: linux-nvme@xxxxxxxxxxxxxxxxxxx Cc: linux-scsi@xxxxxxxxxxxxxxx Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- include/linux/interrupt.h | 6 ++++++ kernel/irq/handle.c | 6 +++++- kernel/irq/manage.c | 12 ++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5b8328a99b2a..19fdab18e679 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -61,6 +61,11 @@ * interrupt handler after suspending interrupts. For system * wakeup devices users need to implement wakeup detection in * their interrupt handlers. + * IRQF_RESCUE_THREAD - Interrupt will be handled in thread context in case + * that irq flood is triggered. Can't afford to always handle + * irq in thread context becasue IO performance drops much by + * the extra wakeup latency. So one backup thread is created + * for avoiding irq flood which may cause CPU lockup. */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -74,6 +79,7 @@ #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 #define IRQF_COND_SUSPEND 0x00040000 +#define IRQF_RESCUE_THREAD 0x00080000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index a4ace611f47f..8e5312c35483 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -146,7 +146,11 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags irqreturn_t res; trace_irq_handler_entry(irq, action); - res = action->handler(irq, action->dev_id); + if ((action->flags & IRQF_RESCUE_THREAD) && + irq_flood_detected()) + res = IRQ_WAKE_THREAD; + else + res = action->handler(irq, action->dev_id); trace_irq_handler_exit(irq, action, res); if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n", diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index e8f7f179bf77..1566abbf50e8 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1082,6 +1082,8 @@ static int irq_thread(void *data) if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD, &action->thread_flags)) handler_fn = irq_forced_thread_fn; + else if (action->flags & IRQF_RESCUE_THREAD) + handler_fn = irq_forced_thread_fn; else handler_fn = irq_thread_fn; @@ -2011,6 +2013,16 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, handler = irq_default_primary_handler; } + if (irqflags & IRQF_RESCUE_THREAD) { + if (irqflags & IRQF_NO_THREAD) + return -EINVAL; + if (thread_fn) + return -EINVAL; + if (handler == irq_default_primary_handler) + return -EINVAL; + thread_fn = handler; + } + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); if (!action) return -ENOMEM; -- 2.20.1