Though IRTE modification for IRQ affinity change is an atomic operation, it does not guarantee the timing of IRQ posting at PID. Consider the following scenario: Device system agent iommu memory CPU/LAPIC 1 FEEX_XXXX 2 Interrupt request 3 Fetch IRTE -> 4 ->Atomic Swap PID.PIR(vec) Push to Global Observable(GO) 5 if (ON*) done;* else 6 send a notification -> * ON: outstanding notification, 1 will suppress new notifications If IRQ affinity change happens between 3 and 5 in IOMMU, old CPU's PIR could have pending bit set for the vector being moved. We must check PID.PIR to prevent the loss of interrupts. Suggested-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Signed-off-by: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx> --- arch/x86/kernel/apic/vector.c | 8 +++++++- arch/x86/kernel/irq.c | 20 +++++++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 319448d87b99..14fc33cfdb37 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -19,6 +19,7 @@ #include <asm/apic.h> #include <asm/i8259.h> #include <asm/desc.h> +#include <asm/posted_intr.h> #include <asm/irq_remapping.h> #include <asm/trace/irq_vectors.h> @@ -978,9 +979,14 @@ static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr) * Do not check IRR when called from lapic_offline(), because * fixup_irqs() was just called to scan IRR for set bits and * forward them to new destination CPUs via IPIs. + * + * If the vector to be cleaned is delivered as posted intr, + * it is possible that the interrupt has been posted but + * not made to the IRR due to coalesced notifications. + * Therefore, check PIR to see if the interrupt was posted. */ irr = check_irr ? 
apic_read(APIC_IRR + (vector / 32 * 0x10)) : 0; - if (irr & (1U << (vector % 32))) { + if (irr & (1U << (vector % 32)) || is_pi_pending_this_cpu(vector)) { pr_warn_once("Moved interrupt pending in old target APIC %u\n", apicd->irq); rearm = true; continue; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 786c2c8330f4..7732cb9bbf0c 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -444,11 +444,26 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification) } #endif /* X86_POSTED_MSI */ +/* + * Check if a given vector is pending in APIC IRR or PIR if posted interrupt + * is enabled for coalesced interrupt delivery (CID). + */ +static inline bool is_vector_pending(unsigned int vector) +{ + unsigned int irr; + + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + if (irr & (1 << (vector % 32))) + return true; + + return is_pi_pending_this_cpu(vector); +} + #ifdef CONFIG_HOTPLUG_CPU /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) { - unsigned int irr, vector; + unsigned int vector; struct irq_desc *desc; struct irq_data *data; struct irq_chip *chip; @@ -475,8 +490,7 @@ void fixup_irqs(void) if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector]))) continue; - irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); - if (irr & (1 << (vector % 32))) { + if (is_vector_pending(vector)) { desc = __this_cpu_read(vector_irq[vector]); raw_spin_lock(&desc->lock); -- 2.25.1