Evan,

Evan Green <evgreen@xxxxxxxxxxxx> writes:
> I did another experiment that I think lends credibility to my torn MSI
> hypothesis. I have the following change:
>
> And indeed, I get a machine check, despite the fact that MSI_DATA is
> overwritten just after address is updated.

I don't have to understand why a SoC released in 2019 still has
unmaskable MSI, especially as Inhell's own XHCI spec clearly documents
and recommends MSI-X.

While your workaround (disabling MSI) works in this particular case,
it's not really a good option:

 1) Quite a few devices have a bug where the legacy INTX disable does
    not work reliably or is outright broken. That means disabling MSI
    will reroute the interrupt to INTX.

 2) I dug out old debug data which confirms that some silly devices
    lose interrupts across MSI disable/reenable if the INTX fallback is
    disabled. And no, it's not a random weird device; it's part of a
    chipset which was pretty popular a few years ago.

    I leave it as an exercise for the reader to guess the vendor.

Can you please apply the patch below? It enforces an IPI to the new
vector/target CPU when the interrupt is MSI without masking. It should
cure the issue. It goes without saying that I'm not proud of it.

Thanks,

        tglx

8<--------------

--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -498,6 +498,7 @@ extern bool default_check_apicid_used(ph
 extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap);
 extern int default_cpu_present_to_apicid(int mps_cpu);
 extern int default_check_phys_apicid_present(int phys_apicid);
+extern bool apic_hotplug_force_retrigger(struct irq_data *irqd);
 
 #endif /* CONFIG_X86_LOCAL_APIC */
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -10,6 +10,7 @@ enum {
 	/* Allocate contiguous CPU vectors */
 	X86_IRQ_ALLOC_CONTIGUOUS_VECTORS	= 0x1,
 	X86_IRQ_ALLOC_LEGACY			= 0x2,
+	X86_IRQ_MSI_NOMASK_TRAINWRECK		= 0x4,
 };
 
 extern struct irq_domain *x86_vector_domain;
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -103,6 +103,14 @@ int pci_msi_prepare(struct irq_domain *d
 	} else {
 		arg->type = X86_IRQ_ALLOC_TYPE_MSI;
 		arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
+		/*
+		 * If the MSI implementation does not provide masking,
+		 * enable the workaround for the CPU hotplug forced
+		 * migration problem which is caused by the torn write of
+		 * the address/data pair.
+		 */
+		if (!desc->msi_attrib.maskbit)
+			arg->flags |= X86_IRQ_MSI_NOMASK_TRAINWRECK;
 	}
 
 	return 0;
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -34,7 +34,8 @@ struct apic_chip_data {
 	unsigned int		move_in_progress	: 1,
 				is_managed		: 1,
 				can_reserve		: 1,
-				has_reserved		: 1;
+				has_reserved		: 1,
+				force_retrigger		: 1;
 };
 
 struct irq_domain *x86_vector_domain;
@@ -99,6 +100,18 @@ struct irq_cfg *irq_cfg(unsigned int irq
 	return irqd_cfg(irq_get_irq_data(irq));
 }
 
+bool apic_hotplug_force_retrigger(struct irq_data *irqd)
+{
+	struct apic_chip_data *apicd;
+
+	irqd = __irq_domain_get_irq_data(x86_vector_domain, irqd);
+	if (!irqd)
+		return false;
+
+	apicd = apic_chip_data(irqd);
+	return apicd && apicd->force_retrigger;
+}
+
 static struct apic_chip_data *alloc_apic_chip_data(int node)
 {
 	struct apic_chip_data *apicd;
@@ -552,6 +565,8 @@ static int x86_vector_alloc_irqs(struct
 		}
 
 		apicd->irq = virq + i;
+		if (info->flags & X86_IRQ_MSI_NOMASK_TRAINWRECK)
+			apicd->force_retrigger = true;
 		irqd->chip = &lapic_controller;
 		irqd->chip_data = apicd;
 		irqd->hwirq = virq + i;
@@ -624,6 +639,7 @@ static void x86_vector_debug_show(struct
 	seq_printf(m, "%*scan_reserve: %u\n", ind, "", apicd.can_reserve ? 1 : 0);
 	seq_printf(m, "%*shas_reserved: %u\n", ind, "", apicd.has_reserved ? 1 : 0);
 	seq_printf(m, "%*scleanup_pending: %u\n", ind, "", !hlist_unhashed(&apicd.clist));
+	seq_printf(m, "%*sforce_retrigger: %u\n", ind, "", apicd.force_retrigger ? 1 : 0);
 }
 #endif
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -350,6 +350,7 @@ void fixup_irqs(void)
 	struct irq_desc *desc;
 	struct irq_data *data;
 	struct irq_chip *chip;
+	bool retrigger;
 
 	irq_migrate_all_off_this_cpu();
@@ -370,24 +371,29 @@ void fixup_irqs(void)
 	 * nothing else will touch it.
 	 */
 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-		if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector])))
+		desc = __this_cpu_read(vector_irq[vector]);
+		if (IS_ERR_OR_NULL(desc))
 			continue;
 
+		raw_spin_lock(&desc->lock);
+		data = irq_desc_get_irq_data(desc);
+		retrigger = apic_hotplug_force_retrigger(data);
+
 		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
-		if (irr & (1 << (vector % 32))) {
-			desc = __this_cpu_read(vector_irq[vector]);
+		if (irr & (1 << (vector % 32)))
+			retrigger = true;
 
-			raw_spin_lock(&desc->lock);
-			data = irq_desc_get_irq_data(desc);
+		if (!retrigger) {
+			__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+		} else {
 			chip = irq_data_get_irq_chip(data);
 			if (chip->irq_retrigger) {
 				chip->irq_retrigger(data);
-				__this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED);
+				__this_cpu_write(vector_irq[vector],
+						 VECTOR_RETRIGGERED);
 			}
-			raw_spin_unlock(&desc->lock);
 		}
-		if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED)
-			__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+		raw_spin_unlock(&desc->lock);
 	}
 }
 #endif
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -432,6 +432,8 @@ int irq_reserve_ipi(struct irq_domain *d
 int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest);
 
 /* V2 interfaces to support hierarchy IRQ domains. */
+struct irq_data *__irq_domain_get_irq_data(struct irq_domain *domain,
+					   struct irq_data *irq_data);
 extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
 						unsigned int virq);
 extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1165,6 +1165,21 @@ static int irq_domain_alloc_irq_data(str
 }
 
 /**
+ * __irq_domain_get_irq_data - Get irq_data associated with @domain for @data
+ * @domain:	domain to match
+ * @irq_data:	initial irq data to start hierarchy search
+ */
+struct irq_data *__irq_domain_get_irq_data(struct irq_domain *domain,
+					   struct irq_data *irq_data)
+{
+	for (; irq_data; irq_data = irq_data->parent_data) {
+		if (irq_data->domain == domain)
+			return irq_data;
+	}
+	return NULL;
+}
+
+/**
  * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain
  * @domain:	domain to match
  * @virq:	IRQ number to get irq_data
@@ -1172,14 +1187,7 @@ static int irq_domain_alloc_irq_data(str
 struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
 					 unsigned int virq)
 {
-	struct irq_data *irq_data;
-
-	for (irq_data = irq_get_irq_data(virq); irq_data;
-	     irq_data = irq_data->parent_data)
-		if (irq_data->domain == domain)
-			return irq_data;
-
-	return NULL;
+	return __irq_domain_get_irq_data(domain, irq_get_irq_data(virq));
 }
 EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
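
For completeness, the window which the forced retrigger papers over is the
non-atomic rewrite of the address/data pair of an unmaskable MSI entry while
the device is free to fire. A rough sketch of the problematic ordering is
below; the helper name and the standalone function are made up purely for
illustration, the real update of a 64-bit entry happens along these lines in
__pci_write_msi_msg():

#include <linux/pci.h>
#include <linux/msi.h>

/*
 * Illustration only: the address (target CPU) and the data (vector) of an
 * unmaskable MSI entry cannot be updated in a single transaction, so the
 * device can observe a half-updated message. @pos is assumed to be the
 * offset of the MSI capability in config space.
 */
static void msi_torn_update_sketch(struct pci_dev *dev, int pos,
				   const struct msi_msg *msg)
{
	/* Point the message at the new target CPU first ... */
	pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, msg->address_lo);
	pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, msg->address_hi);

	/*
	 * If the device raises the interrupt right here, it sends the stale
	 * data (old vector) to the new address (new CPU). The IRR check in
	 * fixup_irqs() runs on the outgoing CPU and can never observe that
	 * message, so without a forced retrigger the interrupt can simply
	 * be lost.
	 */

	/* ... and only then switch it over to the new vector. */
	pci_write_config_word(dev, pos + PCI_MSI_DATA_64, msg->data);
}

Which is why the hack above retriggers X86_IRQ_MSI_NOMASK_TRAINWRECK
interrupts unconditionally instead of trusting the local IRR.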