[This is against Jesse's tree which includes my original patch] The previous MSI-X fix (8d181018532dd709ec1f789e374cda92d7b01ce1) had three bugs. First, it didn't move the write that disabled the vector. This led to writing garbage to the MSI-X vector (spotted by Michael Ellerman). It didn't fix the PCI resume case, and it had a race window where the device could generate an interrupt before the MSI-X registers were programmed (leading to a DMA to random addresses). Fortunately, the MSI-X capability has a bit to mask all the vectors. By setting this bit instead of clearing the enable bit, we can ensure the device will not generate spurious interrupts. Since the capability is now enabled, the NIU device will not have a problem with the reads and writes to the MSI-X registers being in the original order in the code. Signed-off-by: Matthew Wilcox <willy@xxxxxxxxxxxxxxx> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 3627732..f530611 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -93,7 +93,7 @@ static void msi_set_enable(struct pci_dev *dev, int enable) __msi_set_enable(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), enable); } -static void msix_set_enable(struct pci_dev *dev, int enable) +static void msix_disable(struct pci_dev *dev) { int pos; u16 control; @@ -102,8 +102,6 @@ static void msix_set_enable(struct pci_dev *dev, int enable) if (pos) { pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control); control &= ~PCI_MSIX_FLAGS_ENABLE; - if (enable) - control |= PCI_MSIX_FLAGS_ENABLE; pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); } } @@ -321,22 +319,22 @@ static void __pci_restore_msix_state(struct pci_dev *dev) if (!dev->msix_enabled) return; + BUG_ON(list_empty(&dev->msi_list)); + entry = list_entry(dev->msi_list.next, struct msi_desc, list); + pos = entry->msi_attrib.pos; + pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control); /* route the table */ pci_intx_for_msi(dev, 0); - msix_set_enable(dev, 0); + control |= PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL; + pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); list_for_each_entry(entry, &dev->msi_list, list) { write_msi_msg(entry->irq, &entry->msg); msix_mask_irq(entry, entry->masked); } - BUG_ON(list_empty(&dev->msi_list)); - entry = list_entry(dev->msi_list.next, struct msi_desc, list); - pos = entry->msi_attrib.pos; - pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control); control &= ~PCI_MSIX_FLAGS_MASKALL; - control |= PCI_MSIX_FLAGS_ENABLE; pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); } @@ -427,11 +425,18 @@ static int msix_capability_init(struct pci_dev *dev, u8 bir; void __iomem *base; - msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */ - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + + /* + * Some devices require MSI-X to be enabled before we can touch the + * MSI-X registers. We need to mask all the vectors to prevent + * interrupts coming in before they're fully set up. + */ + pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control); + control |= PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL; + pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); + /* Request & Map MSI-X table region */ - pci_read_config_word(dev, msi_control_reg(pos), &control); nr_entries = multi_msix_capable(control); pci_read_config_dword(dev, msix_table_offset_reg(pos), &table_offset); @@ -439,8 +444,10 @@ static int msix_capability_init(struct pci_dev *dev, table_offset &= ~PCI_MSIX_FLAGS_BIRMASK; phys_addr = pci_resource_start (dev, bir) + table_offset; base = ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE); - if (base == NULL) - return -ENOMEM; + if (base == NULL) { + ret = -ENOMEM; + goto fail; + } /* MSI-X Table Initialization */ for (i = 0; i < nvec; i++) { @@ -455,6 +462,8 @@ static int msix_capability_init(struct pci_dev *dev, entry->msi_attrib.default_irq = dev->irq; entry->msi_attrib.pos = pos; entry->mask_base = base; + entry->masked = readl(base + j * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); msix_mask_irq(entry, 1); list_add_tail(&entry->list, &dev->msi_list); @@ -475,10 +484,8 @@ static int msix_capability_init(struct pci_dev *dev, ret = avail; } - if (ret) { - msi_free_irqs(dev); - return ret; - } + if (ret) + goto fail; i = 0; list_for_each_entry(entry, &dev->msi_list, list) { @@ -486,18 +493,21 @@ static int msix_capability_init(struct pci_dev *dev, set_irq_msi(entry->irq, entry); i++; } - /* Set MSI-X enabled bits */ + + /* Set MSI-X enabled bits and unmask the function */ pci_intx_for_msi(dev, 0); - msix_set_enable(dev, 1); dev->msix_enabled = 1; - list_for_each_entry(entry, &dev->msi_list, list) { - int vector = entry->msi_attrib.entry_nr; - entry->masked = readl(base + vector * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); - } + control &= ~PCI_MSIX_FLAGS_MASKALL; + pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); return 0; + + fail: + msi_free_irqs(dev); + control &= ~PCI_MSIX_FLAGS_ENABLE; + pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control); + return ret; } /** @@ -742,10 +752,11 @@ void pci_msix_shutdown(struct pci_dev* dev) if (!pci_msi_enable || !dev || !dev->msix_enabled) return; - msix_set_enable(dev, 0); + msix_disable(dev); pci_intx_for_msi(dev, 1); dev->msix_enabled = 0; } + void pci_disable_msix(struct pci_dev* dev) { if (!pci_msi_enable || !dev || !dev->msix_enabled) diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h index 71f4df2..5b58ab0 100644 --- a/drivers/pci/msi.h +++ b/drivers/pci/msi.h @@ -25,8 +25,6 @@ #define msix_table_offset_reg(base) (base + 0x04) #define msix_pba_offset_reg(base) (base + 0x08) -#define msix_enable(control) control |= PCI_MSIX_FLAGS_ENABLE -#define msix_disable(control) control &= ~PCI_MSIX_FLAGS_ENABLE #define msix_table_size(control) ((control & PCI_MSIX_FLAGS_QSIZE)+1) #define multi_msix_capable msix_table_size #define msix_unmask(address) (address & ~PCI_MSIX_FLAGS_BITMASK) -- Matthew Wilcox Intel Open Source Technology Centre "Bill, look, we understand that you're interested in selling us this operating system, but compare it to ours. We can't possibly take such a retrograde step." -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html