The MSI core will introduce runtime allocation of MSI related data. This data will be devres managed and has to be set up before enabling PCI/MSI[-X]. This would introduce an ordering issue vs. pcim_release(). The setup order is: pcim_enable_device() devres_alloc(pcim_release...); ... pci_irq_alloc() msi_setup_device_data() devres_alloc(msi_device_data_release, ...) and once the device is released these release functions are invoked in the opposite order: msi_device_data_release() ... pcim_release() pci_disable_msi[x]() which is obviously wrong, because pci_disable_msi[x]() requires the MSI data to be available to tear down the MSI[-X] interrupts. Remove the MSI[-X] teardown from pcim_release() and add an explicit action to be installed on the attempt of enabling PCI/MSI[-X]. This allows the MSI core data allocation to be ordered correctly in a subsequent step. Reported-by: Nishanth Menon <nm@xxxxxx> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> --- V4: New patch --- drivers/pci/msi/msi.c | 33 +++++++++++++++++++++++++++++++++ drivers/pci/pci.c | 5 ----- include/linux/pci.h | 3 ++- 3 files changed, 35 insertions(+), 6 deletions(-) --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -341,6 +341,31 @@ void pci_restore_msi_state(struct pci_de } EXPORT_SYMBOL_GPL(pci_restore_msi_state); +static void pcim_msi_release(void *pcidev) +{ + struct pci_dev *dev = pcidev; + + dev->is_msi_managed = false; + pci_free_irq_vectors(dev); +} + +/* + * Needs to be separate from pcim_release to prevent an ordering problem + * vs. msi_device_data_release() in the MSI core code. + */ +static int pcim_setup_msi_release(struct pci_dev *dev) +{ + int ret; + + if (!pci_is_managed(dev) || dev->is_msi_managed) + return 0; + + ret = devm_add_action(&dev->dev, pcim_msi_release, dev); + if (!ret) + dev->is_msi_managed = true; + return ret; +} + static struct msi_desc * msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd) { @@ -884,6 +909,10 @@ static int __pci_enable_msi_range(struct if (nvec > maxvec) nvec = maxvec; + rc = pcim_setup_msi_release(dev); + if (rc) + return rc; + for (;;) { if (affd) { nvec = irq_calc_affinity_vectors(minvec, nvec, affd); @@ -927,6 +956,10 @@ static int __pci_enable_msix_range(struc if (WARN_ON_ONCE(dev->msix_enabled)) return -EINVAL; + rc = pcim_setup_msi_release(dev); + if (rc) + return rc; + for (;;) { if (affd) { nvec = irq_calc_affinity_vectors(minvec, nvec, affd); --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2024,11 +2024,6 @@ static void pcim_release(struct device * struct pci_devres *this = res; int i; - if (dev->msi_enabled) - pci_disable_msi(dev); - if (dev->msix_enabled) - pci_disable_msix(dev); - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) if (this->region_mask & (1 << i)) pci_release_region(dev, i); --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -425,7 +425,8 @@ struct pci_dev { unsigned int ats_enabled:1; /* Address Translation Svc */ unsigned int pasid_enabled:1; /* Process Address Space ID */ unsigned int pri_enabled:1; /* Page Request Interface */ - unsigned int is_managed:1; + unsigned int is_managed:1; /* Managed via devres */ + unsigned int is_msi_managed:1; /* MSI release via devres installed */ unsigned int needs_freset:1; /* Requires fundamental reset */ unsigned int state_saved:1; unsigned int is_physfn:1;