On Mon, 17 Feb 2025, Ilpo Järvinen wrote: > PCIe BW controller enables BW notifications for Downstream Ports by > setting Link Bandwidth Management Interrupt Enable (LBMIE) and Link > Autonomous Bandwidth Interrupt Enable (LABIE) (PCIe Spec. r6.2 sec. > 7.5.3.7). > > It was discovered that performing a reset can lead to the device > underneath the Downstream Port becoming unavailable if BW notifications > are left enabled throughout the reset sequence (at least LBMIE was > found to cause an issue). > > While the PCIe Specifications do not indicate BW notifications could not > be kept enabled during resets, the PCIe Link state during an > intentional reset is not of large interest. Thus, disable BW controller > for the bridge while reset is performed and re-enable it after the > reset has completed to workaround the problems some devices encounter > if BW notifications are left on throughout the reset sequence. > > Keep a counter for the disable/enable because MFD will execute > pci_dev_save_and_disable() and pci_dev_restore() back to back for > sibling devices: > > [ 50.139010] vfio-pci 0000:01:00.0: resetting > [ 50.139053] vfio-pci 0000:01:00.1: resetting > [ 50.141126] pcieport 0000:00:01.1: PME: Spurious native interrupt! > [ 50.141133] pcieport 0000:00:01.1: PME: Spurious native interrupt! > [ 50.441466] vfio-pci 0000:01:00.0: reset done > [ 50.501534] vfio-pci 0000:01:00.1: reset done > > Fixes: 665745f27487 ("PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller") > Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219765 > Tested-by: Joel Mathew Thomas <proxy0@xxxxxxxxxxxx> > Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@xxxxxxxxxxxxxxx> > Cc: stable@xxxxxxxxxxxxxxx > --- > > I suspect the root cause is some kind of violation of specifications. > Resets shouldn't cause devices to become unavailable just because BW > notifications have been enabled. > > Before somebody comments on those dual rwsems, I do have yet to be > submitted patch to simplify the locking as per Lukas Wunner's earlier > suggestion. I've just focused on solving the regressions first. Hi all, This problem was root caused to a problem in hotplug code so this patch should not be applied. I've just sent a series to address the synchronization problems the hotplug code and it should be considered instead to fix the issues this patch attempted to workaround. -- i. > drivers/pci/pci.c | 8 +++++++ > drivers/pci/pci.h | 4 ++++ > drivers/pci/pcie/bwctrl.c | 49 ++++++++++++++++++++++++++++++++------- > 3 files changed, 53 insertions(+), 8 deletions(-) > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > index 869d204a70a3..7a53d7474175 100644 > --- a/drivers/pci/pci.c > +++ b/drivers/pci/pci.c > @@ -5148,6 +5148,7 @@ static void pci_dev_save_and_disable(struct pci_dev *dev) > { > const struct pci_error_handlers *err_handler = > dev->driver ? dev->driver->err_handler : NULL; > + struct pci_dev *bridge = pci_upstream_bridge(dev); > > /* > * dev->driver->err_handler->reset_prepare() is protected against > @@ -5166,6 +5167,9 @@ static void pci_dev_save_and_disable(struct pci_dev *dev) > */ > pci_set_power_state(dev, PCI_D0); > > + if (bridge) > + pcie_bwnotif_disable(bridge); > + > pci_save_state(dev); > /* > * Disable the device by clearing the Command register, except for > @@ -5181,9 +5185,13 @@ static void pci_dev_restore(struct pci_dev *dev) > { > const struct pci_error_handlers *err_handler = > dev->driver ? dev->driver->err_handler : NULL; > + struct pci_dev *bridge = pci_upstream_bridge(dev); > > pci_restore_state(dev); > > + if (bridge) > + pcie_bwnotif_enable(bridge); > + > /* > * dev->driver->err_handler->reset_done() is protected against > * races with ->remove() by the device lock, which must be held by > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h > index 01e51db8d285..856546f1aad9 100644 > --- a/drivers/pci/pci.h > +++ b/drivers/pci/pci.h > @@ -759,12 +759,16 @@ static inline void pcie_ecrc_get_policy(char *str) { } > #ifdef CONFIG_PCIEPORTBUS > void pcie_reset_lbms_count(struct pci_dev *port); > int pcie_lbms_count(struct pci_dev *port, unsigned long *val); > +void pcie_bwnotif_enable(struct pci_dev *port); > +void pcie_bwnotif_disable(struct pci_dev *port); > #else > static inline void pcie_reset_lbms_count(struct pci_dev *port) {} > static inline int pcie_lbms_count(struct pci_dev *port, unsigned long *val) > { > return -EOPNOTSUPP; > } > +static inline void pcie_bwnotif_enable(struct pci_dev *port) {} > +static inline void pcie_bwnotif_disable(struct pci_dev *port) {} > #endif > > struct pci_dev_reset_methods { > diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c > index 0a5e7efbce2c..a117f6f67c07 100644 > --- a/drivers/pci/pcie/bwctrl.c > +++ b/drivers/pci/pcie/bwctrl.c > @@ -40,11 +40,13 @@ > * @set_speed_mutex: Serializes link speed changes > * @lbms_count: Count for LBMS (since last reset) > * @cdev: Thermal cooling device associated with the port > + * @disable_count: BW notifications disabled/enabled counter > */ > struct pcie_bwctrl_data { > struct mutex set_speed_mutex; > atomic_t lbms_count; > struct thermal_cooling_device *cdev; > + int disable_count; > }; > > /* > @@ -200,10 +202,9 @@ int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, > return ret; > } > > -static void pcie_bwnotif_enable(struct pcie_device *srv) > +static void __pcie_bwnotif_enable(struct pci_dev *port) > { > - struct pcie_bwctrl_data *data = srv->port->link_bwctrl; > - struct pci_dev *port = srv->port; > + struct pcie_bwctrl_data *data = port->link_bwctrl; > u16 link_status; > int ret; > > @@ -224,12 +225,44 @@ static void pcie_bwnotif_enable(struct pcie_device *srv) > pcie_update_link_speed(port->subordinate); > } > > -static void pcie_bwnotif_disable(struct pci_dev *port) > +void pcie_bwnotif_enable(struct pci_dev *port) > +{ > + guard(rwsem_read)(&pcie_bwctrl_setspeed_rwsem); > + guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); > + > + if (!port->link_bwctrl) > + return; > + > + port->link_bwctrl->disable_count--; > + if (!port->link_bwctrl->disable_count) { > + __pcie_bwnotif_enable(port); > + pci_dbg(port, "BW notifications enabled\n"); > + } > + WARN_ON_ONCE(port->link_bwctrl->disable_count < 0); > +} > + > +static void __pcie_bwnotif_disable(struct pci_dev *port) > { > pcie_capability_clear_word(port, PCI_EXP_LNKCTL, > PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); > } > > +void pcie_bwnotif_disable(struct pci_dev *port) > +{ > + guard(rwsem_read)(&pcie_bwctrl_setspeed_rwsem); > + guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); > + > + if (!port->link_bwctrl) > + return; > + > + port->link_bwctrl->disable_count++; > + > + if (port->link_bwctrl->disable_count == 1) { > + __pcie_bwnotif_disable(port); > + pci_dbg(port, "BW notifications disabled\n"); > + } > +} > + > static irqreturn_t pcie_bwnotif_irq(int irq, void *context) > { > struct pcie_device *srv = context; > @@ -314,7 +347,7 @@ static int pcie_bwnotif_probe(struct pcie_device *srv) > return ret; > } > > - pcie_bwnotif_enable(srv); > + __pcie_bwnotif_enable(port); > } > } > > @@ -336,7 +369,7 @@ static void pcie_bwnotif_remove(struct pcie_device *srv) > > scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { > scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { > - pcie_bwnotif_disable(srv->port); > + __pcie_bwnotif_disable(srv->port); > > free_irq(srv->irq, srv); > > @@ -347,13 +380,13 @@ static void pcie_bwnotif_remove(struct pcie_device *srv) > > static int pcie_bwnotif_suspend(struct pcie_device *srv) > { > - pcie_bwnotif_disable(srv->port); > + __pcie_bwnotif_disable(srv->port); > return 0; > } > > static int pcie_bwnotif_resume(struct pcie_device *srv) > { > - pcie_bwnotif_enable(srv); > + __pcie_bwnotif_enable(srv->port); > return 0; > } > > > base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b >