Re: [bug report] lockdep WARN at PCI device rescan

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Nov 24, 2023 / 18:30, Heiner Kallweit wrote:
> On 24.11.2023 11:49, Shinichiro Kawasaki wrote:
[...]
> > I came across another fix idea: assuming the guard by pci_rescan_remove_lock is
> > required in p2sb_bar(), how about to do trylock? If the mutex can not be locked,
> > make the p2sb_bar() call fail. This way, we can avoid the deadlock between
> > pci_rescan_remove_lock and workqueue completion.
> > 
> > I created a patch below and confirmed it avoided the lockdep WARN. The i2c-i801
> > probe was ok at system boot. When I did the two commands above, I observed the
> > i2c-i801 device probe failed due to trylock failure. But I think it's far better
> > than hang.
> > 
> 
> I wouldn't call this a solution. A solution has to support pci drivers using
> p2sb_bar() in probe(). You can't simply make them fail.

I see... it looks a bit tough to find out the good solution, but let me try some
more.

Here are my three observations:

A) pci drivers should be able to call p2sb_bar() in probe() without failure.
B) when /sys/bus/pci/rescan is written, pci_rescan_remove_lock is locked then
   probe() is called.
C) p2sb_bar() locks pci_rescan_remove_lock.

These results in the deadlock. To avoid the deadlock, one of the three needs
to change. A) is not to change. I guess changing B) will be too much. So, I
would like to question if we can change C).

Can we remove pci_rescan_remove_lock in p2sb_bar()? Maybe no. As its inline
comment says, p2sb_bar() locks pci_rescan_remove_lock to guard the section which
unhides and hides the P2SB device from parallel scan. We can't simply remove it.

Or, can we replace pci_rescan_remove_lock with other lock? For example, what if
we have locks for each pci_bus which serialize scans for each pci_bus? Is it
enough to guard the P2SB device? (Or is it just a stupid idea?)

As a trial, I created a patch below. I confirmed it avoids the deadlock and the
lockdep WARN. But it has impacts on PCI sub-system, and I'm not sure if it
really hides the P2SB device. May ask comments on it?


diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ed6b7f48736..6995d66fb36 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -562,6 +562,7 @@ static struct pci_bus *pci_alloc_bus(struct pci_bus *parent)
 	if (parent)
 		b->domain_nr = parent->domain_nr;
 #endif
+	mutex_init(&b->scan_lock);
 	return b;
 }
 
@@ -2417,6 +2418,18 @@ bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
 }
 EXPORT_SYMBOL(pci_bus_read_dev_vendor_id);
 
+void pci_lock_bus_scan(struct pci_bus *bus)
+{
+	mutex_lock(&bus->scan_lock);
+}
+EXPORT_SYMBOL_GPL(pci_lock_bus_scan);
+
+void pci_unlock_bus_scan(struct pci_bus *bus)
+{
+	mutex_unlock(&bus->scan_lock);
+}
+EXPORT_SYMBOL_GPL(pci_unlock_bus_scan);
+
 /*
  * Read the config data for a PCI device, sanity-check it,
  * and fill in the dev structure.
@@ -2584,26 +2597,45 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 	WARN_ON(ret < 0);
 }
 
-struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn)
+struct pci_dev *do_pci_scan_single_device(struct pci_bus *bus, int devfn,
+					  bool to_lock)
 {
 	struct pci_dev *dev;
 
+	if (to_lock)
+		pci_lock_bus_scan(bus);
+
 	dev = pci_get_slot(bus, devfn);
 	if (dev) {
 		pci_dev_put(dev);
-		return dev;
+		goto out;
 	}
 
 	dev = pci_scan_device(bus, devfn);
 	if (!dev)
-		return NULL;
+		goto out;
 
 	pci_device_add(dev, bus);
 
+out:
+	if (to_lock)
+		pci_unlock_bus_scan(bus);
 	return dev;
 }
+
+struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn)
+{
+	return do_pci_scan_single_device(bus, devfn, true);
+}
 EXPORT_SYMBOL(pci_scan_single_device);
 
+struct pci_dev *pci_scan_single_device_under_lock(struct pci_bus *bus,
+						  int devfn)
+{
+	return do_pci_scan_single_device(bus, devfn, false);
+}
+EXPORT_SYMBOL(pci_scan_single_device_under_lock);
+
 static int next_ari_fn(struct pci_bus *bus, struct pci_dev *dev, int fn)
 {
 	int pos;
diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c
index 1cf2471d54d..0073ef1a9a5 100644
--- a/drivers/platform/x86/p2sb.c
+++ b/drivers/platform/x86/p2sb.c
@@ -65,7 +65,7 @@ static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct re
 	struct pci_dev *pdev;
 	int ret;
 
-	pdev = pci_scan_single_device(bus, devfn);
+	pdev = pci_scan_single_device_under_lock(bus, devfn);
 	if (!pdev)
 		return -ENODEV;
 
@@ -89,7 +89,7 @@ static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct re
  *
  * Caller must provide a valid pointer to @mem.
  *
- * Locking is handled by pci_rescan_remove_lock mutex.
+ * Locking is handled by bus scan lock.
  *
  * Return:
  * 0 on success or appropriate errno value on error.
@@ -113,14 +113,14 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
 	 * Prevent concurrent PCI bus scan from seeing the P2SB device and
 	 * removing via sysfs while it is temporarily exposed.
 	 */
-	pci_lock_rescan_remove();
+	pci_lock_bus_scan(bus);
 
 	/* Unhide the P2SB device, if needed */
 	pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value);
 	if (value & P2SBC_HIDE)
 		pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0);
 
-	pdev_p2sb = pci_scan_single_device(bus, devfn_p2sb);
+	pdev_p2sb = pci_scan_single_device_under_lock(bus, devfn_p2sb);
 	if (devfn)
 		ret = p2sb_scan_and_read(bus, devfn, mem);
 	else
@@ -131,7 +131,7 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem)
 	if (value & P2SBC_HIDE)
 		pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE);
 
-	pci_unlock_rescan_remove();
+	pci_unlock_bus_scan(bus);
 
 	if (ret)
 		return ret;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 60ca768bc86..096f018038f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -678,6 +678,7 @@ struct pci_bus {
 	struct bin_attribute	*legacy_mem;	/* Legacy mem */
 	unsigned int		is_added:1;
 	unsigned int		unsafe_warn:1;	/* warned about RW1C config write */
+	struct mutex	scan_lock;	/* Serialize bus scan */
 };
 
 #define to_pci_bus(n)	container_of(n, struct pci_bus, dev)
@@ -1159,6 +1160,10 @@ static inline void pci_dev_assign_slot(struct pci_dev *dev) { }
 #endif
 int pci_scan_slot(struct pci_bus *bus, int devfn);
 struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn);
+void pci_lock_bus_scan(struct pci_bus *bus);
+void pci_unlock_bus_scan(struct pci_bus *bus);
+struct pci_dev *pci_scan_single_device_under_lock(struct pci_bus *bus,
+						  int devfn);
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus);
 unsigned int pci_scan_child_bus(struct pci_bus *bus);
 void pci_bus_add_device(struct pci_dev *dev);





[Index of Archives]     [Linux GPIO]     [Linux SPI]     [Linux Hardward Monitoring]     [LM Sensors]     [Linux USB Devel]     [Linux Media]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux