On Saturday, November 30, 2013 12:45:55 AM Rafael J. Wysocki wrote: > On Saturday, November 30, 2013 12:38:26 AM Rafael J. Wysocki wrote: > > On Tuesday, November 26, 2013 06:26:54 PM Yinghai Lu wrote: > > > On Tue, Nov 26, 2013 at 5:24 PM, Rafael J. Wysocki <rjw@xxxxxxxxxxxxx> wrote: > > > > > > > > So assume pci_destroy_dev() is called twice in parallel for the same dev > > > > by two different threads. Thread 1 does the atomic_inc_and_test() and > > > > finds that it is OK to do the device_del() and put_device() which causes > > > > the device object to be freed. Then thread 2 does the atomic_inc_and_test() > > > > on the already freed device object and crashes the kernel. > > > > > > > thread2 should still hold one extra reference. > > > that is in > > > device_schedule_callback > > > ==> sysfs_schedule_callback > > > ==> kobject_get(kobj) > > > > > > pci_destroy_dev for thread2 is called at this point. > > > > > > and that reference will be released from > > > sysfs_schedule_callback > > > ==> kobject_put()... > > > > Well, that would be the case if thread 2 was started by device_schedule_callback(), > > but again, for example, it may be trim_stale_devices() started by acpiphp_check_bridge() > > that doesn't hold extra references to the pci_dev. [Well, that piece of code > > is racy anyway, because it walks bus->devices without locking. Which is my > > fault too, because I overlooked that. Shame, shame.] > > > > Perhaps we can do something like the (untested) patch below (in addition to the > > $subject patch). Do you see any immediate problems with it? > > Ah, I see one. It will break pci_stop_bus_device() and pci_remove_bus_device(). > So much for being clever. > > Moreover, it looks like those two routines above are racy too for the same > reason? The (still untested) patch below is what I have come up with for now. The is_gone flag is now only operated under pci_remove_rescan_mutex, so it need not be atomic. Of course, whoever calls pci_stop_and_remove_bus_device() (the "locked" one) should hold a ref to the device being removed to avoid use-after-free (the callers need to be audited for that). Well, I probably still missed something, because it's the middle of the night and I should be going to sleep instead of starig at the PCI removal code. Sigh. Thanks, Rafael --- drivers/pci/hotplug/acpiphp_glue.c | 15 ++++++++++++--- drivers/pci/pci-sysfs.c | 17 ++++++++++++----- drivers/pci/pci.h | 3 +++ drivers/pci/remove.c | 15 +++++++++++++-- include/linux/pci.h | 1 + 5 files changed, 41 insertions(+), 10 deletions(-) Index: linux-pm/drivers/pci/hotplug/acpiphp_glue.c =================================================================== --- linux-pm.orig/drivers/pci/hotplug/acpiphp_glue.c +++ linux-pm/drivers/pci/hotplug/acpiphp_glue.c @@ -553,6 +553,8 @@ static void __ref enable_slot(struct acp int max, pass; LIST_HEAD(add_list); + lock_pci_remove_rescan(); + acpiphp_rescan_slot(slot); max = acpiphp_max_busnr(bus); for (pass = 0; pass < 2; pass++) { @@ -586,6 +588,8 @@ static void __ref enable_slot(struct acp pci_bus_add_devices(bus); + unlock_pci_remove_rescan(); + slot->flags |= SLOT_ENABLED; list_for_each_entry(func, &slot->funcs, sibling) { dev = pci_get_slot(bus, PCI_DEVFN(slot->device, @@ -626,6 +630,7 @@ static void disable_slot(struct acpiphp_ struct acpiphp_func *func; struct pci_dev *pdev; + lock_pci_remove_rescan(); /* * enable_slot() enumerates all functions in this device via * pci_scan_slot(), whether they have associated ACPI hotplug @@ -633,9 +638,10 @@ static void disable_slot(struct acpiphp_ * here. */ while ((pdev = dev_in_slot(slot))) { - pci_stop_and_remove_bus_device(pdev); + __pci_stop_and_remove_bus_device(pdev); pci_dev_put(pdev); } + unlock_pci_remove_rescan(); list_for_each_entry(func, &slot->funcs, sibling) acpiphp_bus_trim(func_to_handle(func)); @@ -710,7 +716,7 @@ static void trim_stale_devices(struct pc alive = pci_bus_read_dev_vendor_id(dev->bus, dev->devfn, &v, 0); } if (!alive) { - pci_stop_and_remove_bus_device(dev); + __pci_stop_and_remove_bus_device(dev); if (handle) acpiphp_bus_trim(handle); } else if (bus) { @@ -743,12 +749,15 @@ static void acpiphp_check_bridge(struct mutex_lock(&slot->crit_sect); /* wake up all functions */ if (get_slot_status(slot) == ACPI_STA_ALL) { + lock_pci_remove_rescan(); + /* remove stale devices if any */ list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) if (PCI_SLOT(dev->devfn) == slot->device) trim_stale_devices(dev); + unlock_pci_remove_rescan(); /* configure all functions */ enable_slot(slot); } else { @@ -783,7 +792,7 @@ static void acpiphp_sanitize_bus(struct res->end) { /* Could not assign a required resources * for this device, remove it */ - pci_stop_and_remove_bus_device(dev); + __pci_stop_and_remove_bus_device(dev); break; } } Index: linux-pm/drivers/pci/pci-sysfs.c =================================================================== --- linux-pm.orig/drivers/pci/pci-sysfs.c +++ linux-pm/drivers/pci/pci-sysfs.c @@ -298,6 +298,17 @@ msi_bus_store(struct device *dev, struct static DEVICE_ATTR_RW(msi_bus); static DEFINE_MUTEX(pci_remove_rescan_mutex); + +void lock_pci_remove_rescan(void) +{ + mutex_lock(&pci_remove_rescan_mutex); +} + +void unlock_pci_remove_rescan(void) +{ + mutex_unlock(&pci_remove_rescan_mutex); +} + static ssize_t bus_rescan_store(struct bus_type *bus, const char *buf, size_t count) { @@ -354,11 +365,7 @@ static struct device_attribute dev_resca static void remove_callback(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - - mutex_lock(&pci_remove_rescan_mutex); - pci_stop_and_remove_bus_device(pdev); - mutex_unlock(&pci_remove_rescan_mutex); + pci_stop_and_remove_bus_device(to_pci_dev(dev)); } static ssize_t Index: linux-pm/drivers/pci/pci.h =================================================================== --- linux-pm.orig/drivers/pci/pci.h +++ linux-pm/drivers/pci/pci.h @@ -11,8 +11,11 @@ extern const unsigned char pcie_link_spe /* Functions internal to the PCI core code */ +void lock_pci_remove_rescan(void); +void unlock_pci_remove_rescan(void); int pci_create_sysfs_dev_files(struct pci_dev *pdev); void pci_remove_sysfs_dev_files(struct pci_dev *pdev); +void __pci_stop_and_remove_bus_device(struct pci_dev *pdev); #if !defined(CONFIG_DMI) && !defined(CONFIG_ACPI) static inline void pci_create_firmware_label_files(struct pci_dev *pdev) { return; } Index: linux-pm/drivers/pci/remove.c =================================================================== --- linux-pm.orig/drivers/pci/remove.c +++ linux-pm/drivers/pci/remove.c @@ -34,6 +34,10 @@ static void pci_stop_dev(struct pci_dev static void pci_destroy_dev(struct pci_dev *dev) { + if (dev->is_gone) + return; + + dev->is_gone = 1; device_del(&dev->dev); down_write(&pci_bus_sem); @@ -95,6 +99,12 @@ static void pci_remove_bus_device(struct pci_destroy_dev(dev); } +void __pci_stop_and_remove_bus_device(struct pci_dev *dev) +{ + pci_stop_bus_device(dev); + pci_remove_bus_device(dev); +} + /** * pci_stop_and_remove_bus_device - remove a PCI device and any children * @dev: the device to remove @@ -109,8 +119,9 @@ static void pci_remove_bus_device(struct */ void pci_stop_and_remove_bus_device(struct pci_dev *dev) { - pci_stop_bus_device(dev); - pci_remove_bus_device(dev); + lock_pci_remove_rescan(); + __pci_stop_and_remove_bus_device(dev); + unlock_pci_remove_rescan(); } EXPORT_SYMBOL(pci_stop_and_remove_bus_device); Index: linux-pm/include/linux/pci.h =================================================================== --- linux-pm.orig/include/linux/pci.h +++ linux-pm/include/linux/pci.h @@ -321,6 +321,7 @@ struct pci_dev { unsigned int multifunction:1;/* Part of multi-function device */ /* keep track of device state */ unsigned int is_added:1; + unsigned int is_gone:1; unsigned int is_busmaster:1; /* device is busmaster */ unsigned int no_msi:1; /* device may not use msi */ unsigned int block_cfg_access:1; /* config space access is blocked */ -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html