4.15-stable review patch. If anyone has any objections, please let me know. ------------------ From: Dexuan Cui <decui@xxxxxxxxxxxxx> commit 021ad274d7dc31611d4f47f7dd4ac7a224526f30 upstream. When we hot-remove the device, we first receive a PCI_EJECT message and then receive a PCI_BUS_RELATIONS message with bus_rel->device_count == 0. The first message is offloaded to hv_eject_device_work(), and the second is offloaded to pci_devices_present_work(). Both the paths can be running list_del(&hpdev->list_entry), causing general protection fault, because system_wq can run them concurrently. The patch eliminates the race condition. Since access to present/eject work items is serialized, we do not need the hbus->enum_sem anymore, so remove it. Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs") Link: https://lkml.kernel.org/r/KL1P15301MB00064DA6B4D221123B5241CFBFD70@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx Tested-by: Adrian Suhov <v-adsuho@xxxxxxxxxxxxx> Tested-by: Chris Valean <v-chvale@xxxxxxxxxxxxx> Signed-off-by: Dexuan Cui <decui@xxxxxxxxxxxxx> [lorenzo.pieralisi@xxxxxxx: squashed semaphore removal patch] Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@xxxxxxx> Reviewed-by: Michael Kelley <mikelley@xxxxxxxxxxxxx> Acked-by: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx> Cc: <stable@xxxxxxxxxxxxxxx> # v4.6+ Cc: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> Cc: Jack Morgenstein <jackm@xxxxxxxxxxxx> Cc: Stephen Hemminger <sthemmin@xxxxxxxxxxxxx> Cc: K. Y. Srinivasan <kys@xxxxxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- drivers/pci/host/pci-hyperv.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -457,7 +457,6 @@ struct hv_pcibus_device { spinlock_t device_list_lock; /* Protect lists below */ void __iomem *cfg_addr; - struct semaphore enum_sem; struct list_head resources_for_children; struct list_head children; @@ -471,6 +470,8 @@ struct hv_pcibus_device { struct retarget_msi_interrupt retarget_msi_interrupt_params; spinlock_t retarget_msi_interrupt_lock; + + struct workqueue_struct *wq; }; /* @@ -1600,12 +1601,8 @@ static struct hv_pci_dev *get_pcichild_w * It must also treat the omission of a previously observed device as * notification that the device no longer exists. * - * Note that this function is a work item, and it may not be - * invoked in the order that it was queued. Back to back - * updates of the list of present devices may involve queuing - * multiple work items, and this one may run before ones that - * were sent later. As such, this function only does something - * if is the last one in the queue. + * Note that this function is serialized with hv_eject_device_work(), + * because both are pushed to the ordered workqueue hbus->wq. */ static void pci_devices_present_work(struct work_struct *work) { @@ -1626,11 +1623,6 @@ static void pci_devices_present_work(str INIT_LIST_HEAD(&removed); - if (down_interruptible(&hbus->enum_sem)) { - put_hvpcibus(hbus); - return; - } - /* Pull this off the queue and process it if it was the last one. */ spin_lock_irqsave(&hbus->device_list_lock, flags); while (!list_empty(&hbus->dr_list)) { @@ -1647,7 +1639,6 @@ static void pci_devices_present_work(str spin_unlock_irqrestore(&hbus->device_list_lock, flags); if (!dr) { - up(&hbus->enum_sem); put_hvpcibus(hbus); return; } @@ -1734,7 +1725,6 @@ static void pci_devices_present_work(str break; } - up(&hbus->enum_sem); put_hvpcibus(hbus); kfree(dr); } @@ -1780,7 +1770,7 @@ static void hv_pci_devices_present(struc spin_unlock_irqrestore(&hbus->device_list_lock, flags); get_hvpcibus(hbus); - schedule_work(&dr_wrk->wrk); + queue_work(hbus->wq, &dr_wrk->wrk); } /** @@ -1858,7 +1848,7 @@ static void hv_pci_eject_device(struct h get_pcichild(hpdev, hv_pcidev_ref_pnp); INIT_WORK(&hpdev->wrk, hv_eject_device_work); get_hvpcibus(hpdev->hbus); - schedule_work(&hpdev->wrk); + queue_work(hpdev->hbus->wq, &hpdev->wrk); } /** @@ -2471,13 +2461,18 @@ static int hv_pci_probe(struct hv_device spin_lock_init(&hbus->config_lock); spin_lock_init(&hbus->device_list_lock); spin_lock_init(&hbus->retarget_msi_interrupt_lock); - sema_init(&hbus->enum_sem, 1); init_completion(&hbus->remove_event); + hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0, + hbus->sysdata.domain); + if (!hbus->wq) { + ret = -ENOMEM; + goto free_bus; + } ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0, hv_pci_onchannelcallback, hbus); if (ret) - goto free_bus; + goto destroy_wq; hv_set_drvdata(hdev, hbus); @@ -2546,6 +2541,8 @@ free_config: hv_free_config_window(hbus); close: vmbus_close(hdev->channel); +destroy_wq: + destroy_workqueue(hbus->wq); free_bus: free_page((unsigned long)hbus); return ret; @@ -2625,6 +2622,7 @@ static int hv_pci_remove(struct hv_devic irq_domain_free_fwnode(hbus->sysdata.fwnode); put_hvpcibus(hbus); wait_for_completion(&hbus->remove_event); + destroy_workqueue(hbus->wq); free_page((unsigned long)hbus); return 0; }