On Wed, 14 Jul 2021 21:20:38 -0300 Jason Gunthorpe <jgg@xxxxxxxxxx> wrote: > +/* > + * We need to get memory_lock for each device, but devices can share mmap_lock, > + * therefore we need to zap and hold the vma_lock for each device, and only then > + * get each memory_lock. > + */ > +static int vfio_hot_reset_device_set(struct vfio_pci_device *vdev, > + struct vfio_pci_group_info *groups) > +{ > + struct vfio_device_set *dev_set = vdev->vdev.dev_set; > + struct vfio_pci_device *cur_mem = > + list_first_entry(&dev_set->device_list, struct vfio_pci_device, > + vdev.dev_set_list); We shouldn't be looking at the list outside of the lock, if the first entry got removed we'd break our unwind code. > + struct vfio_pci_device *cur_vma; > + struct vfio_pci_device *cur; > + bool is_mem = true; > + int ret; > > - if (pci_dev_driver(pdev) != &vfio_pci_driver) { > - vfio_device_put(device); > - return -EBUSY; > + mutex_lock(&dev_set->lock); ^^^^^^^^^^^^^^^^^^^^^^^^^^^ > + > + /* All devices in the group to be reset need VFIO devices */ > + if (vfio_pci_for_each_slot_or_bus( > + vdev->pdev, vfio_pci_check_all_devices_bound, dev_set, > + !pci_probe_reset_slot(vdev->pdev->slot))) { > + ret = -EINVAL; > + goto err_unlock; > } > > - vdev = container_of(device, struct vfio_pci_device, vdev); > + list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) { > + /* > + * Test whether all the affected devices are contained by the > + * set of groups provided by the user. > + */ > + if (!vfio_dev_in_groups(cur_vma, groups)) { > + ret = -EINVAL; > + goto err_undo; > + } > > - /* > - * Locking multiple devices is prone to deadlock, runaway and > - * unwind if we hit contention. > - */ > - if (!vfio_pci_zap_and_vma_lock(vdev, true)) { > - vfio_device_put(device); > - return -EBUSY; > + /* > + * Locking multiple devices is prone to deadlock, runaway and > + * unwind if we hit contention. > + */ > + if (!vfio_pci_zap_and_vma_lock(cur_vma, true)) { > + ret = -EBUSY; > + goto err_undo; > + } > } > > - devs->devices[devs->cur_index++] = vdev; > - return 0; > + list_for_each_entry(cur_mem, &dev_set->device_list, vdev.dev_set_list) { > + if (!down_write_trylock(&cur_mem->memory_lock)) { > + ret = -EBUSY; > + goto err_undo; > + } > + mutex_unlock(&cur_mem->vma_lock); > + } > + > + ret = pci_reset_bus(vdev->pdev); > + > + list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) > + up_write(&cur->memory_lock); > + mutex_unlock(&dev_set->lock); > + > + return ret; Isn't the above section actually redundant to below, ie. we could just fall through after the pci_reset_bus()? Thanks, Alex > + > +err_undo: > + list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) { > + if (cur == cur_mem) > + is_mem = false; > + if (cur == cur_vma) > + break; > + if (is_mem) > + up_write(&cur->memory_lock); > + else > + mutex_unlock(&cur->vma_lock); > + } > +err_unlock: > + mutex_unlock(&dev_set->lock); > + return ret; > } > > /*