On 10/7/22 5:50 AM, Niklas Schnelle wrote: > Since commit fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev > calls") we can end up with duplicates in the list of devices attached to > a domain. This is inefficient and confusing since only one domain can > actually be in control of the IOMMU translations for a device. Fix this > by detaching the device from the previous domain, if any, on attach. > Add a WARN_ON() in case we still have attached devices on freeing the > domain. While here remove the re-attach on failure dance as it was > determined to be unlikely to help and may confuse debug and recovery. > > Fixes: fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls") > Signed-off-by: Niklas Schnelle <schnelle@xxxxxxxxxxxxx> Looks good to me now, thanks! Reviewed-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx> > --- > v5->v6: > - Only set zdev->dma_table once zpci_register_ioat() succeeded (Matt) > v4->v5: > - Unregister IOAT and set zdev->dma_table on error (Matt) > > drivers/iommu/s390-iommu.c | 106 ++++++++++++++++--------------------- > 1 file changed, 45 insertions(+), 61 deletions(-) > > diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c > index c898bcbbce11..96173cfee324 100644 > --- a/drivers/iommu/s390-iommu.c > +++ b/drivers/iommu/s390-iommu.c > @@ -79,10 +79,36 @@ static void s390_domain_free(struct iommu_domain *domain) > { > struct s390_domain *s390_domain = to_s390_domain(domain); > > + WARN_ON(!list_empty(&s390_domain->devices)); > dma_cleanup_tables(s390_domain->dma_table); > kfree(s390_domain); > } > > +static void __s390_iommu_detach_device(struct zpci_dev *zdev) > +{ > + struct s390_domain *s390_domain = zdev->s390_domain; > + struct s390_domain_device *domain_device, *tmp; > + unsigned long flags; > + > + if (!s390_domain) > + return; > + > + spin_lock_irqsave(&s390_domain->list_lock, flags); > + list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices, > + list) { > + if (domain_device->zdev == zdev) { > + list_del(&domain_device->list); > + kfree(domain_device); > + break; > + } > + } > + spin_unlock_irqrestore(&s390_domain->list_lock, flags); > + > + zpci_unregister_ioat(zdev, 0); > + zdev->s390_domain = NULL; > + zdev->dma_table = NULL; > +} > + > static int s390_iommu_attach_device(struct iommu_domain *domain, > struct device *dev) > { > @@ -90,7 +116,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, > struct zpci_dev *zdev = to_zpci_dev(dev); > struct s390_domain_device *domain_device; > unsigned long flags; > - int cc, rc; > + int cc, rc = 0; > > if (!zdev) > return -ENODEV; > @@ -99,24 +125,18 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, > if (!domain_device) > return -ENOMEM; > > - if (zdev->dma_table && !zdev->s390_domain) { > - cc = zpci_dma_exit_device(zdev); > - if (cc) { > - rc = -EIO; > - goto out_free; > - } > - } > - > if (zdev->s390_domain) > - zpci_unregister_ioat(zdev, 0); > + __s390_iommu_detach_device(zdev); > + else if (zdev->dma_table) > + zpci_dma_exit_device(zdev); > > - zdev->dma_table = s390_domain->dma_table; > cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, > - virt_to_phys(zdev->dma_table)); > + virt_to_phys(s390_domain->dma_table)); > if (cc) { > rc = -EIO; > - goto out_restore; > + goto out_free; > } > + zdev->dma_table = s390_domain->dma_table; > > spin_lock_irqsave(&s390_domain->list_lock, flags); > /* First device defines the DMA range limits */ > @@ -127,9 +147,9 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, > /* Allow only devices with identical DMA range limits */ > } else if (domain->geometry.aperture_start != zdev->start_dma || > domain->geometry.aperture_end != zdev->end_dma) { > - rc = -EINVAL; > spin_unlock_irqrestore(&s390_domain->list_lock, flags); > - goto out_restore; > + rc = -EINVAL; > + goto out_unregister; > } > domain_device->zdev = zdev; > zdev->s390_domain = s390_domain; > @@ -138,14 +158,9 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, > > return 0; > > -out_restore: > - if (!zdev->s390_domain) { > - zpci_dma_init_device(zdev); > - } else { > - zdev->dma_table = zdev->s390_domain->dma_table; > - zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, > - virt_to_phys(zdev->dma_table)); > - } > +out_unregister: > + zpci_unregister_ioat(zdev, 0); > + zdev->dma_table = NULL; > out_free: > kfree(domain_device); > > @@ -155,32 +170,12 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, > static void s390_iommu_detach_device(struct iommu_domain *domain, > struct device *dev) > { > - struct s390_domain *s390_domain = to_s390_domain(domain); > struct zpci_dev *zdev = to_zpci_dev(dev); > - struct s390_domain_device *domain_device, *tmp; > - unsigned long flags; > - int found = 0; > > - if (!zdev) > - return; > + WARN_ON(zdev->s390_domain != to_s390_domain(domain)); > > - spin_lock_irqsave(&s390_domain->list_lock, flags); > - list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices, > - list) { > - if (domain_device->zdev == zdev) { > - list_del(&domain_device->list); > - kfree(domain_device); > - found = 1; > - break; > - } > - } > - spin_unlock_irqrestore(&s390_domain->list_lock, flags); > - > - if (found && (zdev->s390_domain == s390_domain)) { > - zdev->s390_domain = NULL; > - zpci_unregister_ioat(zdev, 0); > - zpci_dma_init_device(zdev); > - } > + __s390_iommu_detach_device(zdev); > + zpci_dma_init_device(zdev); > } > > static struct iommu_device *s390_iommu_probe_device(struct device *dev) > @@ -193,24 +188,13 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev) > static void s390_iommu_release_device(struct device *dev) > { > struct zpci_dev *zdev = to_zpci_dev(dev); > - struct iommu_domain *domain; > > /* > - * This is a workaround for a scenario where the IOMMU API common code > - * "forgets" to call the detach_dev callback: After binding a device > - * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers > - * the attach_dev), removing the device via > - * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev, > - * only release_device will be called via the BUS_NOTIFY_REMOVED_DEVICE > - * notifier. > - * > - * So let's call detach_dev from here if it hasn't been called before. > + * release_device is expected to detach any domain currently attached > + * to the device, but keep it attached to other devices in the group. > */ > - if (zdev && zdev->s390_domain) { > - domain = iommu_get_domain_for_dev(dev); > - if (domain) > - s390_iommu_detach_device(domain, dev); > - } > + if (zdev) > + __s390_iommu_detach_device(zdev); > } > > static int s390_iommu_update_trans(struct s390_domain *s390_domain,