On Fri, Jun 02, 2023 at 05:15:14AM -0700, Yi Liu wrote:
> This allows VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl use the iommufd_ctx
> of the cdev device to check the ownership of the other affected devices.
>
> When VFIO_DEVICE_GET_PCI_HOT_RESET_INFO is called on an IOMMUFD managed
> device, the new flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is reported to indicate
> the values returned are IOMMUFD devids rather than group IDs as used when
> accessing vfio devices through the conventional vfio group interface.
> Additionally the flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED will be reported
> in this mode if all of the devices affected by the hot-reset are owned by
> either virtue of being directly bound to the same iommufd context as the
> calling device, or implicitly owned via a shared IOMMU group.
>
> Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
> Suggested-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
> Signed-off-by: Yi Liu <yi.l.liu@xxxxxxxxx>
> ---
>  drivers/vfio/iommufd.c           | 49 +++++++++++++++++++++++++++++++
>  drivers/vfio/pci/vfio_pci_core.c | 47 +++++++++++++++++++++++++-----
>  include/linux/vfio.h             | 16 ++++++++++
>  include/uapi/linux/vfio.h        | 50 +++++++++++++++++++++++++++++++-
>  4 files changed, 154 insertions(+), 8 deletions(-)

This could use some more fiddling. For example, we could copy each
vfio_pci_dependent_device to user memory inside the loop instead of
allocating an array.
Add another patch with something like this in it:

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index b0eadafcbcf502..516e0fda74bec9 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -775,19 +775,24 @@ static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
 }
 
 struct vfio_pci_fill_info {
-	int max;
-	int cur;
-	struct vfio_pci_dependent_device *devices;
+	struct vfio_pci_dependent_device __user *devices;
+	struct vfio_pci_dependent_device __user *devices_end;
 	struct vfio_device *vdev;
 	u32 flags;
 };
 
 static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
 {
+	struct vfio_pci_dependent_device info = {
+		.segment = pci_domain_nr(pdev->bus),
+		.bus = pdev->bus->number,
+		.devfn = pdev->devfn,
+	};
 	struct vfio_pci_fill_info *fill = data;
 
-	if (fill->cur == fill->max)
-		return -EAGAIN; /* Something changed, try again */
+	/* The write cursor has reached the end of the user-supplied array */
+	if (fill->devices >= fill->devices_end)
+		return -ENOSPC;
 
 	if (fill->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID) {
 		struct iommufd_ctx *iommufd = vfio_iommufd_device_ictx(fill->vdev);
@@ -800,12 +805,12 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
 		 */
 		vdev = vfio_find_device_in_devset(dev_set, &pdev->dev);
 		if (!vdev)
-			fill->devices[fill->cur].devid = VFIO_PCI_DEVID_NOT_OWNED;
+			info.devid = VFIO_PCI_DEVID_NOT_OWNED;
 		else
-			fill->devices[fill->cur].devid =
-				vfio_iommufd_device_hot_reset_devid(vdev, iommufd);
+			info.devid = vfio_iommufd_device_hot_reset_devid(
+				vdev, iommufd);
 		/* If devid is VFIO_PCI_DEVID_NOT_OWNED, clear owned flag. */
-		if (fill->devices[fill->cur].devid == VFIO_PCI_DEVID_NOT_OWNED)
+		if (info.devid == VFIO_PCI_DEVID_NOT_OWNED)
 			fill->flags &= ~VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED;
 	} else {
 		struct iommu_group *iommu_group;
@@ -814,13 +819,13 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
 		if (!iommu_group)
 			return -EPERM; /* Cannot reset non-isolated devices */
 
-		fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
+		info.group_id = iommu_group_id(iommu_group);
 		iommu_group_put(iommu_group);
 	}
-	fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);
-	fill->devices[fill->cur].bus = pdev->bus->number;
-	fill->devices[fill->cur].devfn = pdev->devfn;
-	fill->cur++;
+
+	if (copy_to_user(fill->devices, &info, sizeof(info)))
+		return -EFAULT;
+	fill->devices++;
 	return 0;
 }
 
@@ -1212,8 +1217,7 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
 	unsigned long minsz =
 		offsetofend(struct vfio_pci_hot_reset_info, count);
 	struct vfio_pci_hot_reset_info hdr;
-	struct vfio_pci_fill_info fill = { 0 };
-	struct vfio_pci_dependent_device *devices = NULL;
+	struct vfio_pci_fill_info fill = {};
 	bool slot = false;
 	int ret = 0;
 
@@ -1231,29 +1235,9 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
 	else if (pci_probe_reset_bus(vdev->pdev->bus))
 		return -ENODEV;
 
-	/* How many devices are affected? */
-	ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
-					    &fill.max, slot);
-	if (ret)
-		return ret;
-
-	WARN_ON(!fill.max); /* Should always be at least one */
-
-	/*
-	 * If there's enough space, fill it now, otherwise return -ENOSPC and
-	 * the number of devices affected.
-	 */
-	if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
-		ret = -ENOSPC;
-		hdr.count = fill.max;
-		goto reset_info_exit;
-	}
-
-	devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
-	if (!devices)
-		return -ENOMEM;
-
-	fill.devices = devices;
+	fill.devices = arg->devices;
+	fill.devices_end = arg->devices +
+			   (hdr.argsz - sizeof(hdr)) / sizeof(arg->devices[0]);
 	fill.vdev = &vdev->vdev;
 
 	if (vfio_device_cdev_opened(&vdev->vdev))
@@ -1264,29 +1248,14 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info(
 	ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_fill_devs,
 					    &fill, slot);
 	mutex_unlock(&vdev->vdev.dev_set->lock);
+	if (ret)
+		return ret;
 
-	/*
-	 * If a device was removed between counting and filling, we may come up
-	 * short of fill.max. If a device was added, we'll have a return of
-	 * -EAGAIN above.
-	 */
-	if (!ret) {
-		hdr.count = fill.cur;
-		hdr.flags = fill.flags;
-	}
-
-reset_info_exit:
+	hdr.count = fill.devices - arg->devices;
+	hdr.flags = fill.flags;
 	if (copy_to_user(arg, &hdr, minsz))
-		ret = -EFAULT;
-
-	if (!ret) {
-		if (copy_to_user(&arg->devices, devices,
-				 hdr.count * sizeof(*devices)))
-			ret = -EFAULT;
-	}
-
-	kfree(devices);
-	return ret;
+		return -EFAULT;
+	return 0;
 }
 
 static int