Hi Yi, On 4/1/23 16:44, Yi Liu wrote: > for the users that accept device fds passed from management stacks to be > able to figure out the host reset affected devices among the devices > opened by the user. This is needed as such users do not have BDF (bus, > devfn) knowledge about the devices it has opened, hence unable to use > the information reported by existing VFIO_DEVICE_GET_PCI_HOT_RESET_INFO > to figure out the affected devices. > > Signed-off-by: Yi Liu <yi.l.liu@xxxxxxxxx> > --- > drivers/vfio/pci/vfio_pci_core.c | 58 ++++++++++++++++++++++++++++---- > include/uapi/linux/vfio.h | 24 ++++++++++++- > 2 files changed, 74 insertions(+), 8 deletions(-) > > diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c > index 19f5b075d70a..a5a7e148dce1 100644 > --- a/drivers/vfio/pci/vfio_pci_core.c > +++ b/drivers/vfio/pci/vfio_pci_core.c > @@ -30,6 +30,7 @@ > #if IS_ENABLED(CONFIG_EEH) > #include <asm/eeh.h> > #endif > +#include <uapi/linux/iommufd.h> > > #include "vfio_pci_priv.h" > > @@ -767,6 +768,20 @@ static int vfio_pci_get_irq_count(struct vfio_pci_core_device *vdev, int irq_typ > return 0; > } > > +static struct vfio_device * > +vfio_pci_find_device_in_devset(struct vfio_device_set *dev_set, > + struct pci_dev *pdev) > +{ > + struct vfio_device *cur; > + > + lockdep_assert_held(&dev_set->lock); > + > + list_for_each_entry(cur, &dev_set->device_list, dev_set_list) > + if (cur->dev == &pdev->dev) > + return cur; > + return NULL; > +} > + > static int vfio_pci_count_devs(struct pci_dev *pdev, void *data) > { > (*(int *)data)++; > @@ -776,13 +791,20 @@ static int vfio_pci_count_devs(struct pci_dev *pdev, void *data) > struct vfio_pci_fill_info { > int max; > int cur; > + bool require_devid; > + struct iommufd_ctx *iommufd; > + struct vfio_device_set *dev_set; > struct vfio_pci_dependent_device *devices; > }; > > static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data) > { > struct vfio_pci_fill_info *fill = data; > + struct vfio_device_set *dev_set = fill->dev_set; > struct iommu_group *iommu_group; > + struct vfio_device *vdev; > + > + lockdep_assert_held(&dev_set->lock); > > if (fill->cur == fill->max) > return -EAGAIN; /* Something changed, try again */ > @@ -791,7 +813,21 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data) > if (!iommu_group) > return -EPERM; /* Cannot reset non-isolated devices */ > > - fill->devices[fill->cur].group_id = iommu_group_id(iommu_group); > + if (fill->require_devid) { > + /* > + * Report dev_id of the devices that are opened as cdev > + * and have the same iommufd with the fill->iommufd. > + * Otherwise, just fill IOMMUFD_INVALID_ID. > + */ > + vdev = vfio_pci_find_device_in_devset(dev_set, pdev); > + if (vdev && vfio_device_cdev_opened(vdev) && > + fill->iommufd == vfio_iommufd_physical_ictx(vdev)) > + vfio_iommufd_physical_devid(vdev, &fill->devices[fill->cur].dev_id); > + else > + fill->devices[fill->cur].dev_id = IOMMUFD_INVALID_ID; > + } else { > + fill->devices[fill->cur].group_id = iommu_group_id(iommu_group); > + } > fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus); > fill->devices[fill->cur].bus = pdev->bus->number; > fill->devices[fill->cur].devfn = pdev->devfn; > @@ -1230,17 +1266,27 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info( > return -ENOMEM; > > fill.devices = devices; > + fill.dev_set = vdev->vdev.dev_set; > > + mutex_lock(&vdev->vdev.dev_set->lock); > + if (vfio_device_cdev_opened(&vdev->vdev)) { > + fill.require_devid = true; > + fill.iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); > + } > ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_fill_devs, > &fill, slot); > + mutex_unlock(&vdev->vdev.dev_set->lock); > > /* > * If a device was removed between counting and filling, we may come up > * short of fill.max. If a device was added, we'll have a return of > * -EAGAIN above. > */ > - if (!ret) > + if (!ret) { > hdr.count = fill.cur; > + if (fill.require_devid) > + hdr.flags = VFIO_PCI_HOT_RESET_FLAG_IOMMUFD_DEV_ID; > + } > > reset_info_exit: > if (copy_to_user(arg, &hdr, minsz)) > @@ -2346,12 +2392,10 @@ static bool vfio_dev_in_files(struct vfio_pci_core_device *vdev, > static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data) > { > struct vfio_device_set *dev_set = data; > - struct vfio_device *cur; > > - list_for_each_entry(cur, &dev_set->device_list, dev_set_list) > - if (cur->dev == &pdev->dev) > - return 0; > - return -EBUSY; > + lockdep_assert_held(&dev_set->lock); > + > + return vfio_pci_find_device_in_devset(dev_set, pdev) ? 0 : -EBUSY; > } > > /* > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > index 25432ef213ee..5a34364e3b94 100644 > --- a/include/uapi/linux/vfio.h > +++ b/include/uapi/linux/vfio.h > @@ -650,11 +650,32 @@ enum { > * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12, > * struct vfio_pci_hot_reset_info) > * > + * This command is used to query the affected devices in the hot reset for > + * a given device. User could use the information reported by this command > + * to figure out the affected devices among the devices it has opened. > + * This command always reports the segment, bus and devfn information for > + * each affected device, and selectively report the group_id or the dev_id > + * per the way how the device being queried is opened. > + * - If the device is opened via the traditional group/container manner, > + * this command reports the group_id for each affected device. > + * > + * - If the device is opened as a cdev, this command needs to report s/needs to report/reports > + * dev_id for each affected device and set the > + * VFIO_PCI_HOT_RESET_FLAG_IOMMUFD_DEV_ID flag. For the affected > + * devices that are not opened as cdev or bound to different iommufds > + * with the device that is queried, report an invalid dev_id to avoid s/bound to different iommufds with the device that is queried/bound to iommufds different from the reset device one? > + * potential dev_id conflict as dev_id is local to iommufd. For such > + * affected devices, user shall fall back to use the segment, bus and > + * devfn info to map it to opened device. > + * > * Return: 0 on success, -errno on failure: > * -enospc = insufficient buffer, -enodev = unsupported for device. > */ > struct vfio_pci_dependent_device { > - __u32 group_id; > + union { > + __u32 group_id; > + __u32 dev_id; > + }; > __u16 segment; > __u8 bus; > __u8 devfn; /* Use PCI_SLOT/PCI_FUNC */ > @@ -663,6 +684,7 @@ struct vfio_pci_dependent_device { > struct vfio_pci_hot_reset_info { > __u32 argsz; > __u32 flags; > +#define VFIO_PCI_HOT_RESET_FLAG_IOMMUFD_DEV_ID (1 << 0) > __u32 count; > struct vfio_pci_dependent_device devices[]; > }; Eric