> From: Alex Williamson <alex.williamson@xxxxxxxxxx> > Sent: Friday, April 28, 2023 5:55 AM > > On Wed, 26 Apr 2023 07:54:19 -0700 > Yi Liu <yi.l.liu@xxxxxxxxx> wrote: > > > This is the way user to invoke hot-reset for the devices opened by cdev > > interface. User should check the flag VFIO_PCI_HOT_RESET_FLAG_RESETTABLE > > in the output of VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl before doing > > hot-reset for cdev devices. > > > > Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx> > > Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx> > > Reviewed-by: Jason Gunthorpe <jgg@xxxxxxxxxx> > > Tested-by: Yanting Jiang <yanting.jiang@xxxxxxxxx> > > Signed-off-by: Yi Liu <yi.l.liu@xxxxxxxxx> > > --- > > drivers/vfio/pci/vfio_pci_core.c | 66 +++++++++++++++++++++++++++----- > > include/uapi/linux/vfio.h | 22 +++++++++++ > > 2 files changed, 79 insertions(+), 9 deletions(-) > > > > diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c > > index 43858d471447..f70e3b948b16 100644 > > --- a/drivers/vfio/pci/vfio_pci_core.c > > +++ b/drivers/vfio/pci/vfio_pci_core.c > > @@ -180,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device > *vdev) > > struct vfio_pci_group_info; > > static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set); > > static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, > > - struct vfio_pci_group_info *groups); > > + struct vfio_pci_group_info *groups, > > + struct iommufd_ctx *iommufd_ctx); > > > > /* > > * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND > > @@ -1364,8 +1365,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct > vfio_pci_core_device *vdev, > > if (ret) > > return ret; > > > > - /* Somewhere between 1 and count is OK */ > > - if (!array_count || array_count > count) > > + if (array_count > count) > > return -EINVAL; > > Doesn't this need a || vfio_device_cdev_opened(vdev) test as well? > It's invalid to pass fds for a cdev device. 
Presumably it would fail > later collecting group fds as well, but might as well enforce the > semantics early. Yes, it does. > > > > > group_fds = kcalloc(array_count, sizeof(*group_fds), GFP_KERNEL); > > @@ -1414,7 +1414,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct > vfio_pci_core_device *vdev, > > info.count = array_count; > > info.files = files; > > > > - ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info); > > + ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL); > > > > hot_reset_release: > > for (file_idx--; file_idx >= 0; file_idx--) > > @@ -1429,6 +1429,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct > vfio_pci_core_device *vdev, > > { > > unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count); > > struct vfio_pci_hot_reset hdr; > > + struct iommufd_ctx *iommufd; > > bool slot = false; > > > > if (copy_from_user(&hdr, arg, minsz)) > > @@ -1443,7 +1444,12 @@ static int vfio_pci_ioctl_pci_hot_reset(struct > vfio_pci_core_device *vdev, > > else if (pci_probe_reset_bus(vdev->pdev->bus)) > > return -ENODEV; > > > > - return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg); > > + if (hdr.count) > > + return vfio_pci_ioctl_pci_hot_reset_groups(vdev, hdr.count, slot, arg); > > + > > + iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); > > + > > + return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd); > > Why did we need to store iommufd in a variable? Will remove it.
> > } > > > > static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev, > > @@ -2415,6 +2421,9 @@ static bool vfio_dev_in_groups(struct vfio_pci_core_device > *vdev, > > { > > unsigned int i; > > > > + if (!groups) > > + return false; > > + > > for (i = 0; i < groups->count; i++) > > if (vfio_file_has_dev(groups->files[i], &vdev->vdev)) > > return true; > > @@ -2488,13 +2497,38 @@ static int vfio_pci_dev_set_pm_runtime_get(struct > vfio_device_set *dev_set) > > return ret; > > } > > > > +static bool vfio_dev_in_iommufd_ctx(struct vfio_pci_core_device *vdev, > > + struct iommufd_ctx *iommufd_ctx) > > +{ > > + struct iommufd_ctx *iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); > > + struct iommu_group *iommu_group; > > + > > + if (!iommufd_ctx) > > + return false; > > + > > + if (iommufd == iommufd_ctx) > > + return true; > > + > > + iommu_group = iommu_group_get(vdev->vdev.dev); > > + if (!iommu_group) > > + return false; > > + > > + /* > > + * Try to check if any device within iommu_group is bound with > > + * the input iommufd_ctx. > > + */ > > + return vfio_devset_iommufd_has_group(vdev->vdev.dev_set, > > + iommufd_ctx, iommu_group); > > +} > > This last test makes this not do what the function name suggests it > does. If it were true, the device is not in the iommufd_ctx, it simply > cannot be within another iommu ctx. Yes. It actually means the device cannot be in another iommufd_ctx. > > > + > > /* > > * We need to get memory_lock for each device, but devices can share mmap_lock, > > * therefore we need to zap and hold the vma_lock for each device, and only then > > * get each memory_lock.
> > */ > > static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, > > - struct vfio_pci_group_info *groups) > > + struct vfio_pci_group_info *groups, > > + struct iommufd_ctx *iommufd_ctx) > > { > > struct vfio_pci_core_device *cur_mem; > > struct vfio_pci_core_device *cur_vma; > > @@ -2525,10 +2559,24 @@ static int vfio_pci_dev_set_hot_reset(struct > vfio_device_set *dev_set, > > > > list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) { > > /* > > - * Test whether all the affected devices are contained by the > > - * set of groups provided by the user. > > + * Test whether all the affected devices can be reset by the > > + * user. > > + * > > + * If user provides a set of groups, all the opened devices > > + * in the dev_set should be contained by the set of groups > > + * provided by the user. > > + * > > + * If user provides a zero-length group fd array, then all > > + * the affected devices must be bound to same iommufd_ctx as > > + * the input iommufd_ctx. If there is device that has not > > + * been bound to iommufd_ctx yet, shall check if there is any > > + * device within its iommu_group that has been bound to the > > + * input iommufd_ctx. > > + * > > + * Otherwise, reset is not allowed. 
> > */ > > - if (!vfio_dev_in_groups(cur_vma, groups)) { > > + if (!vfio_dev_in_groups(cur_vma, groups) && > > + !vfio_dev_in_iommufd_ctx(cur_vma, iommufd_ctx)) { > > > Rather than mangling vfio_dev_in_groups() and inventing > vfio_dev_in_iommufd_ctx() that doesn't do what it implies, how about: > > bool vfio_device_owned(struct vfio_device *vdev, > struct vfio_pci_group_info *groups, > struct iommufd_ctx *iommufd_ctx) > { > struct iommu_group *group; > > WARN_ON(!!groups == !!iommufd_ctx); > > if (groups) > return vfio_dev_in_groups(vdev, groups)); > > if (vfio_iommufd_physical_ictx(vdev) == iommufd_ctx) > return true; > > group = iommu_group_get(vdev->dev); > if (group) > return vfio_devset_iommufd_has_group(vdev->vdev.dev_set, > iommufd_ctx, group); > return false; > } Will follow the above suggestion. > Seems like such a function would live in vfio_main.c That may require making struct vfio_pci_group_info visible outside of vfio-pci. It seems strange to make vfio_main.c refer to a PCI-specific structure. > > > ret = -EINVAL; > > goto err_undo; > > } > > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > > index 4b4e2c28984b..1241d02d8701 100644 > > --- a/include/uapi/linux/vfio.h > > +++ b/include/uapi/linux/vfio.h > > @@ -710,6 +710,28 @@ struct vfio_pci_hot_reset_info { > > * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13, > > * struct vfio_pci_hot_reset) > > * > > + * Userspace requests hot reset for the devices it operates. Due to the > > + * underlying topology, multiple devices can be affected in the reset > > + * while some might be opened by another user. To avoid interference > > + * the calling user must ensure all affected devices are owned by itself. > > + * The ownership proof needs to refer the output of > > + * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO. Ownership can be proved as: > > + * > > + * 1) An array of group fds - This is used for the devices opened via > > + * the group/container interface.
> > + * 2) A zero-length array - This is used for the devices opened via > > + * the cdev interface. User should check the > > + * flag VFIO_PCI_HOT_RESET_FLAG_IOMMUFD_DEV_ID > > + * and flag VFIO_PCI_HOT_RESET_FLAG_RESETTABLE > > + * before using this method. > > + * > > + * In case a non void group fd array is passed, the devices affected by > > + * the reset must belong to those opened VFIO groups. In case a zero > > + * length array is passed, the other devices affected by the reset, if > > + * any, must be either bound to the same iommufd as this VFIO device or > > + * in the same iommu_group with a device that does. Either of the two > > + * methods is applied to check the feasibility of the hot reset. > > This should probably just refer to the concept of ownership described > in the INFO ioctl and clarify that cdev opened device must exclusively > provide an empty array and group opened devices must exclusively use an > array of group fds for proof of ownership. Mixed access to devices > between cdev and legacy groups are not supported by this interface. > Thanks, Sure. Will do so in the next version. Regards, Yi Liu > > Alex > > > + * > > * Return: 0 on success, -errno on failure. > > */ > > struct vfio_pci_hot_reset {