On Wed, Feb 22, 2023 at 01:35:06PM +0000, Liu, Yi L wrote: > > btw this patch is insufficient to handle device fd. The current logic > > requires every device in the dev_set covered by provided fd's: Yes, which is what it should be. > > static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev, > > struct vfio_pci_group_info *groups) > > { > > unsigned int i; > > > > for (i = 0; i < groups->count; i++) > > if (vfio_file_has_dev(groups->files[i], &vdev->vdev)) > > return true; > > return false; > > } > > > > Presumably when cdev fd is provided above should compare iommu > > group of the fd and that of the vdev. Otherwise it expects the user > > to have full access to every device in the set which is impractical. No, it should check the devs directly; userspace has to provide every dev in the dev set to do a reset. We should not allow userspace to take a shortcut based on hidden group stuff. The dev set is already unrelated to the groups, and userspace cannot discover the devset, so nothing has changed. This is looking worse to me. I think we should not require userspace to pass in lists of devices here. The simpler solution is to just take in a single iommufd and use that as the ownership proof. Something like the below. Jason diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index d81f93a321afcb..a5833bfdd7307e 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -114,6 +114,34 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, } EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD); +/** + * iommufd_ctx_has_device - True if the struct device is bound to this ictx + * @ictx: iommufd file descriptor + * @dev: Pointer to a physical device struct + * + * True if an iommufd_device_bind() is present for dev. 
+ */ +bool iommufd_ctx_has_device(struct iommufd_ctx *ictx, struct device *dev) +{ + unsigned long index; + struct iommufd_object *obj; + + if (!ictx) + return false; + + xa_lock(&ictx->objects); + xa_for_each(&ictx->objects, index, obj) { + if (obj->type == IOMMUFD_OBJ_DEVICE && + container_of(obj, struct iommufd_device, obj)->dev == dev) { + xa_unlock(&ictx->objects); + return true; + } + } + xa_unlock(&ictx->objects); + return false; +} +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_device, IOMMUFD); + /** * iommufd_device_unbind - Undo iommufd_device_bind() * @idev: Device returned by iommufd_device_bind() diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 26a541cc64d114..28f6db1b81c1af 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -27,6 +27,7 @@ #include <linux/vgaarb.h> #include <linux/nospec.h> #include <linux/sched/mm.h> +#include <linux/iommufd.h> #if IS_ENABLED(CONFIG_EEH) #include <asm/eeh.h> #endif @@ -179,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev) struct vfio_pci_group_info; static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set); static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, - struct vfio_pci_group_info *groups); + struct vfio_pci_group_info *groups, + struct iommufd_ctx *iommufd_ctx); /* * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND @@ -1254,29 +1256,17 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info( return ret; } -static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, - struct vfio_pci_hot_reset __user *arg) +static int +vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev, + struct vfio_pci_hot_reset *hdr, + struct vfio_pci_hot_reset __user *arg) { - unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count); - struct vfio_pci_hot_reset hdr; int32_t *group_fds; struct file **files; struct vfio_pci_group_info info; bool 
slot = false; int file_idx, count = 0, ret = 0; - if (copy_from_user(&hdr, arg, minsz)) - return -EFAULT; - - if (hdr.argsz < minsz || hdr.flags) - return -EINVAL; - - /* Can we do a slot or bus reset or neither? */ - if (!pci_probe_reset_slot(vdev->pdev->slot)) - slot = true; - else if (pci_probe_reset_bus(vdev->pdev->bus)) - return -ENODEV; - /* * We can't let userspace give us an arbitrarily large buffer to copy, * so verify how many we think there could be. Note groups can have @@ -1288,11 +1278,11 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, return ret; /* Somewhere between 1 and count is OK */ - if (!hdr.count || hdr.count > count) + if (!hdr->count || hdr->count > count) return -EINVAL; - group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL); - files = kcalloc(hdr.count, sizeof(*files), GFP_KERNEL); + group_fds = kcalloc(hdr->count, sizeof(*group_fds), GFP_KERNEL); + files = kcalloc(hdr->count, sizeof(*files), GFP_KERNEL); if (!group_fds || !files) { kfree(group_fds); kfree(files); @@ -1300,7 +1290,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, } if (copy_from_user(group_fds, arg->group_fds, - hdr.count * sizeof(*group_fds))) { + hdr->count * sizeof(*group_fds))) { kfree(group_fds); kfree(files); return -EFAULT; @@ -1311,7 +1301,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, * interface and store the group and iommu ID. This ensures the group * is held across the reset. 
*/ - for (file_idx = 0; file_idx < hdr.count; file_idx++) { + for (file_idx = 0; file_idx < hdr->count; file_idx++) { struct file *file = fget(group_fds[file_idx]); if (!file) { @@ -1335,10 +1325,10 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, if (ret) goto hot_reset_release; - info.count = hdr.count; + info.count = hdr->count; info.files = files; - ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info); + ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL); hot_reset_release: for (file_idx--; file_idx >= 0; file_idx--) @@ -1348,6 +1338,50 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, return ret; } +static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, + struct vfio_pci_hot_reset __user *arg) +{ + unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count); + struct vfio_pci_hot_reset hdr; + struct iommufd_ctx *iommufd; + bool slot = false; + struct fd f; + int32_t fd; + int ret; + + if (copy_from_user(&hdr, arg, minsz)) + return -EFAULT; + + if (hdr.argsz < minsz || hdr.flags) + return -EINVAL; + + /* Can we do a slot or bus reset or neither? 
*/ + if (!pci_probe_reset_slot(vdev->pdev->slot)) + slot = true; + else if (pci_probe_reset_bus(vdev->pdev->bus)) + return -ENODEV; + + if (hdr.count != 1) + return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, arg); + + if (copy_from_user(&fd, arg->group_fds, sizeof(fd))) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + iommufd = iommufd_ctx_from_file(f.file); + if (IS_ERR(iommufd)) { + fdput(f); + return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, arg); + } + fdput(f); + + ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd); + iommufd_ctx_put(iommufd); + return ret; +} + static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev, struct vfio_device_ioeventfd __user *arg) { @@ -2317,6 +2351,9 @@ static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev, { unsigned int i; + if (!groups) + return false; + for (i = 0; i < groups->count; i++) if (vfio_file_has_dev(groups->files[i], &vdev->vdev)) return true; @@ -2398,7 +2435,8 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set) * get each memory_lock. */ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, - struct vfio_pci_group_info *groups) + struct vfio_pci_group_info *groups, + struct iommufd_ctx *iommufd_ctx) { struct vfio_pci_core_device *cur_mem; struct vfio_pci_core_device *cur_vma; @@ -2432,7 +2470,8 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, * Test whether all the affected devices are contained by the * set of groups provided by the user. 
*/ - if (!vfio_dev_in_groups(cur_vma, groups)) { + if (!vfio_dev_in_groups(cur_vma, groups) && + !iommufd_ctx_has_device(iommufd_ctx, &cur_vma->pdev->dev)) { ret = -EINVAL; goto err_undo; } diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 650d45629647a7..1f58673701cb1e 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -58,6 +58,7 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, void *data, size_t len, unsigned int flags); int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); +bool iommufd_ctx_has_device(struct iommufd_ctx *ictx, struct device *dev); #else /* !CONFIG_IOMMUFD */ static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) { @@ -94,5 +95,12 @@ static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, { return -EOPNOTSUPP; } + +static inline bool iommufd_ctx_has_device(struct iommufd_ctx *ictx, + struct device *dev) +{ + return false; +} + #endif /* CONFIG_IOMMUFD */ #endif