> From: Christoph Hellwig <hch@xxxxxx> > Sent: Monday, September 13, 2021 3:16 PM > > Reuse the logic in vfio_noiommu_group_alloc to allocate a fake > single-device iommu group for mediated devices by factoring out a common > function, and replacing the noiommu boolean field in struct vfio_group > with an enum to distinguish the three different kinds of groups. > > Signed-off-by: Christoph Hellwig <hch@xxxxxx> Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx> > --- > drivers/s390/crypto/vfio_ap_ops.c | 2 +- > drivers/vfio/mdev/mdev_driver.c | 45 ++------------- > drivers/vfio/mdev/vfio_mdev.c | 2 +- > drivers/vfio/vfio.c | 92 ++++++++++++++++++++++--------- > include/linux/vfio.h | 1 + > samples/vfio-mdev/mbochs.c | 2 +- > samples/vfio-mdev/mdpy.c | 2 +- > samples/vfio-mdev/mtty.c | 2 +- > 8 files changed, 76 insertions(+), 72 deletions(-) > > diff --git a/drivers/s390/crypto/vfio_ap_ops.c > b/drivers/s390/crypto/vfio_ap_ops.c > index 118939a7729a1e..24755d1aedd5b0 100644 > --- a/drivers/s390/crypto/vfio_ap_ops.c > +++ b/drivers/s390/crypto/vfio_ap_ops.c > @@ -351,7 +351,7 @@ static int vfio_ap_mdev_probe(struct mdev_device > *mdev) > list_add(&matrix_mdev->node, &matrix_dev->mdev_list); > mutex_unlock(&matrix_dev->lock); > > - ret = vfio_register_group_dev(&matrix_mdev->vdev); > + ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev); > if (ret) > goto err_list; > dev_set_drvdata(&mdev->dev, matrix_mdev); > diff --git a/drivers/vfio/mdev/mdev_driver.c > b/drivers/vfio/mdev/mdev_driver.c > index e2cb1ff56f6c9b..7927ed4f1711f2 100644 > --- a/drivers/vfio/mdev/mdev_driver.c > +++ b/drivers/vfio/mdev/mdev_driver.c > @@ -13,60 +13,23 @@ > > #include "mdev_private.h" > > -static int mdev_attach_iommu(struct mdev_device *mdev) > -{ > - int ret; > - struct iommu_group *group; > - > - group = iommu_group_alloc(); > - if (IS_ERR(group)) > - return PTR_ERR(group); > - > - ret = iommu_group_add_device(group, &mdev->dev); > - if (!ret) > - dev_info(&mdev->dev, "MDEV: group_id = %d\n", > - iommu_group_id(group)); > - > - iommu_group_put(group); > - return ret; > -} > - > -static void mdev_detach_iommu(struct mdev_device *mdev) > -{ > - iommu_group_remove_device(&mdev->dev); > - dev_info(&mdev->dev, "MDEV: detaching iommu\n"); > -} > - > static int mdev_probe(struct device *dev) > { > struct mdev_driver *drv = > container_of(dev->driver, struct mdev_driver, driver); > - struct mdev_device *mdev = to_mdev_device(dev); > - int ret; > > - ret = mdev_attach_iommu(mdev); > - if (ret) > - return ret; > - > - if (drv->probe) { > - ret = drv->probe(mdev); > - if (ret) > - mdev_detach_iommu(mdev); > - } > - > - return ret; > + if (!drv->probe) > + return 0; > + return drv->probe(to_mdev_device(dev)); > } > > static void mdev_remove(struct device *dev) > { > struct mdev_driver *drv = > container_of(dev->driver, struct mdev_driver, driver); > - struct mdev_device *mdev = to_mdev_device(dev); > > if (drv->remove) > - drv->remove(mdev); > - > - mdev_detach_iommu(mdev); > + drv->remove(to_mdev_device(dev)); > } > > static int mdev_match(struct device *dev, struct device_driver *drv) > diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c > index 7a9883048216e7..a90e24b0c851d3 100644 > --- a/drivers/vfio/mdev/vfio_mdev.c > +++ b/drivers/vfio/mdev/vfio_mdev.c > @@ -119,7 +119,7 @@ static int vfio_mdev_probe(struct mdev_device > *mdev) > return -ENOMEM; > > vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops); > - ret = vfio_register_group_dev(vdev); > + ret = vfio_register_emulated_iommu_dev(vdev); > if (ret) > goto out_uninit; > > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > index 23eaebd2e28cd9..2508c8c3984091 100644 > --- a/drivers/vfio/vfio.c > +++ b/drivers/vfio/vfio.c > @@ -67,6 +67,30 @@ struct vfio_unbound_dev { > struct list_head unbound_next; > }; > > +enum vfio_group_type { > + /* > + * Physical device with IOMMU backing. > + */ > + VFIO_IOMMU, > + > + /* > + * Virtual device without IOMMU backing. The VFIO core fakes up an > + * iommu_group as the iommu_group sysfs interface is part of the > + * userspace ABI. The user of these devices must not be able to > + * directly trigger unmediated DMA. > + */ > + VFIO_EMULATED_IOMMU, > + > + /* > + * Physical device without IOMMU backing. The VFIO core fakes up an > + * iommu_group as the iommu_group sysfs interface is part of the > + * userspace ABI. Users can trigger unmediated DMA by the device, > + * usage is highly dangerous, requires an explicit opt-in and will > + * taint the kernel. > + */ > + VFIO_NO_IOMMU, > +}; > + > struct vfio_group { > struct kref kref; > int minor; > @@ -83,7 +107,7 @@ struct vfio_group { > struct mutex unbound_lock; > atomic_t opened; > wait_queue_head_t container_q; > - bool noiommu; > + enum vfio_group_type type; > unsigned int dev_counter; > struct kvm *kvm; > struct blocking_notifier_head notifier; > @@ -336,7 +360,7 @@ static void vfio_group_unlock_and_free(struct > vfio_group *group) > * Group objects - create, release, get, put, search > */ > static struct vfio_group *vfio_create_group(struct iommu_group > *iommu_group, > - bool noiommu) > + enum vfio_group_type type) > { > struct vfio_group *group, *tmp; > struct device *dev; > @@ -355,7 +379,7 @@ static struct vfio_group *vfio_create_group(struct > iommu_group *iommu_group, > atomic_set(&group->opened, 0); > init_waitqueue_head(&group->container_q); > group->iommu_group = iommu_group; > - group->noiommu = noiommu; > + group->type = type; > BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); > > group->nb.notifier_call = vfio_iommu_group_notifier; > @@ -391,8 +415,8 @@ static struct vfio_group *vfio_create_group(struct > iommu_group *iommu_group, > } > > dev = device_create(vfio.class, NULL, > - MKDEV(MAJOR(vfio.group_devt), minor), > - group, "%s%d", group->noiommu ? "noiommu-" : > "", > + MKDEV(MAJOR(vfio.group_devt), minor), group, > "%s%d", > + group->type == VFIO_NO_IOMMU ? "noiommu-" : > "", > iommu_group_id(iommu_group)); > if (IS_ERR(dev)) { > vfio_free_group_minor(minor); > @@ -778,8 +802,8 @@ void vfio_uninit_group_dev(struct vfio_device > *device) > } > EXPORT_SYMBOL_GPL(vfio_uninit_group_dev); > > -#ifdef CONFIG_VFIO_NOIOMMU > -static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev) > +static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, > + enum vfio_group_type type) > { > struct iommu_group *iommu_group; > struct vfio_group *group; > @@ -794,7 +818,7 @@ static struct vfio_group > *vfio_noiommu_group_alloc(struct device *dev) > if (ret) > goto out_put_group; > > - group = vfio_create_group(iommu_group, true); > + group = vfio_create_group(iommu_group, type); > if (IS_ERR(group)) { > ret = PTR_ERR(group); > goto out_remove_device; > @@ -808,7 +832,6 @@ static struct vfio_group > *vfio_noiommu_group_alloc(struct device *dev) > iommu_group_put(iommu_group); > return ERR_PTR(ret); > } > -#endif > > static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) > { > @@ -824,7 +847,7 @@ static struct vfio_group > *vfio_group_find_or_alloc(struct device *dev) > * bus. Taint the kernel because we're about to give a DMA > * capable device to a user without IOMMU protection. > */ > - group = vfio_noiommu_group_alloc(dev); > + group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU); > if (!IS_ERR(group)) { > add_taint(TAINT_USER, LOCKDEP_STILL_OK); > dev_warn(dev, "Adding kernel taint for vfio- > noiommu group on device\n"); > @@ -841,7 +864,7 @@ static struct vfio_group > *vfio_group_find_or_alloc(struct device *dev) > goto out_put; > > /* a newly created vfio_group keeps the reference. */ > - group = vfio_create_group(iommu_group, false); > + group = vfio_create_group(iommu_group, VFIO_IOMMU); > if (IS_ERR(group)) > goto out_put; > return group; > @@ -851,10 +874,13 @@ static struct vfio_group > *vfio_group_find_or_alloc(struct device *dev) > return group; > } > > -int vfio_register_group_dev(struct vfio_device *device) > +static int __vfio_register_dev(struct vfio_device *device, > + struct vfio_group *group) > { > struct vfio_device *existing_device; > - struct vfio_group *group; > + > + if (IS_ERR(group)) > + return PTR_ERR(group); > > /* > * If the driver doesn't specify a set then the device is added to a > @@ -863,16 +889,13 @@ int vfio_register_group_dev(struct vfio_device > *device) > if (!device->dev_set) > vfio_assign_device_set(device, device); > > - group = vfio_group_find_or_alloc(device->dev); > - if (IS_ERR(group)) > - return PTR_ERR(group); > - > existing_device = vfio_group_get_device(group, device->dev); > if (existing_device) { > dev_WARN(device->dev, "Device already exists on > group %d\n", > iommu_group_id(group->iommu_group)); > vfio_device_put(existing_device); > - if (group->noiommu) > + if (group->type == VFIO_NO_IOMMU || > + group->type == VFIO_EMULATED_IOMMU) > iommu_group_remove_device(device->dev); > vfio_group_put(group); > return -EBUSY; > @@ -891,8 +914,25 @@ int vfio_register_group_dev(struct vfio_device > *device) > > return 0; > } > + > +int vfio_register_group_dev(struct vfio_device *device) > +{ > + return __vfio_register_dev(device, > + vfio_group_find_or_alloc(device->dev)); > +} > EXPORT_SYMBOL_GPL(vfio_register_group_dev); > > +/* > + * Register a virtual device without IOMMU backing. The user of this > + * device must not be able to directly trigger unmediated DMA. > + */ > +int vfio_register_emulated_iommu_dev(struct vfio_device *device) > +{ > + return __vfio_register_dev(device, > + vfio_noiommu_group_alloc(device->dev, > VFIO_EMULATED_IOMMU)); > +} > +EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); > + > /** > * Get a reference to the vfio_device for a device. Even if the > * caller thinks they own the device, they could be racing with a > @@ -1019,7 +1059,7 @@ void vfio_unregister_group_dev(struct vfio_device > *device) > if (list_empty(&group->device_list)) > wait_event(group->container_q, !group->container); > > - if (group->noiommu) > + if (group->type == VFIO_NO_IOMMU || group->type == > VFIO_EMULATED_IOMMU) > iommu_group_remove_device(device->dev); > > /* Matches the get in vfio_register_group_dev() */ > @@ -1368,7 +1408,7 @@ static int vfio_group_set_container(struct > vfio_group *group, int container_fd) > if (atomic_read(&group->container_users)) > return -EINVAL; > > - if (group->noiommu && !capable(CAP_SYS_RAWIO)) > + if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) > return -EPERM; > > f = fdget(container_fd); > @@ -1388,7 +1428,7 @@ static int vfio_group_set_container(struct > vfio_group *group, int container_fd) > > /* Real groups and fake groups cannot mix */ > if (!list_empty(&container->group_list) && > - container->noiommu != group->noiommu) { > + container->noiommu != (group->type == VFIO_NO_IOMMU)) { > ret = -EPERM; > goto unlock_out; > } > @@ -1402,7 +1442,7 @@ static int vfio_group_set_container(struct > vfio_group *group, int container_fd) > } > > group->container = container; > - container->noiommu = group->noiommu; > + container->noiommu = (group->type == VFIO_NO_IOMMU); > list_add(&group->container_next, &container->group_list); > > /* Get a reference on the container and mark a user within the group > */ > @@ -1426,7 +1466,7 @@ static int vfio_group_add_container_user(struct > vfio_group *group) > if (!atomic_inc_not_zero(&group->container_users)) > return -EINVAL; > > - if (group->noiommu) { > + if (group->type == VFIO_NO_IOMMU) { > atomic_dec(&group->container_users); > return -EPERM; > } > @@ -1451,7 +1491,7 @@ static int vfio_group_get_device_fd(struct > vfio_group *group, char *buf) > !group->container->iommu_driver || !vfio_group_viable(group)) > return -EINVAL; > > - if (group->noiommu && !capable(CAP_SYS_RAWIO)) > + if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) > return -EPERM; > > device = vfio_device_get_from_name(group, buf); > @@ -1498,7 +1538,7 @@ static int vfio_group_get_device_fd(struct > vfio_group *group, char *buf) > > fd_install(fdno, filep); > > - if (group->noiommu) > + if (group->type == VFIO_NO_IOMMU) > dev_warn(device->dev, "vfio-noiommu device opened by > user " > "(%s:%d)\n", current->comm, task_pid_nr(current)); > return fdno; > @@ -1594,7 +1634,7 @@ static int vfio_group_fops_open(struct inode > *inode, struct file *filep) > if (!group) > return -ENODEV; > > - if (group->noiommu && !capable(CAP_SYS_RAWIO)) { > + if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) > { > vfio_group_put(group); > return -EPERM; > } > diff --git a/include/linux/vfio.h b/include/linux/vfio.h > index f7083c2fd0d099..bbe29300862649 100644 > --- a/include/linux/vfio.h > +++ b/include/linux/vfio.h > @@ -75,6 +75,7 @@ void vfio_init_group_dev(struct vfio_device *device, > struct device *dev, > const struct vfio_device_ops *ops); > void vfio_uninit_group_dev(struct vfio_device *device); > int vfio_register_group_dev(struct vfio_device *device); > +int vfio_register_emulated_iommu_dev(struct vfio_device *device); > void vfio_unregister_group_dev(struct vfio_device *device); > extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); > extern void vfio_device_put(struct vfio_device *device); > diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c > index c313ab4d1f4e4e..cd41bec5fdeb39 100644 > --- a/samples/vfio-mdev/mbochs.c > +++ b/samples/vfio-mdev/mbochs.c > @@ -553,7 +553,7 @@ static int mbochs_probe(struct mdev_device *mdev) > mbochs_create_config_space(mdev_state); > mbochs_reset(mdev_state); > > - ret = vfio_register_group_dev(&mdev_state->vdev); > + ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev); > if (ret) > goto err_mem; > dev_set_drvdata(&mdev->dev, mdev_state); > diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c > index 8d1a80a0722aa9..fe5d43e797b6d3 100644 > --- a/samples/vfio-mdev/mdpy.c > +++ b/samples/vfio-mdev/mdpy.c > @@ -258,7 +258,7 @@ static int mdpy_probe(struct mdev_device *mdev) > > mdpy_count++; > > - ret = vfio_register_group_dev(&mdev_state->vdev); > + ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev); > if (ret) > goto err_mem; > dev_set_drvdata(&mdev->dev, mdev_state); > diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c > index 5983cdb16e3d1d..a0e1a469bd47af 100644 > --- a/samples/vfio-mdev/mtty.c > +++ b/samples/vfio-mdev/mtty.c > @@ -741,7 +741,7 @@ static int mtty_probe(struct mdev_device *mdev) > > mtty_create_config_space(mdev_state); > > - ret = vfio_register_group_dev(&mdev_state->vdev); > + ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev); > if (ret) > goto err_vconfig; > dev_set_drvdata(&mdev->dev, mdev_state); > -- > 2.30.2