By linking all the device fds we provide to userspace to an address space through a new pseudo fs, we can use tools like unmap_mapping_range() to zap all vmas associated with a device. Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx> Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx> --- drivers/vfio/vfio.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/vfio.h | 1 + 2 files changed, 58 insertions(+) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 02cc51ce6891..b88de89bda31 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -21,8 +21,10 @@ #include <linux/list.h> #include <linux/miscdevice.h> #include <linux/module.h> +#include <linux/mount.h> #include <linux/mutex.h> #include <linux/pci.h> +#include <linux/pseudo_fs.h> #include <linux/rwsem.h> #include <linux/sched.h> #include <linux/slab.h> @@ -37,6 +39,14 @@ #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@xxxxxxxxxx>" #define DRIVER_DESC "VFIO - User Level meta-driver" +/* + * Not exposed via UAPI + * + * XXX Adopt the following when available: + * https://lore.kernel.org/lkml/20210309155348.974875-1-hch@xxxxxx/ + */ +#define VFIO_MAGIC 0x5646494f /* "VFIO" */ + static struct vfio { struct class *class; struct list_head iommu_drivers_list; @@ -46,6 +56,8 @@ static struct vfio { struct mutex group_lock; struct cdev group_cdev; dev_t group_devt; + struct vfsmount *vfio_fs_mnt; + int vfio_fs_cnt; } vfio; struct vfio_iommu_driver { @@ -519,6 +531,35 @@ static struct vfio_group *vfio_group_get_from_dev(struct device *dev) return group; } +static int vfio_fs_init_fs_context(struct fs_context *fc) +{ + return init_pseudo(fc, VFIO_MAGIC) ? 
0 : -ENOMEM; +} + +static struct file_system_type vfio_fs_type = { + .name = "vfio", + .owner = THIS_MODULE, + .init_fs_context = vfio_fs_init_fs_context, + .kill_sb = kill_anon_super, +}; + +static struct inode *vfio_fs_inode_new(void) +{ + struct inode *inode; + int ret; + + ret = simple_pin_fs(&vfio_fs_type, + &vfio.vfio_fs_mnt, &vfio.vfio_fs_cnt); + if (ret) + return ERR_PTR(ret); + + inode = alloc_anon_inode(vfio.vfio_fs_mnt->mnt_sb); + if (IS_ERR(inode)) + simple_release_fs(&vfio.vfio_fs_mnt, &vfio.vfio_fs_cnt); + + return inode; +} + /** * Device objects - create, release, get, put, search */ @@ -783,6 +824,12 @@ int vfio_register_group_dev(struct vfio_device *device) return -EBUSY; } + device->inode = vfio_fs_inode_new(); + if (IS_ERR(device->inode)) { + vfio_group_put(group); + return PTR_ERR(device->inode); + } + /* Our reference on group is moved to the device */ device->group = group; @@ -907,6 +954,9 @@ void vfio_unregister_group_dev(struct vfio_device *device) group->dev_counter--; mutex_unlock(&group->device_lock); + iput(device->inode); + simple_release_fs(&vfio.vfio_fs_mnt, &vfio.vfio_fs_cnt); + /* * In order to support multiple devices per group, devices can be * plucked from the group while other devices in the group are still @@ -1411,6 +1461,13 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) */ filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); + /* + * Use the pseudo fs inode on the device to link all mmaps + * to the same address space, allowing us to unmap all vmas + * associated with this device using unmap_mapping_range(). 
+ */ + filep->f_mapping = device->inode->i_mapping; + atomic_inc(&group->container_users); fd_install(ret, filep); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index a2c5b30e1763..90bcc2e9c8eb 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -24,6 +24,7 @@ struct vfio_device { refcount_t refcount; struct completion comp; struct list_head group_next; + struct inode *inode; }; /**