By linking all the fds we provide to userspace for a device to a single per-device address space through a new pseudo fs, we can use tools like unmap_mapping_range() to zap all vmas associated with a device. Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx> Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx> --- drivers/vfio/vfio.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 38779e6fd80c..abdf8d52a911 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -32,11 +32,18 @@ #include <linux/vfio.h> #include <linux/wait.h> #include <linux/sched/signal.h> +#include <linux/pseudo_fs.h> +#include <linux/mount.h> #define DRIVER_VERSION "0.3" #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@xxxxxxxxxx>" #define DRIVER_DESC "VFIO - User Level meta-driver" +#define VFIO_MAGIC 0x5646494f /* "VFIO" */ + +static int vfio_fs_cnt; +static struct vfsmount *vfio_fs_mnt; + static struct vfio { struct class *class; struct list_head iommu_drivers_list; @@ -97,6 +104,7 @@ struct vfio_device { struct vfio_group *group; struct list_head group_next; void *device_data; + struct inode *inode; }; #ifdef CONFIG_VFIO_NOIOMMU @@ -529,6 +537,34 @@ static struct vfio_group *vfio_group_get_from_dev(struct device *dev) return group; } +static int vfio_fs_init_fs_context(struct fs_context *fc) +{ + return init_pseudo(fc, VFIO_MAGIC) ? 
0 : -ENOMEM; +} + +static struct file_system_type vfio_fs_type = { + .name = "vfio", + .owner = THIS_MODULE, + .init_fs_context = vfio_fs_init_fs_context, + .kill_sb = kill_anon_super, +}; + +static struct inode *vfio_fs_inode_new(void) +{ + struct inode *inode; + int ret; + + ret = simple_pin_fs(&vfio_fs_type, &vfio_fs_mnt, &vfio_fs_cnt); + if (ret) + return ERR_PTR(ret); + + inode = alloc_anon_inode(vfio_fs_mnt->mnt_sb); + if (IS_ERR(inode)) + simple_release_fs(&vfio_fs_mnt, &vfio_fs_cnt); + + return inode; +} + /** * Device objects - create, release, get, put, search */ @@ -539,11 +575,19 @@ struct vfio_device *vfio_group_create_device(struct vfio_group *group, void *device_data) { struct vfio_device *device; + struct inode *inode; device = kzalloc(sizeof(*device), GFP_KERNEL); if (!device) return ERR_PTR(-ENOMEM); + inode = vfio_fs_inode_new(); + if (IS_ERR(inode)) { + kfree(device); + return ERR_CAST(inode); + } + device->inode = inode; + kref_init(&device->kref); device->dev = dev; device->group = group; @@ -574,6 +618,9 @@ static void vfio_device_release(struct kref *kref) dev_set_drvdata(device->dev, NULL); + iput(device->inode); + simple_release_fs(&vfio_fs_mnt, &vfio_fs_cnt); + kfree(device); /* vfio_del_group_dev may be waiting for this device */ @@ -1488,6 +1535,13 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) */ filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); + /* + * Use the pseudo fs inode on the device to link all mmaps + * to the same address space, allowing us to unmap all vmas + * associated to this device using unmap_mapping_range(). + */ + filep->f_mapping = device->inode->i_mapping; + atomic_inc(&group->container_users); fd_install(ret, filep);