Define a new vfio_iommu_driver_ops method named close_dma_owner, called
when a task closes its mm (i.e., on exit or exec).  This lets the driver
check whether the task owns any DMA mappings and take appropriate
action, such as unpinning pages.  This guarantees that pages do not
remain pinned if the task leaks vfio descriptors to another process and
then exits or execs.

Signed-off-by: Steve Sistare <steven.sistare@xxxxxxxxxx>
---
 drivers/vfio/container.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/vfio/vfio.h      |   1 +
 2 files changed, 147 insertions(+)

diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index 5bfd10d..b660adc 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -10,6 +10,7 @@
 #include <linux/capability.h>
 #include <linux/iommu.h>
 #include <linux/miscdevice.h>
+#include <linux/mman.h>
 #include <linux/vfio.h>
 #include <uapi/linux/vfio.h>
 
@@ -22,6 +23,13 @@ struct vfio_container {
 	struct vfio_iommu_driver	*iommu_driver;
 	void				*iommu_data;
 	bool				noiommu;
+	struct list_head		task_list;
+	struct mutex			task_lock;
+};
+
+struct vfio_task {
+	struct task_struct	*task;
+	struct list_head	task_next;
 };
 
 static struct vfio {
@@ -330,6 +338,136 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
 	return ret;
 }
 
+/*
+ * Maintain a list of tasks that have mapped dma regions.
+ */
+
+static void vfio_add_task(struct vfio_container *container)
+{
+	struct vfio_task *vftask = kzalloc(sizeof(*vftask), GFP_KERNEL);
+
+	vftask->task = get_task_struct(current->group_leader);
+	list_add(&vftask->task_next, &container->task_list);
+}
+
+static bool vfio_has_task(struct vfio_container *container)
+{
+	struct vfio_task *vftask;
+
+	list_for_each_entry(vftask, &container->task_list, task_next) {
+		if (vftask->task == current->group_leader)
+			return true;
+	}
+	return false;
+}
+
+static void vfio_remove_task(struct vfio_container *container)
+{
+	struct task_struct *task = current->group_leader;
+	struct vfio_task *vftask;
+
+	list_for_each_entry(vftask, &container->task_list, task_next) {
+		if (vftask->task == task) {
+			put_task_struct(task);
+			list_del(&vftask->task_next);
+			return;
+		}
+	}
+	WARN_ONCE(1, "%s pid %d not found\n", __func__, task->pid);
+}
+
+static int vfio_canary_create(struct file *filep);
+
+static int vfio_register_dma_task(struct vfio_container *container,
+				  struct file *filep)
+{
+	int ret = 0;
+
+	mutex_lock(&container->task_lock);
+
+	if (vfio_has_task(container))
+		goto out_unlock;
+
+	ret = vfio_canary_create(filep);
+	if (ret)
+		goto out_unlock;
+
+	vfio_add_task(container);
+
+out_unlock:
+	mutex_unlock(&container->task_lock);
+	return ret;
+}
+
+static void vfio_unregister_dma_task(struct vfio_container *container)
+{
+	struct vfio_iommu_driver *driver = container->iommu_driver;
+
+	mutex_lock(&container->task_lock);
+	vfio_remove_task(container);
+	mutex_unlock(&container->task_lock);
+
+	if (driver && driver->ops->close_dma_owner)
+		driver->ops->close_dma_owner(container->iommu_data);
+}
+
+/*
+ * Create a per-task vma that detects when an address space closes, by
+ * getting a vm_operations_struct close callback.
+ */
+
+static int vfio_canary_create(struct file *filep)
+{
+	unsigned long vaddr = vm_mmap(filep, 0, PAGE_SIZE, 0, MAP_PRIVATE, 0);
+
+	if (!vaddr)
+		return -ENOMEM;
+	else if (IS_ERR_VALUE(vaddr))
+		return (int)vaddr;
+	else
+		return 0;
+}
+
+static void vfio_canary_open(struct vm_area_struct *vma)
+{
+	/*
+	 * This vma is being dup'd after fork.  We don't have the new task
+	 * yet, so it is not useful here; ignore it on close.
+	 */
+	vma->vm_private_data = NULL;
+}
+
+static void vfio_canary_close(struct vm_area_struct *vma)
+{
+	struct vfio_container *container = vma->vm_private_data;
+
+	if (container) {
+		vfio_unregister_dma_task(container);
+		vfio_container_put(container);
+	}
+}
+
+static vm_fault_t vfio_canary_fault(struct vm_fault *vmf)
+{
+	/* No need for access to the mapped canary */
+	return VM_FAULT_SIGBUS;
+}
+
+static const struct vm_operations_struct vfio_canary_mmap_ops = {
+	.open = vfio_canary_open,
+	.close = vfio_canary_close,
+	.fault = vfio_canary_fault,
+};
+
+static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+	struct vfio_container *container = filep->private_data;
+
+	vfio_container_get(container);
+	vma->vm_private_data = container;
+	vma->vm_ops = &vfio_canary_mmap_ops;
+	return 0;
+}
+
 static long vfio_fops_unl_ioctl(struct file *filep,
 				unsigned int cmd, unsigned long arg)
 {
@@ -351,6 +489,11 @@ static long vfio_fops_unl_ioctl(struct file *filep,
 	case VFIO_SET_IOMMU:
 		ret = vfio_ioctl_set_iommu(container, arg);
 		break;
+	case VFIO_IOMMU_MAP_DMA:
+		ret = vfio_register_dma_task(container, filep);
+		if (ret)
+			return ret;
+		fallthrough;
 	default:
 		driver = container->iommu_driver;
 		data = container->iommu_data;
@@ -372,6 +515,8 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
 
 	INIT_LIST_HEAD(&container->group_list);
 	init_rwsem(&container->group_lock);
+	INIT_LIST_HEAD(&container->task_list);
+	mutex_init(&container->task_lock);
 	kref_init(&container->kref);
 	filep->private_data = container;
 
@@ -396,6 +541,7 @@ static int vfio_fops_release(struct inode *inode, struct file *filep)
 	.release	= vfio_fops_release,
 	.unlocked_ioctl	= vfio_fops_unl_ioctl,
 	.compat_ioctl	= compat_ptr_ioctl,
+	.mmap		= vfio_fops_mmap,
 };
 
 struct vfio_container *vfio_container_from_file(struct file *file)
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index 8a439c6..0cf3cfe 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -92,6 +92,7 @@ struct vfio_iommu_driver_ops {
 				  void *data, size_t count, bool write);
 	struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
 						   struct iommu_group *group);
+	void	(*close_dma_owner)(void *iommu_data);
 };
 
 struct vfio_iommu_driver {
-- 
1.8.3.1
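
P.S. For reviewers, a minimal sketch of how an IOMMU backend could
implement the new hook, using vfio_iommu_type1 as the example.  This is
illustration only, not part of the patch: it assumes the existing type1
internals (the iommu->dma_list rb-tree, the per-mapping dma->task owner
recorded at map time, and the vfio_remove_dma() helper) and elides the
rest of the driver ops.

/*
 * Sketch only -- assumes existing vfio_iommu_type1 internals:
 * iommu->dma_list, dma->task (set to current->group_leader at map
 * time), and vfio_remove_dma(), which unmaps and unpins a range.
 */
static void vfio_iommu_type1_close_dma_owner(void *iommu_data)
{
	struct vfio_iommu *iommu = iommu_data;
	struct rb_node *n, *next;

	mutex_lock(&iommu->lock);
	for (n = rb_first(&iommu->dma_list); n; n = next) {
		struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);

		next = rb_next(n);
		/* Drop every mapping owned by the exiting task */
		if (dma->task == current->group_leader)
			vfio_remove_dma(iommu, dma);
	}
	mutex_unlock(&iommu->lock);
}

static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
	.name			= "vfio-iommu-type1",
	/* existing callbacks elided */
	.close_dma_owner	= vfio_iommu_type1_close_dma_owner,
};

Since vfio_canary_close() runs in the context of the task whose mm is
going away, current->group_leader inside the hook identifies the DMA
owner directly, matching the group_leader that the container's task
list records at map time.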