When a vfio container is preserved across exec using the VFIO_UPDATE_VADDR
interfaces, locked_vm of the new mm becomes 0.  If the user later unmaps a
dma mapping, locked_vm underflows to a large unsigned value, and a
subsequent dma map request fails with ENOMEM in __account_locked_vm.

To avoid underflow, do not decrement locked_vm during unmap if the dma's
mm has changed.  To restore the correct locked_vm count, when
VFIO_DMA_MAP_FLAG_VADDR is used and the dma's mm has changed, add the
mapping's pinned page count to the new mm->locked_vm, subject to the
rlimit.  Now that mediated devices are excluded when using
VFIO_UPDATE_VADDR, the amount of pinned memory equals the size of the
mapping.

Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")
Signed-off-by: Steve Sistare <steven.sistare@xxxxxxxxxx>
---
 drivers/vfio/vfio_iommu_type1.c | 50 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index bdfc13c..33dc8ec 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -100,6 +100,7 @@ struct vfio_dma {
 	struct task_struct	*task;
 	struct rb_root		pfn_list;	/* Ex-user pinned pfn list */
 	unsigned long		*bitmap;
+	struct mm_struct	*mm;
 };
 
 struct vfio_batch {
@@ -415,7 +416,7 @@ static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn)
 static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
 {
 	struct mm_struct *mm;
-	int ret;
+	int ret = 0;
 
 	if (!npage)
 		return 0;
@@ -424,6 +425,10 @@ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
 	if (!mm)
 		return -ESRCH; /* process exited */
 
+	/* Avoid locked_vm underflow */
+	if (dma->mm != mm && npage < 0)
+		goto out;
+
 	ret = mmap_write_lock_killable(mm);
 	if (!ret) {
 		ret = __account_locked_vm(mm, abs(npage), npage > 0, dma->task,
@@ -431,6 +436,7 @@ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
 		mmap_write_unlock(mm);
 	}
 
+out:
 	if (async)
 		mmput(mm);
 
@@ -1180,6 +1186,7 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
 	vfio_unmap_unpin(iommu, dma, true);
 	vfio_unlink_dma(iommu, dma);
 	put_task_struct(dma->task);
+	mmdrop(dma->mm);
 	vfio_dma_bitmap_free(dma);
 	if (dma->vaddr_invalid) {
 		iommu->vaddr_invalid_count--;
@@ -1578,6 +1585,42 @@ static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
 	return list_empty(iova);
 }
 
+static int vfio_change_dma_owner(struct vfio_dma *dma)
+{
+	int ret = 0;
+	struct mm_struct *mm = get_task_mm(dma->task);
+
+	if (dma->mm != mm) {
+		long npage = dma->size >> PAGE_SHIFT;
+		bool new_lock_cap = capable(CAP_IPC_LOCK);
+		struct task_struct *new_task = current->group_leader;
+
+		ret = mmap_write_lock_killable(new_task->mm);
+		if (ret)
+			goto out;
+
+		ret = __account_locked_vm(new_task->mm, npage, true,
+					  new_task, new_lock_cap);
+		mmap_write_unlock(new_task->mm);
+		if (ret)
+			goto out;
+
+		if (dma->task != new_task) {
+			vfio_lock_acct(dma, -npage, 0);
+			put_task_struct(dma->task);
+			dma->task = get_task_struct(new_task);
+		}
+		mmdrop(dma->mm);
+		dma->mm = new_task->mm;
+		mmgrab(dma->mm);
+		dma->lock_cap = new_lock_cap;
+	}
+out:
+	if (mm)
+		mmput(mm);
+	return ret;
+}
+
 static int vfio_dma_do_map(struct vfio_iommu *iommu,
 			   struct vfio_iommu_type1_dma_map *map)
 {
@@ -1627,6 +1670,9 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 			   dma->size != size) {
 			ret = -EINVAL;
 		} else {
+			ret = vfio_change_dma_owner(dma);
+			if (ret)
+				goto out_unlock;
 			dma->vaddr = vaddr;
 			dma->vaddr_invalid = false;
 			iommu->vaddr_invalid_count--;
@@ -1687,6 +1733,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 	get_task_struct(current->group_leader);
 	dma->task = current->group_leader;
 	dma->lock_cap = capable(CAP_IPC_LOCK);
+	dma->mm = dma->task->mm;
+	mmgrab(dma->mm);
 	dma->pfn_list = RB_ROOT;
-- 
1.8.3.1
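
P.S. For context, not part of the patch: a minimal userspace sketch of the
update-vaddr sequence that exercises this path.  The container fd, iova,
size, and function names here are illustrative assumptions, and error
handling is omitted.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/*
 * Before exec: invalidate the vaddr.  The kernel keeps the IOMMU mapping
 * and the pinned pages, but the old mm (and its locked_vm count) goes
 * away with exec.
 */
static int invalidate_vaddr(int container_fd, __u64 iova, __u64 size)
{
	struct vfio_iommu_type1_dma_unmap unmap = {
		.argsz = sizeof(unmap),
		.flags = VFIO_DMA_UNMAP_FLAG_VADDR,
		.iova = iova,
		.size = size,
	};

	return ioctl(container_fd, VFIO_IOMMU_UNMAP_DMA, &unmap);
}

/*
 * After exec: supply the new vaddr for the same iova range.  Only the
 * VADDR flag is set; READ/WRITE must not be combined with it.  With this
 * patch, the mapping's pinned page count is also charged to the new
 * mm->locked_vm, so later unmap/map cycles account correctly.
 */
static int update_vaddr(int container_fd, void *new_vaddr, __u64 iova, __u64 size)
{
	struct vfio_iommu_type1_dma_map map = {
		.argsz = sizeof(map),
		.flags = VFIO_DMA_MAP_FLAG_VADDR,
		.vaddr = (__u64)(uintptr_t)new_vaddr,
		.iova = iova,
		.size = size,
	};

	return ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
}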