vfio_iommu_type1_pin_pages is very inefficient because it is processed page by page when calling vfio_pin_page_external. Added contiguous_vaddr_get_pfn to process continuous pages to reduce the number of loops, thereby improving performance. Signed-off-by: Xiaoyang Xu <xuxiaoyang2@xxxxxxxxxx> --- drivers/vfio/vfio_iommu_type1.c | 241 ++++++++++++++++++++++++++++---- 1 file changed, 214 insertions(+), 27 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 67e827638995..935f80807527 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -628,6 +628,206 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova, return unlocked; } +static int contiguous_vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, + int prot, long npage, unsigned long *phys_pfn) +{ + struct page **pages = NULL; + unsigned int flags = 0; + int i, ret; + + pages = kvmalloc_array(npage, sizeof(struct page *), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + if (prot & IOMMU_WRITE) + flags |= FOLL_WRITE; + + mmap_read_lock(mm); + ret = pin_user_pages_remote(mm, vaddr, npage, flags | FOLL_LONGTERM, + pages, NULL, NULL); + mmap_read_unlock(mm); + + for (i = 0; i < ret; i++) + *(phys_pfn + i) = page_to_pfn(pages[i]); + + kvfree(pages); + + return ret; +} + +static int vfio_pin_contiguous_pages_external(struct vfio_iommu *iommu, + struct vfio_dma *dma, + unsigned long *user_pfn, + int npage, unsigned long *phys_pfn, + bool do_accounting) +{ + int ret, i, j, lock_acct = 0; + unsigned long remote_vaddr; + dma_addr_t iova; + struct mm_struct *mm; + struct vfio_pfn *vpfn; + + mm = get_task_mm(dma->task); + if (!mm) + return -ENODEV; + + iova = user_pfn[0] << PAGE_SHIFT; + remote_vaddr = dma->vaddr + iova - dma->iova; + ret = contiguous_vaddr_get_pfn(mm, remote_vaddr, dma->prot, + npage, phys_pfn); + mmput(mm); + if (ret <= 0) + return ret; + + npage = ret; + for (i = 0; i < npage; i++) { + iova = user_pfn[i] << PAGE_SHIFT; + ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]); + if (ret) + goto unwind; + + if (!is_invalid_reserved_pfn(phys_pfn[i])) + lock_acct++; + + if (iommu->dirty_page_tracking) { + unsigned long pgshift = __ffs(iommu->pgsize_bitmap); + + /* + * Bitmap populated with the smallest supported page + * size + */ + bitmap_set(dma->bitmap, + (iova - dma->iova) >> pgshift, 1); + } + } + + if (do_accounting) { + ret = vfio_lock_acct(dma, lock_acct, true); + if (ret) { + if (ret == -ENOMEM) + pr_warn("%s: Task %s (%d) RLIMIT_MEMLOCK (%ld) exceeded\n", + __func__, dma->task->comm, task_pid_nr(dma->task), + task_rlimit(dma->task, RLIMIT_MEMLOCK)); + goto unwind; + } + } + + return i; +unwind: + for (j = 0; j < npage; j++) { + put_pfn(phys_pfn[j], dma->prot); + phys_pfn[j] = 0; + } + + for (j = 0; j < i; j++) { + iova = user_pfn[j] << PAGE_SHIFT; + vpfn = vfio_find_vpfn(dma, iova); + if (vpfn) + vfio_remove_from_pfn_list(dma, vpfn); + } + + return ret; +} + +static int vfio_iommu_type1_pin_contiguous_pages(struct vfio_iommu *iommu, + struct vfio_dma *dma, + unsigned long *user_pfn, + int npage, unsigned long *phys_pfn, + bool do_accounting) +{ + int ret, i, j; + unsigned long remote_vaddr; + dma_addr_t iova; + + ret = vfio_pin_contiguous_pages_external(iommu, dma, user_pfn, npage, + phys_pfn, do_accounting); + if (ret == npage) + return ret; + + if (ret < 0) + ret = 0; + + for (i = ret; i < npage; i++) { + iova = user_pfn[i] << PAGE_SHIFT; + remote_vaddr = dma->vaddr + iova - dma->iova; + + ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i], + do_accounting); + if (ret) + goto pin_unwind; + + ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]); + if (ret) { + if (put_pfn(phys_pfn[i], dma->prot) && do_accounting) + vfio_lock_acct(dma, -1, true); + goto pin_unwind; + } + + if (iommu->dirty_page_tracking) { + unsigned long pgshift = __ffs(iommu->pgsize_bitmap); + + /* + * Bitmap populated with the smallest supported page + * size + */ + bitmap_set(dma->bitmap, + (iova - dma->iova) >> pgshift, 1); + } + } + + return i; + +pin_unwind: + phys_pfn[i] = 0; + for (j = 0; j < i; j++) { + dma_addr_t iova; + + iova = user_pfn[j] << PAGE_SHIFT; + vfio_unpin_page_external(dma, iova, do_accounting); + phys_pfn[j] = 0; + } + + return ret; +} + +static int vfio_iommu_type1_get_contiguous_pages_length(struct vfio_iommu *iommu, + unsigned long *user_pfn, int npage, int prot) +{ + struct vfio_dma *dma_base; + int i; + dma_addr_t iova; + struct vfio_pfn *vpfn; + + if (npage <= 1) + return npage; + + iova = user_pfn[0] << PAGE_SHIFT; + dma_base = vfio_find_dma(iommu, iova, PAGE_SIZE); + if (!dma_base) + return -EINVAL; + + if ((dma_base->prot & prot) != prot) + return -EPERM; + + for (i = 1; i < npage; i++) { + iova = user_pfn[i] << PAGE_SHIFT; + + if (iova >= dma_base->iova + dma_base->size || + iova + PAGE_SIZE <= dma_base->iova) + break; + + vpfn = vfio_iova_get_vfio_pfn(dma_base, iova); + if (vpfn) { + vfio_iova_put_vfio_pfn(dma_base, vpfn); + break; + } + + if (user_pfn[i] != user_pfn[0] + i) + break; + } + return i; +} + static int vfio_iommu_type1_pin_pages(void *iommu_data, struct iommu_group *iommu_group, unsigned long *user_pfn, @@ -637,9 +837,9 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, struct vfio_iommu *iommu = iommu_data; struct vfio_group *group; int i, j, ret; - unsigned long remote_vaddr; struct vfio_dma *dma; bool do_accounting; + int contiguous_npage; if (!iommu || !user_pfn || !phys_pfn) return -EINVAL; @@ -663,7 +863,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, */ do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu); - for (i = 0; i < npage; i++) { + for (i = 0; i < npage; i += contiguous_npage) { dma_addr_t iova; struct vfio_pfn *vpfn; @@ -682,31 +882,18 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, vpfn = vfio_iova_get_vfio_pfn(dma, iova); if (vpfn) { phys_pfn[i] = vpfn->pfn; - continue; - } - - remote_vaddr = dma->vaddr + (iova - dma->iova); - ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i], - do_accounting); - if (ret) - goto pin_unwind; - - ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]); - if (ret) { - if (put_pfn(phys_pfn[i], dma->prot) && do_accounting) - vfio_lock_acct(dma, -1, true); - goto pin_unwind; - } - - if (iommu->dirty_page_tracking) { - unsigned long pgshift = __ffs(iommu->pgsize_bitmap); - - /* - * Bitmap populated with the smallest supported page - * size - */ - bitmap_set(dma->bitmap, - (iova - dma->iova) >> pgshift, 1); + contiguous_npage = 1; + } else { + ret = vfio_iommu_type1_get_contiguous_pages_length(iommu, + &user_pfn[i], npage - i, prot); + if (ret < 0) + goto pin_unwind; + + ret = vfio_iommu_type1_pin_contiguous_pages(iommu, + dma, &user_pfn[i], ret, &phys_pfn[i], do_accounting); + if (ret < 0) + goto pin_unwind; + contiguous_npage = ret; } } ret = i; -- 2.19.1