Squash contiguous regions of PAGE_SIZE pages into a single SG entry as
opposed to one SG entry per page. This reduces the SG table size and is
friendliest to the IOMMU.

Suggested-by: Jason Gunthorpe <jgg@xxxxxxxx>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx>
Signed-off-by: Shiraz Saleem <shiraz.saleem@xxxxxxxxx>
---
 drivers/infiniband/core/umem.c | 66 ++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 35 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index c6144df..486d6d7 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -39,6 +39,7 @@
 #include <linux/export.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
+#include <linux/pagemap.h>
 #include <rdma/ib_umem_odp.h>
 
 #include "uverbs.h"
@@ -46,18 +47,16 @@
 
 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 {
-	struct scatterlist *sg;
+	struct sg_page_iter sg_iter;
 	struct page *page;
-	int i;
 
 	if (umem->nmap > 0)
 		ib_dma_unmap_sg(dev, umem->sg_head.sgl,
-				umem->npages,
+				umem->sg_head.orig_nents,
 				DMA_BIDIRECTIONAL);
 
-	for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
-
-		page = sg_page(sg);
+	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_head.orig_nents, 0) {
+		page = sg_page_iter_page(&sg_iter);
 		if (!PageDirty(page) && umem->writable && dirty)
 			set_page_dirty_lock(page);
 		put_page(page);
@@ -92,7 +91,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	int ret;
 	int i;
 	unsigned long dma_attrs = 0;
-	struct scatterlist *sg, *sg_list_start;
 	unsigned int gup_flags = FOLL_WRITE;
 
 	if (dmasync)
@@ -138,7 +136,13 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	/* We assume the memory is from hugetlb until proved otherwise */
 	umem->hugetlb = 1;
 
-	page_list = (struct page **) __get_free_page(GFP_KERNEL);
+	npages = ib_umem_num_pages(umem);
+	if (npages == 0 || npages > UINT_MAX) {
+		ret = -EINVAL;
+		goto umem_kfree;
+	}
+
+	page_list = kmalloc_array(npages, sizeof(*page_list), GFP_KERNEL);
 	if (!page_list) {
 		ret = -ENOMEM;
 		goto umem_kfree;
@@ -152,12 +156,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	if (!vma_list)
 		umem->hugetlb = 0;
 
-	npages = ib_umem_num_pages(umem);
-	if (npages == 0 || npages > UINT_MAX) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
 	down_write(&mm->mmap_sem);
@@ -172,50 +170,48 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
 	cur_base = addr & PAGE_MASK;
 
-	ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
-	if (ret)
-		goto vma;
-
 	if (!umem->writable)
 		gup_flags |= FOLL_FORCE;
 
-	sg_list_start = umem->sg_head.sgl;
-
 	while (npages) {
 		down_read(&mm->mmap_sem);
 		ret = get_user_pages_longterm(cur_base,
 				     min_t(unsigned long, npages,
					   PAGE_SIZE / sizeof (struct page *)),
-				     gup_flags, page_list, vma_list);
+				     gup_flags, page_list + umem->npages, vma_list);
 		if (ret < 0) {
 			up_read(&mm->mmap_sem);
-			goto umem_release;
+			release_pages(page_list, umem->npages);
+			goto vma;
 		}
 
 		umem->npages += ret;
 		cur_base += ret * PAGE_SIZE;
 		npages -= ret;
 
-		/* Continue to hold the mmap_sem as vma_list access
-		 * needs to be protected.
-		 */
-		for_each_sg(sg_list_start, sg, ret, i) {
+		for(i = 0; i < ret && umem->hugetlb; i++) {
 			if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
 				umem->hugetlb = 0;
-
-			sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
 		}
 		up_read(&mm->mmap_sem);
+	}
 
-		/* preparing for next loop */
-		sg_list_start = sg;
+	ret = sg_alloc_table_from_pages(&umem->sg_head,
+					page_list,
+					umem->npages,
+					0,
+					umem->npages << PAGE_SHIFT,
+					GFP_KERNEL);
+	if (ret) {
+		release_pages(page_list, umem->npages);
+		goto vma;
 	}
 
 	umem->nmap = ib_dma_map_sg_attrs(context->device,
-				  umem->sg_head.sgl,
-				  umem->npages,
-				  DMA_BIDIRECTIONAL,
-				  dma_attrs);
+					 umem->sg_head.sgl,
+					 umem->sg_head.orig_nents,
+					 DMA_BIDIRECTIONAL,
+					 dma_attrs);
 
 	if (!umem->nmap) {
 		ret = -ENOMEM;
@@ -234,7 +230,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 out:
	if (vma_list)
		free_page((unsigned long) vma_list);
-	free_page((unsigned long) page_list);
+	kfree(page_list);
 umem_kfree:
	if (ret) {
		mmdrop(umem->owning_mm);
-- 
2.8.3
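
For reference, a minimal sketch (not part of the patch) of the coalescing
the change relies on: sg_alloc_table_from_pages() walks an already-pinned
page array and merges physically contiguous pages into a single SG entry,
so the resulting table can have far fewer entries than pages; the number
of entries actually built is what sg_head.orig_nents reports and is the
count to hand to ib_dma_map_sg_attrs()/ib_dma_unmap_sg() above. The helper
name build_coalesced_sgt() below is illustrative only.

	#include <linux/scatterlist.h>
	#include <linux/mm.h>

	/*
	 * Illustrative sketch: build a coalesced sg_table from a pinned
	 * page array.  sg_alloc_table_from_pages() squashes physically
	 * contiguous pages into one SG entry, so sgt->orig_nents may be
	 * much smaller than npages.
	 */
	static int build_coalesced_sgt(struct sg_table *sgt,
				       struct page **pages,
				       unsigned int npages, gfp_t gfp)
	{
		return sg_alloc_table_from_pages(sgt, pages, npages, 0,
						 (unsigned long)npages << PAGE_SHIFT,
						 gfp);
	}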