Combine contiguous regions of PAGE_SIZE pages into single scatter list entries while adding to the scatter table. This minimizes the number of the entries in the scatter list and reduces the DMA mapping overhead, particularly with the IOMMU. This patch should be applied post https://patchwork.kernel.org/cover/10857607/ Suggested-by: Jason Gunthorpe <jgg@xxxxxxxx> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx> Signed-off-by: Shiraz Saleem <shiraz.saleem@xxxxxxxxx> --- This patch is decoupled from series https://patchwork.kernel.org/patch/10819993/ as it can be standalone. Changes since v1: * made ib_umem_add_sg_table() static * simplified bool variable 'first' initialization drivers/infiniband/core/umem.c | 92 ++++++++++++++++++++++++++++++++++-------- include/rdma/ib_umem.h | 17 ++++---- 2 files changed, 84 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index fe55515..0b71821 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -39,25 +39,22 @@ #include <linux/export.h> #include <linux/hugetlb.h> #include <linux/slab.h> +#include <linux/pagemap.h> #include <rdma/ib_umem_odp.h> #include "uverbs.h" - static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) { - struct scatterlist *sg; + struct sg_page_iter sg_iter; struct page *page; - int i; if (umem->nmap > 0) - ib_dma_unmap_sg(dev, umem->sg_head.sgl, - umem->npages, + ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents, DMA_BIDIRECTIONAL); - for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { - - page = sg_page(sg); + for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) { + page = sg_page_iter_page(&sg_iter); if (!PageDirty(page) && umem->writable && dirty) set_page_dirty_lock(page); put_page(page); @@ -66,6 +63,67 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d sg_free_table(&umem->sg_head); } +/* ib_umem_add_sg_table - Add N contiguous pages to scatter table + * + * sg: current scatterlist entry + * page_list: array of npage struct page pointers + * npages: number of pages in page_list + * nents: [out] number of entries in the scatterlist + * + * Return new end of scatterlist + */ +static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg, + struct page **page_list, + unsigned long npages, + int *nents) +{ + unsigned long first_pfn; + unsigned long i = 0; + bool update_cur_sg = false; + bool first = !sg_page(sg); + + /* Check if new page_list is contiguous with end of previous page_list + */ + if (!first && + (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) == + page_to_pfn(page_list[0]))) + update_cur_sg = true; + + while (i != npages) { + unsigned long len; + struct page *first_page = page_list[i]; + + first_pfn = page_to_pfn(first_page); + + /* Compute the number of contiguous pages we have starting + * at i + */ + for (len = 0; i != npages && + first_pfn + len == page_to_pfn(page_list[i]); + i++, len++) + ; + + /* Squash N contiguous pages from page_list into current sge */ + if (update_cur_sg) { + sg->length += len << PAGE_SHIFT; + sg_set_page(sg, sg_page(sg), sg->length, 0); + update_cur_sg = false; + continue; + } + + /* Squash N contiguous pages into next sge or first sge */ + if (!first) + sg = sg_next(sg); + + (*nents)++; + sg->length = len << PAGE_SHIFT; + sg_set_page(sg, first_page, sg->length, 0); + first = false; + } + + return sg; +} + /** * ib_umem_get - Pin and DMA map userspace memory. * @@ -93,7 +151,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, int ret; int i; unsigned long dma_attrs = 0; - struct scatterlist *sg, *sg_list_start; + struct scatterlist *sg; unsigned int gup_flags = FOLL_WRITE; if (!udata) @@ -185,7 +243,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, if (!umem->writable) gup_flags |= FOLL_FORCE; - sg_list_start = umem->sg_head.sgl; + sg = umem->sg_head.sgl; while (npages) { down_read(&mm->mmap_sem); @@ -202,24 +260,24 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr, cur_base += ret * PAGE_SIZE; npages -= ret; + sg = ib_umem_add_sg_table(sg, page_list, ret, &umem->sg_nents); + /* Continue to hold the mmap_sem as vma_list access * needs to be protected. */ - for_each_sg(sg_list_start, sg, ret, i) { + for (i = 0; i < ret && umem->hugetlb; i++) { if (vma_list && !is_vm_hugetlb_page(vma_list[i])) umem->hugetlb = 0; - - sg_set_page(sg, page_list[i], PAGE_SIZE, 0); } - up_read(&mm->mmap_sem); - /* preparing for next loop */ - sg_list_start = sg; + up_read(&mm->mmap_sem); } + sg_mark_end(sg); + umem->nmap = ib_dma_map_sg_attrs(context->device, umem->sg_head.sgl, - umem->npages, + umem->sg_nents, DMA_BIDIRECTIONAL, dma_attrs); diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 73af05d..18407a4 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -42,18 +42,19 @@ struct ib_umem_odp; struct ib_umem { - struct ib_ucontext *context; - struct mm_struct *owning_mm; - size_t length; - unsigned long address; - int page_shift; + struct ib_ucontext *context; + struct mm_struct *owning_mm; + size_t length; + unsigned long address; + int page_shift; u32 writable : 1; u32 hugetlb : 1; u32 is_odp : 1; - struct work_struct work; + struct work_struct work; struct sg_table sg_head; - int nmap; - int npages; + int sg_nents; + int nmap; + int npages; }; /* Returns the offset of the umem start relative to the first page. */ -- 1.8.3.1