On Tue, Apr 02, 2019 at 02:52:52PM -0500, Shiraz Saleem wrote:
> Combine contiguous regions of PAGE_SIZE pages
> into single scatter list entries while adding
> to the scatter table. This minimizes the number
> of entries in the scatter list and reduces
> the DMA mapping overhead, particularly with the
> IOMMU.
> 
> Set default max_seg_size in core for IB devices
> to 2G and do not combine if we exceed this limit.
> 
> Also, purge npages in struct ib_umem as we now
> DMA map the umem SGL with sg_nents, and update
> remaining non ODP drivers that use umem->npages.
> Move npages tracking to ib_umem_odp as ODP drivers
> still need it.
> 
> This patch should be applied post
> https://patchwork.kernel.org/cover/10857607/
> 
> Suggested-by: Jason Gunthorpe <jgg@xxxxxxxx>
> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx>
> Reviewed-by: Ira Weiny <ira.weiny@xxxxxxxxx>
> Acked-by: Adit Ranadive <aditr@xxxxxxxxxx>
> Signed-off-by: Shiraz Saleem <shiraz.saleem@xxxxxxxxx>
> ---
> This patch is decoupled from series https://patchwork.kernel.org/patch/10819993/
> as it can be standalone.
> 
> Changes since v1:
> * made ib_umem_add_sg_table() static
> * simplified bool variable 'first' initialization
> 
> v2->v3:
> * Added a comment on calculation for checking page_list
>   contiguity in ib_umem_add_sg_table()
> * Moved iterator to body of for loop in ib_umem_add_sg_table()
> * Removed explicit update to sg->length prior to calling sg_set_page()
> * Removed white space clean-up in ib_umem struct
> * Purged npages from ib_umem struct and updated remaining non ODP
>   drivers to use ib_umem_num_pages()
> * Added npages tracking in ib_umem_odp as ODP drivers still need it.
> 
> v3->v4:
> * Set default max_seg_size for all IB devices to 2G and
>   avoid combining if we exceed this limit.
> 
> v4->v5:
> * updated function header for ib_umem_add_sg_table()
> * updated sg_nents to unsigned int
> * fixed max seg size limits check in ib_umem_add_sg_table()
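The coalescing idea is simple enough to model outside the kernel. Below is a
tiny userspace C sketch (all names invented for illustration; this is not the
patch's code) that folds an array of page-frame numbers into capped
(first_pfn, bytes) segments, which is essentially what ib_umem_add_sg_table()
does with sg entries and max_seg_sz:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define MAX_SEG_SZ (1UL << 31)  /* mirrors the SZ_2G default */

struct seg {
        unsigned long first_pfn;
        unsigned long bytes;
};

static unsigned int coalesce(const unsigned long *pfns, unsigned long npfns,
                             struct seg *segs)
{
        unsigned int nsegs = 0;
        unsigned long i;

        for (i = 0; i < npfns; i++) {
                struct seg *cur = nsegs ? &segs[nsegs - 1] : NULL;

                /* Extend the current segment only if this pfn continues
                 * the physical run and the segment stays within the cap.
                 */
                if (cur &&
                    cur->first_pfn + (cur->bytes >> PAGE_SHIFT) == pfns[i] &&
                    cur->bytes + PAGE_SIZE <= MAX_SEG_SZ) {
                        cur->bytes += PAGE_SIZE;
                        continue;
                }

                segs[nsegs].first_pfn = pfns[i];
                segs[nsegs].bytes = PAGE_SIZE;
                nsegs++;
        }

        return nsegs;
}

int main(void)
{
        /* pfns 7,8,9 are physically contiguous; 20 starts a new run */
        unsigned long pfns[] = { 7, 8, 9, 20 };
        struct seg segs[4];
        unsigned int i, nsegs = coalesce(pfns, 4, segs);

        for (i = 0; i < nsegs; i++)
                printf("seg %u: first_pfn %lu, %lu bytes\n",
                       i, segs[i].first_pfn, segs[i].bytes);

        return 0;       /* prints two segments: (7, 12288) and (20, 4096) */
}

Fewer scatterlist entries means fewer IOVA allocations and IOTLB operations
at map time, which is where the overhead reduction described above comes from.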
> 
>  drivers/infiniband/core/device.c             |   3 +
>  drivers/infiniband/core/umem.c               | 101 +++++++++++++++++++++------
>  drivers/infiniband/core/umem_odp.c           |   4 +-
>  drivers/infiniband/hw/mlx5/odp.c             |   2 +-
>  drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c |  11 +--
>  include/rdma/ib_umem.h                       |   2 +-
>  include/rdma/ib_umem_odp.h                   |   1 +
>  7 files changed, 95 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index 7421ec4..5a1bdb9 100644
> --- a/drivers/infiniband/core/device.c
> +++ b/drivers/infiniband/core/device.c
> @@ -711,6 +711,9 @@ static void setup_dma_device(struct ib_device *device)
>                  WARN_ON_ONCE(!parent);
>                  device->dma_device = parent;
>          }
> +        /* Setup default max segment size for all IB devices */
> +        dma_set_max_seg_size(device->dma_device, SZ_2G);
> +
>  }
>  
>  /*
> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> index 89a7d57..d31f5e3 100644
> --- a/drivers/infiniband/core/umem.c
> +++ b/drivers/infiniband/core/umem.c
> @@ -39,25 +39,22 @@
>  #include <linux/export.h>
>  #include <linux/hugetlb.h>
>  #include <linux/slab.h>
> +#include <linux/pagemap.h>
>  #include <rdma/ib_umem_odp.h>
>  
>  #include "uverbs.h"
>  
> -
>  static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
>  {
> -        struct scatterlist *sg;
> +        struct sg_page_iter sg_iter;
>          struct page *page;
> -        int i;
>  
>          if (umem->nmap > 0)
> -                ib_dma_unmap_sg(dev, umem->sg_head.sgl,
> -                                umem->npages,
> +                ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
>                                  DMA_BIDIRECTIONAL);
>  
> -        for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
> -
> -                page = sg_page(sg);
> +        for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
> +                page = sg_page_iter_page(&sg_iter);
>                  if (!PageDirty(page) && umem->writable && dirty)
>                          set_page_dirty_lock(page);
>                  put_page(page);
> @@ -66,6 +63,69 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
>          sg_free_table(&umem->sg_head);
>  }
>  
> +/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
> + *
> + * sg: current scatterlist entry
> + * page_list: array of npage struct page pointers
> + * npages: number of pages in page_list
> + * max_seg_sz: maximum segment size in bytes
> + * nents: [out] number of entries in the scatterlist
> + *
> + * Return new end of scatterlist
> + */
> +static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
> +                                                struct page **page_list,
> +                                                unsigned long npages,
> +                                                unsigned int max_seg_sz,
> +                                                int *nents)
> +{
> +        unsigned long first_pfn;
> +        unsigned long i = 0;
> +        bool update_cur_sg = false;
> +        bool first = !sg_page(sg);
> +
> +        /* Check if new page_list is contiguous with end of previous page_list.
> +         * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
> +         */
> +        if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
> +            page_to_pfn(page_list[0])))
> +                update_cur_sg = true;
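To make the check above concrete (my own worked example, not part of the
patch):

/* Suppose the sge built so far covers pfns 100..102:
 *
 *   page_to_pfn(sg_page(sg))   = 100
 *   sg->length                 = 3 * PAGE_SIZE
 *   sg->length >> PAGE_SHIFT   = 3
 *
 * The first pfn past the sge is 100 + 3 = 103, so the new page_list can
 * only extend this sge when page_to_pfn(page_list[0]) == 103.
 */

The shift is only safe because every sge here is built via
sg_set_page(..., len << PAGE_SHIFT, 0), so sg->offset stays 0 and sg->length
stays page-aligned, exactly as the comment states.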
> +
> +        while (i != npages) {
> +                unsigned long len;
> +                struct page *first_page = page_list[i];
> +
> +                first_pfn = page_to_pfn(first_page);
> +
> +                /* Compute the number of contiguous pages we have starting
> +                 * at i
> +                 */
> +                for (len = 0; i != npages &&
> +                     first_pfn + len == page_to_pfn(page_list[i]);
> +                     len++)
> +                        i++;
> +
> +                /* Squash N contiguous pages from page_list into current sge */
> +                if (update_cur_sg &&
> +                    ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT))) {
> +                        sg_set_page(sg, sg_page(sg),
> +                                    sg->length + (len << PAGE_SHIFT), 0);
> +                        update_cur_sg = false;
> +                        continue;
> +                }
> +
> +                /* Squash N contiguous pages into next sge or first sge */
> +                if (!first)
> +                        sg = sg_next(sg);
> +
> +                (*nents)++;
> +                sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
> +                first = false;
> +        }
> +
> +        return sg;
> +}
> +
>  /**
>   * ib_umem_get - Pin and DMA map userspace memory.
>   *
> @@ -93,7 +153,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>          int ret;
>          int i;
>          unsigned long dma_attrs = 0;
> -        struct scatterlist *sg, *sg_list_start;
> +        struct scatterlist *sg;
>          unsigned int gup_flags = FOLL_WRITE;
>  
>          if (!udata)
> @@ -190,7 +250,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>          if (!umem->writable)
>                  gup_flags |= FOLL_FORCE;
>  
> -        sg_list_start = umem->sg_head.sgl;
> +        sg = umem->sg_head.sgl;
>  
>          while (npages) {
>                  down_read(&mm->mmap_sem);
> @@ -203,28 +263,29 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>                          goto umem_release;
>                  }
>  
> -                umem->npages += ret;
>                  cur_base += ret * PAGE_SIZE;
>                  npages -= ret;
>  
> +                sg = ib_umem_add_sg_table(sg, page_list, ret,
> +                        dma_get_max_seg_size(context->device->dma_device),
> +                        &umem->sg_nents);
> +
>                  /* Continue to hold the mmap_sem as vma_list access
>                   * needs to be protected.
>                   */
> -                for_each_sg(sg_list_start, sg, ret, i) {
> +                for (i = 0; i < ret && umem->hugetlb; i++) {
>                          if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
>                                  umem->hugetlb = 0;
> -
> -                        sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
>                  }

I was under the wrong impression that we had removed the hugetlb flag. Is it
still needed?

And a more general question: how was the 2G limit chosen?

Thanks
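P.S. A note for driver authors rebasing on top of this: an sge can now cover
many pages, so any walk that assumed one page per entry has to become
length-based. A sketch of the consumer pattern I would expect, with
program_hw_pte() as a made-up placeholder for a driver's page-table writer:

/* Illustrative only: feed each mapped segment to HW in PAGE_SIZE steps. */
static void toy_write_pbl(struct ib_umem *umem)
{
        struct scatterlist *sg;
        unsigned int i;

        /* umem->nmap entries are valid after ib_dma_map_sg() */
        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
                dma_addr_t addr = sg_dma_address(sg);
                unsigned int len = sg_dma_len(sg); /* page-aligned */

                while (len) {
                        program_hw_pte(addr);
                        addr += PAGE_SIZE;
                        len -= PAGE_SIZE;
                }
        }
}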