Re: [PATCH v5 rdma-next] RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs

On Tue, Apr 02, 2019 at 02:52:52PM -0500, Shiraz Saleem wrote:
> Combine contiguous regions of PAGE_SIZE pages into single scatter list
> entries while adding to the scatter table. This minimizes the number of
> entries in the scatter list and reduces the DMA mapping overhead,
> particularly with the IOMMU.
>
> Set default max_seg_size in core for IB devices to 2G and do not combine
> if we exceed this limit.
>
> Also, purge npages in struct ib_umem as we now DMA map the umem SGL with
> sg_nents, and update remaining non-ODP drivers that use umem->npages.
> Move npages tracking to ib_umem_odp as ODP drivers still need it.
>
> This patch should be applied post
> https://patchwork.kernel.org/cover/10857607/
>
> Suggested-by: Jason Gunthorpe <jgg@xxxxxxxx>
> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx>
> Reviewed-by: Ira Weiny <ira.weiny@xxxxxxxxx>
> Acked-by: Adit Ranadive <aditr@xxxxxxxxxx>
> Signed-off-by: Shiraz Saleem <shiraz.saleem@xxxxxxxxx>
> ---
> This patch is decoupled from series https://patchwork.kernel.org/patch/10819993/
> as it can be standalone.
> v1->v2:
> * made ib_umem_add_sg_table() static
> * simplified bool variable 'first' initialization
>
> v2->v3:
> * Added a comment on the calculation used to check page_list
> contiguity in ib_umem_add_sg_table()
> * Moved iterator to body of for loop in ib_umem_add_sg_table()
> * Removed explicit update to sg->length prior to calling sg_set_page()
> * Removed white space clean-up in ib_umem struct
> * Purged npages from ib_umem struct and updated remaining non-ODP
> drivers to use ib_umem_num_pages()
> * Added npages tracking in ib_umem_odp as ODP drivers still need it.
>
> v3->v4:
> * Set default max_seg_size for all IB devices to 2G and
> avoid combining if we exceed this limit.
>
> v4->v5:
> * updated function header for ib_umem_add_sg_table()
> * updated sg_nents to unsigned int
> * fixed max seg size limits check in ib_umem_add_sg_table()
>
>  drivers/infiniband/core/device.c             |   3 +
>  drivers/infiniband/core/umem.c               | 101 +++++++++++++++++++++------
>  drivers/infiniband/core/umem_odp.c           |   4 +-
>  drivers/infiniband/hw/mlx5/odp.c             |   2 +-
>  drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c |  11 +--
>  include/rdma/ib_umem.h                       |   2 +-
>  include/rdma/ib_umem_odp.h                   |   1 +
>  7 files changed, 95 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index 7421ec4..5a1bdb9 100644
> --- a/drivers/infiniband/core/device.c
> +++ b/drivers/infiniband/core/device.c
> @@ -711,6 +711,9 @@ static void setup_dma_device(struct ib_device *device)
>  		WARN_ON_ONCE(!parent);
>  		device->dma_device = parent;
>  	}
> +	/* Setup default max segment size for all IB devices */
> +	dma_set_max_seg_size(device->dma_device, SZ_2G);
> +
>  }
>
>  /*
> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> index 89a7d57..d31f5e3 100644
> --- a/drivers/infiniband/core/umem.c
> +++ b/drivers/infiniband/core/umem.c
> @@ -39,25 +39,22 @@
>  #include <linux/export.h>
>  #include <linux/hugetlb.h>
>  #include <linux/slab.h>
> +#include <linux/pagemap.h>
>  #include <rdma/ib_umem_odp.h>
>
>  #include "uverbs.h"
>
> -
>  static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
>  {
> -	struct scatterlist *sg;
> +	struct sg_page_iter sg_iter;
>  	struct page *page;
> -	int i;
>
>  	if (umem->nmap > 0)
> -		ib_dma_unmap_sg(dev, umem->sg_head.sgl,
> -				umem->npages,
> +		ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
>  				DMA_BIDIRECTIONAL);
>
> -	for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
> -
> -		page = sg_page(sg);
> +	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
> +		page = sg_page_iter_page(&sg_iter);
>  		if (!PageDirty(page) && umem->writable && dirty)
>  			set_page_dirty_lock(page);
>  		put_page(page);
> @@ -66,6 +63,69 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
>  	sg_free_table(&umem->sg_head);
>  }
>
> +/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
> + *
> + * sg: current scatterlist entry
> + * page_list: array of npages struct page pointers
> + * npages: number of pages in page_list
> + * max_seg_sz: maximum segment size in bytes
> + * nents: [out] number of entries in the scatterlist
> + *
> + * Return new end of scatterlist
> + */
> +static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
> +						struct page **page_list,
> +						unsigned long npages,
> +						unsigned int max_seg_sz,
> +						int *nents)
> +{
> +	unsigned long first_pfn;
> +	unsigned long i = 0;
> +	bool update_cur_sg = false;
> +	bool first = !sg_page(sg);
> +
> +	/* Check if new page_list is contiguous with end of previous page_list.
> +	 * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
> +	 */
> +	if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
> +		       page_to_pfn(page_list[0])))
> +		update_cur_sg = true;
> +
> +	while (i != npages) {
> +		unsigned long len;
> +		struct page *first_page = page_list[i];
> +
> +		first_pfn = page_to_pfn(first_page);
> +
> +		/* Compute the number of contiguous pages we have starting
> +		 * at i
> +		 */
> +		for (len = 0; i != npages &&
> +			      first_pfn + len == page_to_pfn(page_list[i]);
> +		     len++)
> +			i++;
> +
> +		/* Squash N contiguous pages from page_list into current sge */
> +		if (update_cur_sg &&
> +		    ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT))) {
> +			sg_set_page(sg, sg_page(sg),
> +				    sg->length + (len << PAGE_SHIFT), 0);
> +			update_cur_sg = false;
> +			continue;
> +		}
> +
> +		/* Squash N contiguous pages into next sge or first sge */
> +		if (!first)
> +			sg = sg_next(sg);
> +
> +		(*nents)++;
> +		sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
> +		first = false;
> +	}
> +
> +	return sg;
> +}
> +
>  /**
>   * ib_umem_get - Pin and DMA map userspace memory.
>   *
> @@ -93,7 +153,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>  	int ret;
>  	int i;
>  	unsigned long dma_attrs = 0;
> -	struct scatterlist *sg, *sg_list_start;
> +	struct scatterlist *sg;
>  	unsigned int gup_flags = FOLL_WRITE;
>
>  	if (!udata)
> @@ -190,7 +250,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>  	if (!umem->writable)
>  		gup_flags |= FOLL_FORCE;
>
> -	sg_list_start = umem->sg_head.sgl;
> +	sg = umem->sg_head.sgl;
>
>  	while (npages) {
>  		down_read(&mm->mmap_sem);
> @@ -203,28 +263,29 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>  			goto umem_release;
>  		}
>
> -		umem->npages += ret;
>  		cur_base += ret * PAGE_SIZE;
>  		npages   -= ret;
>
> +		sg = ib_umem_add_sg_table(sg, page_list, ret,
> +			dma_get_max_seg_size(context->device->dma_device),
> +			&umem->sg_nents);
> +
>  		/* Continue to hold the mmap_sem as vma_list access
>  		 * needs to be protected.
>  		 */
> -		for_each_sg(sg_list_start, sg, ret, i) {
> +		for (i = 0; i < ret && umem->hugetlb; i++) {
>  			if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
>  				umem->hugetlb = 0;
> -
> -			sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
>  		}

I was under the wrong impression that we had removed the hugetlb flag. Is it still needed?
And a more general question: how was the 2G limit chosen?
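
To check my understanding of the combining logic, here is a minimal
user-space sketch of the pfn arithmetic (the 4K page size, struct seg,
and combine() are illustrative stand-ins of mine, not the kernel
interfaces the patch uses):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/* Stand-in for one scatterlist entry: a run of contiguous pages. */
struct seg {
	unsigned long first_pfn;
	unsigned long length;	/* bytes, multiple of PAGE_SIZE */
};

/* Fold a pinned pfn array into entries, capped at max_seg_sz bytes. */
static int combine(const unsigned long *pfns, unsigned long npfns,
		   unsigned long max_seg_sz, struct seg *segs)
{
	int nents = 0;
	unsigned long i;

	for (i = 0; i < npfns; i++) {
		struct seg *cur = nents ? &segs[nents - 1] : NULL;

		/* Same test as the patch: the page starts exactly where
		 * the current entry ends, and the entry still has room.
		 */
		if (cur &&
		    cur->first_pfn + (cur->length >> PAGE_SHIFT) == pfns[i] &&
		    max_seg_sz - cur->length >= PAGE_SIZE) {
			cur->length += PAGE_SIZE;
			continue;
		}
		segs[nents].first_pfn = pfns[i];
		segs[nents].length = PAGE_SIZE;
		nents++;
	}
	return nents;
}

int main(void)
{
	/* pfns 100-103 are contiguous; 200 starts a new entry. */
	unsigned long pfns[] = { 100, 101, 102, 103, 200 };
	struct seg segs[5];
	int i, nents = combine(pfns, 5, 1UL << 31 /* SZ_2G */, segs);

	for (i = 0; i < nents; i++)	/* expect 2 entries */
		printf("seg %d: pfn %lu, %lu bytes\n", i,
		       segs[i].first_pfn, segs[i].length);
	return 0;
}

This prints one 16K entry starting at pfn 100 and one 4K entry at
pfn 200, which matches what I expect ib_umem_add_sg_table() to build.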

Thanks


