Re: [PATCH rdma-next v1 1/6] RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Feb 19, 2019 at 08:57:40AM -0600, Shiraz Saleem wrote:
> Combine contiguous regions of PAGE_SIZE pages
> into single scatter list entries while adding
> to the scatter table. This minimizes the number
> of the entries in the scatter list and reduces
> the DMA mapping overhead, particularly with the
> IOMMU.
> 
> Suggested-by: Jason Gunthorpe <jgg@xxxxxxxx>
> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx>
> Signed-off-by: Shiraz Saleem <shiraz.saleem@xxxxxxxxx>
> ---
>  drivers/infiniband/core/umem.c | 92 ++++++++++++++++++++++++++++++++++--------
>  include/rdma/ib_umem.h         | 17 ++++----
>  2 files changed, 84 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> index b69d3ef..e8611b7 100644
> --- a/drivers/infiniband/core/umem.c
> +++ b/drivers/infiniband/core/umem.c
> @@ -39,25 +39,22 @@
>  #include <linux/export.h>
>  #include <linux/hugetlb.h>
>  #include <linux/slab.h>
> +#include <linux/pagemap.h>
>  #include <rdma/ib_umem_odp.h>
>  
>  #include "uverbs.h"
>  
> -
>  static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
>  {
> -	struct scatterlist *sg;
> +	struct sg_page_iter sg_iter;
>  	struct page *page;
> -	int i;
>  
>  	if (umem->nmap > 0)
> -		ib_dma_unmap_sg(dev, umem->sg_head.sgl,
> -				umem->npages,
> +		ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
>  				DMA_BIDIRECTIONAL);
>  
> -	for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
> -
> -		page = sg_page(sg);
> +	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
> +		page = sg_page_iter_page(&sg_iter);
>  		if (!PageDirty(page) && umem->writable && dirty)
>  			set_page_dirty_lock(page);
>  		put_page(page);
> @@ -66,6 +63,67 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
>  	sg_free_table(&umem->sg_head);
>  }
>  
> +/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
> + *
> + * sg: current scatterlist entry
> + * page_list: array of npage struct page pointers
> + * npages: number of pages in page_list
> + * nents: [out] number of entries in the scatterlist
> + *
> + * Return new end of scatterlist
> + */
> +struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
> +					 struct page **page_list,
> +					 unsigned long npages,
> +					 int *nents)
> +{
> +	unsigned long first_pfn;
> +	unsigned long i = 0;
> +	bool update_cur_sg = false;
> +	bool first = !sg_page(sg) ? true : false;
> +
> +	/* Check if new page_list is contiguous with end of previous page_list
> +	 */
> +	if (!first &&
> +	    (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
> +	     page_to_pfn(page_list[0])))
> +		update_cur_sg = true;
> +
> +	while (i != npages) {
> +		unsigned long len;
> +		struct page *first_page = page_list[i];
> +
> +		first_pfn = page_to_pfn(first_page);
> +
> +		/* Compute the number of contiguous pages we have starting
> +		 * at i
> +		 */
> +		for (len = 0; i != npages &&
> +			      first_pfn + len == page_to_pfn(page_list[i]);
> +		     i++, len++)
> +		;
> +
> +		/* Squash N contiguous pages from page_list into current sge */
> +		if (update_cur_sg) {
> +			sg->length += len << PAGE_SHIFT;
> +			sg_set_page(sg, sg_page(sg), sg->length, 0);
> +			update_cur_sg = false;
> +			continue;
> +		}
> +
> +		/* Squash N contiguous pages into next sge or first sge */
> +		if (!first)
> +			sg = sg_next(sg);
> +
> +		(*nents)++;
> +		sg->length = len << PAGE_SHIFT;
> +		sg_set_page(sg, first_page, sg->length, 0);
> +		first = false;
> +	}
> +
> +	return sg;
> +}
> +
>  /**
>   * ib_umem_get - Pin and DMA map userspace memory.
>   *
> @@ -93,7 +151,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>  	int ret;
>  	int i;
>  	unsigned long dma_attrs = 0;
> -	struct scatterlist *sg, *sg_list_start;
> +	struct scatterlist *sg;
>  	unsigned int gup_flags = FOLL_WRITE;
>  
>  	context = rdma_get_ucontext(udata);
> @@ -181,7 +239,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>  	if (!umem->writable)
>  		gup_flags |= FOLL_FORCE;
>  
> -	sg_list_start = umem->sg_head.sgl;
> +	sg = umem->sg_head.sgl;
>  
>  	while (npages) {
>  		down_read(&mm->mmap_sem);
> @@ -198,24 +256,24 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>  		cur_base += ret * PAGE_SIZE;
>  		npages   -= ret;
>  
> +		sg = ib_umem_add_sg_table(sg, page_list, ret, &umem->sg_nents);
> +
>  		/* Continue to hold the mmap_sem as vma_list access
>  		 * needs to be protected.
>  		 */
> -		for_each_sg(sg_list_start, sg, ret, i) {
> +		for (i = 0; i < ret && umem->hugetlb; i++) {
>  			if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
>  				umem->hugetlb = 0;
> -
> -			sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
>  		}
> -		up_read(&mm->mmap_sem);
>  
> -		/* preparing for next loop */
> -		sg_list_start = sg;
> +		up_read(&mm->mmap_sem);
>  	}
>  
> +	sg_mark_end(sg);
> +
>  	umem->nmap = ib_dma_map_sg_attrs(context->device,
>  				  umem->sg_head.sgl,
> -				  umem->npages,
> +				  umem->sg_nents,
>  				  DMA_BIDIRECTIONAL,
>  				  dma_attrs);
>  
> diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
> index 73af05d..18407a4 100644
> --- a/include/rdma/ib_umem.h
> +++ b/include/rdma/ib_umem.h
> @@ -42,18 +42,19 @@
>  struct ib_umem_odp;
>  
>  struct ib_umem {
> -	struct ib_ucontext     *context;
> -	struct mm_struct       *owning_mm;
> -	size_t			length;
> -	unsigned long		address;
> -	int			page_shift;
> +	struct ib_ucontext *context;
> +	struct mm_struct *owning_mm;
> +	size_t length;
> +	unsigned long address;
> +	int page_shift;
>

NIT: Did anything change here?

>  	u32 writable : 1;
>  	u32 hugetlb : 1;
>  	u32 is_odp : 1;
> -	struct work_struct	work;
> +	struct work_struct work;
>  	struct sg_table sg_head;
> -	int             nmap;
> -	int             npages;
> +	int sg_nents;


Why can't we use sg_head.nents instead of tracking this in umem?

FWIW I don't see where sg_table.nents is used anywhere...  That is odd to me.

Ira


> +	int nmap;
> +	int npages;
>  };
>  
>  /* Returns the offset of the umem start relative to the first page. */
> -- 
> 1.8.3.1
> 



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux