Re: [PATCH v2 rdma-next] RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs

On Mon, Mar 18, 2019 at 08:59:48AM -0500, Shiraz Saleem wrote:
> Combine contiguous regions of PAGE_SIZE pages
> into single scatter list entries while adding
> to the scatter table. This minimizes the number
> of the entries in the scatter list and reduces
> the DMA mapping overhead, particularly with the
> IOMMU.
> 
> This patch should be applied post
> https://patchwork.kernel.org/cover/10857607/
> 
> Suggested-by: Jason Gunthorpe <jgg@xxxxxxxx>
> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx>
> Signed-off-by: Shiraz Saleem <shiraz.saleem@xxxxxxxxx>
> This patch is decoupled from series https://patchwork.kernel.org/patch/10819993/
> as it can be standalone.
> Changes since v1:
> * made ib_umem_add_sg_table() static
> * simplified bool variable 'first' initialization
> 
>  drivers/infiniband/core/umem.c | 92 ++++++++++++++++++++++++++++++++++--------
>  include/rdma/ib_umem.h         | 17 ++++----
>  2 files changed, 84 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> index fe55515..0b71821 100644
> +++ b/drivers/infiniband/core/umem.c
> @@ -39,25 +39,22 @@
>  #include <linux/export.h>
>  #include <linux/hugetlb.h>
>  #include <linux/slab.h>
> +#include <linux/pagemap.h>
>  #include <rdma/ib_umem_odp.h>
>  
>  #include "uverbs.h"
>  
> -
>  static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
>  {
> -	struct scatterlist *sg;
> +	struct sg_page_iter sg_iter;
>  	struct page *page;
> -	int i;
>  
>  	if (umem->nmap > 0)
> -		ib_dma_unmap_sg(dev, umem->sg_head.sgl,
> -				umem->npages,
> +		ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
>  				DMA_BIDIRECTIONAL);
>  
> -	for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
> -
> -		page = sg_page(sg);
> +	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
> +		page = sg_page_iter_page(&sg_iter);
>  		if (!PageDirty(page) && umem->writable && dirty)
>  			set_page_dirty_lock(page);
>  		put_page(page);
> @@ -66,6 +63,67 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
>  	sg_free_table(&umem->sg_head);
>  }
>  
> +/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
> + *
> + * sg: current scatterlist entry
> + * page_list: array of npages struct page pointers
> + * npages: number of pages in page_list
> + * nents: [out] number of entries in the scatterlist
> + *
> + * Return new end of scatterlist
> + */
> +static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
> +						struct page **page_list,
> +						unsigned long npages,
> +						int *nents)
> +{
> +	unsigned long first_pfn;
> +	unsigned long i = 0;
> +	bool update_cur_sg = false;
> +	bool first = !sg_page(sg);
> +
> +	/* Check if new page_list is contiguous with end of previous page_list
> +	 */
> +	if (!first &&
> +	    (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
> +	     page_to_pfn(page_list[0])))
> +		update_cur_sg = true;

Hurm, this wrong-looking math is only OK because the offset is always 0
and the length is always a multiple of PAGE_SIZE. Maybe add a comment?
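
Something along these lines maybe, just as a sketch of the assumption and
not prescriptive wording:

	/* Every sge here is built with offset 0 and a length that is a
	 * multiple of PAGE_SIZE, so length >> PAGE_SHIFT is the number
	 * of pages covered by the sge and this pfn arithmetic is valid.
	 */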

> +
> +	while (i != npages) {
> +		unsigned long len;
> +		struct page *first_page = page_list[i];
> +
> +		first_pfn = page_to_pfn(first_page);
> +
> +		/* Compute the number of contiguous pages we have starting
> +		 * at i
> +		 */
> +		for (len = 0; i != npages &&
> +			      first_pfn + len == page_to_pfn(page_list[i]);
> +		     i++, len++)
> +		;

Put the i++ as the loop body to avoid an empty body.
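
ie something like this (untested sketch):

		for (len = 0; i != npages &&
			      first_pfn + len == page_to_pfn(page_list[i]);
		     len++)
			i++;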


> +
> +		/* Squash N contiguous pages from page_list into current sge */
> +		if (update_cur_sg) {
> +			sg->length += len << PAGE_SHIFT;
> +			sg_set_page(sg, sg_page(sg), sg->length, 0);

It's weird to write to sg->length and then call sg_set_page. How about:

   sg_set_page(sg, sg_page(sg), sg->length + (len << PAGE_SHIFT), 0);


> +			update_cur_sg = false;
> +			continue;
> +		}
> +
> +		/* Squash N contiguous pages into next sge or first sge */
> +		if (!first)
> +			sg = sg_next(sg);
> +
> +		(*nents)++;
> +		sg->length = len << PAGE_SHIFT;
> +		sg_set_page(sg, first_page, sg->length, 0);

Ditto
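
ie something like (untested):

		sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);

and drop the separate write to sg->length.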

>  /**
>   * ib_umem_get - Pin and DMA map userspace memory.
>   *
> @@ -93,7 +151,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>  	int ret;
>  	int i;
>  	unsigned long dma_attrs = 0;
> -	struct scatterlist *sg, *sg_list_start;
> +	struct scatterlist *sg;
>  	unsigned int gup_flags = FOLL_WRITE;
>  
>  	if (!udata)
> @@ -185,7 +243,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>  	if (!umem->writable)
>  		gup_flags |= FOLL_FORCE;
>  
> -	sg_list_start = umem->sg_head.sgl;
> +	sg = umem->sg_head.sgl;
>  
>  	while (npages) {
>  		down_read(&mm->mmap_sem);
> @@ -202,24 +260,24 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>  		cur_base += ret * PAGE_SIZE;
>  		npages   -= ret;
>  
> +		sg = ib_umem_add_sg_table(sg, page_list, ret, &umem->sg_nents);
> +
>  		/* Continue to hold the mmap_sem as vma_list access
>  		 * needs to be protected.
>  		 */
> -		for_each_sg(sg_list_start, sg, ret, i) {
> +		for (i = 0; i < ret && umem->hugetlb; i++) {
>  			if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
>  				umem->hugetlb = 0;
> -
> -			sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
>  		}
> -		up_read(&mm->mmap_sem);
>  
> -		/* preparing for next loop */
> -		sg_list_start = sg;
> +		up_read(&mm->mmap_sem);
>  	}
>  
> +	sg_mark_end(sg);
> +
>  	umem->nmap = ib_dma_map_sg_attrs(context->device,
>  				  umem->sg_head.sgl,
> -				  umem->npages,
> +				  umem->sg_nents,
>  				  DMA_BIDIRECTIONAL,
>  				  dma_attrs);
>  
> diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
> index 73af05d..18407a4 100644
> +++ b/include/rdma/ib_umem.h
> @@ -42,18 +42,19 @@
>  struct ib_umem_odp;
>  
>  struct ib_umem {
> -	struct ib_ucontext     *context;
> -	struct mm_struct       *owning_mm;
> -	size_t			length;
> -	unsigned long		address;
> -	int			page_shift;
> +	struct ib_ucontext *context;
> +	struct mm_struct *owning_mm;
> +	size_t length;
> +	unsigned long address;
> +	int page_shift;
>  	u32 writable : 1;
>  	u32 hugetlb : 1;
>  	u32 is_odp : 1;
> -	struct work_struct	work;
> +	struct work_struct work;
>  	struct sg_table sg_head;
> -	int             nmap;
> -	int             npages;
> +	int sg_nents;
> +	int nmap;
> +	int npages;
>  };

Please avoid the whitespace changes.

Jason


