Re: [PATCH v3 rdma-next] RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs

On 3/28/19 9:54 AM, Shiraz Saleem wrote:
> Combine contiguous regions of PAGE_SIZE pages
> into single scatter list entries while adding
> to the scatter table. This minimizes the number
> of entries in the scatter list and reduces
> the DMA mapping overhead, particularly with the
> IOMMU.
> 
> Also, purge npages in struct ib_umem as we now
> DMA map the umem SGL with sg_nents, and update
> remaining non-ODP drivers that use umem->npages.
> Move npages tracking to ib_umem_odp as ODP drivers
> still need it.
> 
> This patch should be applied post
> https://patchwork.kernel.org/cover/10857607/
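
For readers following the coalescing logic, here is a minimal userspace sketch of the idea: runs of consecutive PFNs are folded into (start_pfn, npages) extents, the same way ib_umem_add_sg_table() folds contiguous pages into a single SGE. The names (struct extent, coalesce) are illustrative only, and the sketch omits the extra check the real code makes for whether a new page_list batch continues the run at the tail of the previous SGE.

/*
 * Userspace model of the merge done by ib_umem_add_sg_table():
 * consecutive PFNs collapse into one (start_pfn, npages) extent,
 * analogous to one coalesced SGE. Standalone sketch, not kernel code.
 */
#include <stdio.h>

struct extent {
	unsigned long start_pfn;	/* first PFN of the run */
	unsigned long npages;		/* number of contiguous pages */
};

/* Merge contiguous PFNs; returns the number of extents produced. */
static size_t coalesce(const unsigned long *pfns, size_t npfns,
		       struct extent *out)
{
	size_t nents = 0;

	for (size_t i = 0; i < npfns; ) {
		unsigned long first = pfns[i];
		unsigned long len = 0;

		/* Count how many PFNs starting at i are consecutive. */
		while (i < npfns && pfns[i] == first + len) {
			len++;
			i++;
		}
		out[nents].start_pfn = first;
		out[nents].npages = len;
		nents++;
	}
	return nents;
}

int main(void)
{
	/* Two contiguous runs with a gap in between. */
	unsigned long pfns[] = { 100, 101, 102, 103, 200, 201 };
	struct extent ext[6];
	size_t n = coalesce(pfns, 6, ext);

	/* Six pages collapse into two extents instead of six entries. */
	for (size_t i = 0; i < n; i++)
		printf("extent %zu: pfn %lu, %lu pages\n",
		       i, ext[i].start_pfn, ext[i].npages);
	return 0;
}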

The vmw_pvrdma changes look fine.

Acked-by: Adit Ranadive <aditr@xxxxxxxxxx>

> 
> Suggested-by: Jason Gunthorpe <jgg@xxxxxxxx>
> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx>
> Reviewed-by: Ira Weiny <ira.weiny@xxxxxxxxx>
> Signed-off-by: Shiraz Saleem <shiraz.saleem@xxxxxxxxx>
> ---
> This patch is decoupled from series https://patchwork.kernel.org/patch/10819993/
> as it can be standalone.
> Changes since v1:
> * made ib_umem_add_sg_table() static
> * simplified bool variable 'first' initialization
> 
> v2->v3:
> * Added a comment on calculation for checking page_list
> contiguity in ib_umem_add_sg_table()
> * Moved iterator to body of for loop in ib_umem_add_sg_table()
> * Removed explicit update to sg->length prior to calling sg_set_page()
> * Removed white space clean-up in ib_umem struct
> * Purged npages from ib_umem struct and updated remaining non-ODP
> drivers to use ib_umem_num_pages()
> * Added npages tracking in ib_umem_odp as ODP drivers still need it.
> 
>  drivers/infiniband/core/umem.c               | 96 ++++++++++++++++++++++------
>  drivers/infiniband/core/umem_odp.c           |  4 +-
>  drivers/infiniband/hw/mlx5/odp.c             |  2 +-
>  drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c | 11 ++--
>  include/rdma/ib_umem.h                       |  2 +-
>  include/rdma/ib_umem_odp.h                   |  1 +
>  6 files changed, 87 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> index fe55515..17ebad9 100644
> --- a/drivers/infiniband/core/umem.c
> +++ b/drivers/infiniband/core/umem.c
> @@ -39,25 +39,22 @@
>  #include <linux/export.h>
>  #include <linux/hugetlb.h>
>  #include <linux/slab.h>
> +#include <linux/pagemap.h>
>  #include <rdma/ib_umem_odp.h>
>  
>  #include "uverbs.h"
>  
> -
>  static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
>  {
> -	struct scatterlist *sg;
> +	struct sg_page_iter sg_iter;
>  	struct page *page;
> -	int i;
>  
>  	if (umem->nmap > 0)
> -		ib_dma_unmap_sg(dev, umem->sg_head.sgl,
> -				umem->npages,
> +		ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
>  				DMA_BIDIRECTIONAL);
>  
> -	for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
> -
> -		page = sg_page(sg);
> +	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
> +		page = sg_page_iter_page(&sg_iter);
>  		if (!PageDirty(page) && umem->writable && dirty)
>  			set_page_dirty_lock(page);
>  		put_page(page);
> @@ -66,6 +63,66 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
>  	sg_free_table(&umem->sg_head);
>  }
>  
> +/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
> + *
> + * sg: current scatterlist entry
> + * page_list: array of npage struct page pointers
> + * npages: number of pages in page_list
> + * nents: [out] number of entries in the scatterlist
> + *
> + * Return new end of scatterlist
> + */
> +static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
> +						struct page **page_list,
> +						unsigned long npages,
> +						int *nents)
> +{
> +	unsigned long first_pfn;
> +	unsigned long i = 0;
> +	bool update_cur_sg = false;
> +	bool first = !sg_page(sg);
> +
> +	/* Check if new page_list is contiguous with end of previous page_list.
> +	 * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
> +	 */
> +	if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
> +		       page_to_pfn(page_list[0])))
> +		update_cur_sg = true;
> +
> +	while (i != npages) {
> +		unsigned long len;
> +		struct page *first_page = page_list[i];
> +
> +		first_pfn = page_to_pfn(first_page);
> +
> +		/* Compute the number of contiguous pages we have starting
> +		 * at i
> +		 */
> +		for (len = 0; i != npages &&
> +			      first_pfn + len == page_to_pfn(page_list[i]);
> +		     len++)
> +			i++;
> +
> +		/* Squash N contiguous pages from page_list into current sge */
> +		if (update_cur_sg) {
> +			sg_set_page(sg, sg_page(sg),
> +				    sg->length + (len << PAGE_SHIFT), 0);
> +			update_cur_sg = false;
> +			continue;
> +		}
> +
> +		/* Squash N contiguous pages into next sge or first sge */
> +		if (!first)
> +			sg = sg_next(sg);
> +
> +		(*nents)++;
> +		sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
> +		first = false;
> +	}
> +
> +	return sg;
> +}
> +
>  /**
>   * ib_umem_get - Pin and DMA map userspace memory.
>   *
> @@ -93,7 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>  	int ret;
>  	int i;
>  	unsigned long dma_attrs = 0;
> -	struct scatterlist *sg, *sg_list_start;
> +	struct scatterlist *sg;
>  	unsigned int gup_flags = FOLL_WRITE;
>  
>  	if (!udata)
> @@ -185,7 +242,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>  	if (!umem->writable)
>  		gup_flags |= FOLL_FORCE;
>  
> -	sg_list_start = umem->sg_head.sgl;
> +	sg = umem->sg_head.sgl;
>  
>  	while (npages) {
>  		down_read(&mm->mmap_sem);
> @@ -198,28 +255,27 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>  			goto umem_release;
>  		}
>  
> -		umem->npages += ret;
>  		cur_base += ret * PAGE_SIZE;
>  		npages   -= ret;
>  
> +		sg = ib_umem_add_sg_table(sg, page_list, ret, &umem->sg_nents);
> +
>  		/* Continue to hold the mmap_sem as vma_list access
>  		 * needs to be protected.
>  		 */
> -		for_each_sg(sg_list_start, sg, ret, i) {
> +		for (i = 0; i < ret && umem->hugetlb; i++) {
>  			if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
>  				umem->hugetlb = 0;
> -
> -			sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
>  		}
> -		up_read(&mm->mmap_sem);
>  
> -		/* preparing for next loop */
> -		sg_list_start = sg;
> +		up_read(&mm->mmap_sem);
>  	}
>  
> +	sg_mark_end(sg);
> +
>  	umem->nmap = ib_dma_map_sg_attrs(context->device,
>  				  umem->sg_head.sgl,
> -				  umem->npages,
> +				  umem->sg_nents,
>  				  DMA_BIDIRECTIONAL,
>  				  dma_attrs);
>  
> @@ -315,8 +371,8 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
>  		return -EINVAL;
>  	}
>  
> -	ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->npages, dst, length,
> -				 offset + ib_umem_offset(umem));
> +	ret = sg_pcopy_to_buffer(umem->sg_head.sgl, ib_umem_num_pages(umem),
> +				 dst, length, offset + ib_umem_offset(umem));
>  
>  	if (ret < 0)
>  		return ret;
> diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
> index e6ec79a..e2d87c4 100644
> --- a/drivers/infiniband/core/umem_odp.c
> +++ b/drivers/infiniband/core/umem_odp.c
> @@ -527,7 +527,7 @@ static int ib_umem_odp_map_dma_single_page(
>  		}
>  		umem_odp->dma_list[page_index] = dma_addr | access_mask;
>  		umem_odp->page_list[page_index] = page;
> -		umem->npages++;
> +		umem_odp->npages++;
>  		stored_page = 1;
>  	} else if (umem_odp->page_list[page_index] == page) {
>  		umem_odp->dma_list[page_index] |= access_mask;
> @@ -759,7 +759,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
>  				put_page(page);
>  			umem_odp->page_list[idx] = NULL;
>  			umem_odp->dma_list[idx] = 0;
> -			umem->npages--;
> +			umem_odp->npages--;
>  		}
>  	}
>  	mutex_unlock(&umem_odp->umem_mutex);
> diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
> index c20bfc4..0c0dfa4 100644
> --- a/drivers/infiniband/hw/mlx5/odp.c
> +++ b/drivers/infiniband/hw/mlx5/odp.c
> @@ -288,7 +288,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
>  
>  	ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
>  
> -	if (unlikely(!umem->npages && mr->parent &&
> +	if (unlikely(!umem_odp->npages && mr->parent &&
>  		     !umem_odp->dying)) {
>  		WRITE_ONCE(umem_odp->dying, 1);
>  		atomic_inc(&mr->parent->num_leaf_free);
> diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
> index a85884e..83167fa 100644
> --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
> +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
> @@ -119,7 +119,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
>  	union pvrdma_cmd_resp rsp;
>  	struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
>  	struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
> -	int ret;
> +	int ret, npages;
>  
>  	if (length == 0 || length > dev->dsr->caps.max_mr_size) {
>  		dev_warn(&dev->pdev->dev, "invalid mem region length\n");
> @@ -133,9 +133,10 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
>  		return ERR_CAST(umem);
>  	}
>  
> -	if (umem->npages < 0 || umem->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
> +	npages = ib_umem_num_pages(umem);
> +	if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
>  		dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n",
> -			 umem->npages);
> +			 npages);
>  		ret = -EINVAL;
>  		goto err_umem;
>  	}
> @@ -150,7 +151,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
>  	mr->mmr.size = length;
>  	mr->umem = umem;
>  
> -	ret = pvrdma_page_dir_init(dev, &mr->pdir, umem->npages, false);
> +	ret = pvrdma_page_dir_init(dev, &mr->pdir, npages, false);
>  	if (ret) {
>  		dev_warn(&dev->pdev->dev,
>  			 "could not allocate page directory\n");
> @@ -167,7 +168,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
>  	cmd->length = length;
>  	cmd->pd_handle = to_vpd(pd)->pd_handle;
>  	cmd->access_flags = access_flags;
> -	cmd->nchunks = umem->npages;
> +	cmd->nchunks = npages;
>  	cmd->pdir_dma = mr->pdir.dir_dma;
>  
>  	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
> diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
> index 73af05d..ccc44c0 100644
> --- a/include/rdma/ib_umem.h
> +++ b/include/rdma/ib_umem.h
> @@ -53,7 +53,7 @@ struct ib_umem {
>  	struct work_struct	work;
>  	struct sg_table sg_head;
>  	int             nmap;
> -	int             npages;
> +	int             sg_nents;
>  };
>  
>  /* Returns the offset of the umem start relative to the first page. */
> diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
> index dadc96d..eeec4e5 100644
> --- a/include/rdma/ib_umem_odp.h
> +++ b/include/rdma/ib_umem_odp.h
> @@ -69,6 +69,7 @@ struct ib_umem_odp {
>  
>  	int notifiers_seq;
>  	int notifiers_count;
> +	int npages;
>  
>  	/* Tree tracking */
>  	struct umem_odp_node	interval_tree;
> 
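
One consequence worth spelling out: after this patch the SGL entry count (umem->sg_nents, now passed to ib_dma_map_sg_attrs() and ib_dma_unmap_sg()) no longer equals the page count, so drivers that sized structures from umem->npages switch to ib_umem_num_pages(), as the pvrdma hunk above does. Below is a standalone sketch of that page count; it assumes the helper's in-tree behavior at the time of this patch (start rounded down, end rounded up to page boundaries), and umem_num_pages() here is just an illustrative stand-in.

/*
 * Standalone model of the count a driver gets from ib_umem_num_pages()
 * instead of the removed umem->npages: the number of PAGE_SIZE pages
 * spanned by [addr, addr + length). Sketch only, not kernel code.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

static unsigned long umem_num_pages(unsigned long addr, unsigned long length)
{
	unsigned long start = addr & PAGE_MASK;				/* round start down */
	unsigned long end = (addr + length + PAGE_SIZE - 1) & PAGE_MASK;	/* round end up */

	return (end - start) >> PAGE_SHIFT;
}

int main(void)
{
	/* 8 KiB starting 1 KiB into a page touches 3 pages. */
	printf("%lu\n", umem_num_pages(0x1400, 8192));
	return 0;
}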




