[PATCH v2 rdma-next] RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Combine contiguous regions of PAGE_SIZE pages
into single scatter list entries while adding
to the scatter table. This minimizes the number
of the entries in the scatter list and reduces
the DMA mapping overhead, particularly with the
IOMMU.

This patch should be applied post
https://patchwork.kernel.org/cover/10857607/

Suggested-by: Jason Gunthorpe <jgg@xxxxxxxx>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx>
Signed-off-by: Shiraz Saleem <shiraz.saleem@xxxxxxxxx>
---
This patch is decoupled from series https://patchwork.kernel.org/patch/10819993/
as it can be standalone.
Changes since v1:
* made ib_umem_add_sg_table() static
* simplified bool variable 'first' initialization

 drivers/infiniband/core/umem.c | 92 ++++++++++++++++++++++++++++++++++--------
 include/rdma/ib_umem.h         | 17 ++++----
 2 files changed, 84 insertions(+), 25 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index fe55515..0b71821 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -39,25 +39,22 @@
 #include <linux/export.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
+#include <linux/pagemap.h>
 #include <rdma/ib_umem_odp.h>
 
 #include "uverbs.h"
 
-
 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 {
-	struct scatterlist *sg;
+	struct sg_page_iter sg_iter;
 	struct page *page;
-	int i;
 
 	if (umem->nmap > 0)
-		ib_dma_unmap_sg(dev, umem->sg_head.sgl,
-				umem->npages,
+		ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
 				DMA_BIDIRECTIONAL);
 
-	for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
-
-		page = sg_page(sg);
+	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
+		page = sg_page_iter_page(&sg_iter);
 		if (!PageDirty(page) && umem->writable && dirty)
 			set_page_dirty_lock(page);
 		put_page(page);
@@ -66,6 +63,67 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 	sg_free_table(&umem->sg_head);
 }
 
+/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
+ *
+ * sg: current scatterlist entry
+ * page_list: array of npage struct page pointers
+ * npages: number of pages in page_list
+ * nents: [out] number of entries in the scatterlist
+ *
+ * Return new end of scatterlist
+ */
+static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
+						struct page **page_list,
+						unsigned long npages,
+						int *nents)
+{
+	unsigned long first_pfn;
+	unsigned long i = 0;
+	bool update_cur_sg = false;
+	bool first = !sg_page(sg);
+
+	/* Check if new page_list is contiguous with end of previous page_list
+	 */
+	if (!first &&
+	    (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
+	     page_to_pfn(page_list[0])))
+		update_cur_sg = true;
+
+	while (i != npages) {
+		unsigned long len;
+		struct page *first_page = page_list[i];
+
+		first_pfn = page_to_pfn(first_page);
+
+		/* Compute the number of contiguous pages we have starting
+		 * at i
+		 */
+		for (len = 0; i != npages &&
+			      first_pfn + len == page_to_pfn(page_list[i]);
+		     i++, len++)
+		;
+
+		/* Squash N contiguous pages from page_list into current sge */
+		if (update_cur_sg) {
+			sg->length += len << PAGE_SHIFT;
+			sg_set_page(sg, sg_page(sg), sg->length, 0);
+			update_cur_sg = false;
+			continue;
+		}
+
+		/* Squash N contiguous pages into next sge or first sge */
+		if (!first)
+			sg = sg_next(sg);
+
+		(*nents)++;
+		sg->length = len << PAGE_SHIFT;
+		sg_set_page(sg, first_page, sg->length, 0);
+		first = false;
+	}
+
+	return sg;
+}
+
 /**
  * ib_umem_get - Pin and DMA map userspace memory.
  *
@@ -93,7 +151,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	int ret;
 	int i;
 	unsigned long dma_attrs = 0;
-	struct scatterlist *sg, *sg_list_start;
+	struct scatterlist *sg;
 	unsigned int gup_flags = FOLL_WRITE;
 
 	if (!udata)
@@ -185,7 +243,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	if (!umem->writable)
 		gup_flags |= FOLL_FORCE;
 
-	sg_list_start = umem->sg_head.sgl;
+	sg = umem->sg_head.sgl;
 
 	while (npages) {
 		down_read(&mm->mmap_sem);
@@ -202,24 +260,24 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 		cur_base += ret * PAGE_SIZE;
 		npages   -= ret;
 
+		sg = ib_umem_add_sg_table(sg, page_list, ret, &umem->sg_nents);
+
 		/* Continue to hold the mmap_sem as vma_list access
 		 * needs to be protected.
 		 */
-		for_each_sg(sg_list_start, sg, ret, i) {
+		for (i = 0; i < ret && umem->hugetlb; i++) {
 			if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
 				umem->hugetlb = 0;
-
-			sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
 		}
-		up_read(&mm->mmap_sem);
 
-		/* preparing for next loop */
-		sg_list_start = sg;
+		up_read(&mm->mmap_sem);
 	}
 
+	sg_mark_end(sg);
+
 	umem->nmap = ib_dma_map_sg_attrs(context->device,
 				  umem->sg_head.sgl,
-				  umem->npages,
+				  umem->sg_nents,
 				  DMA_BIDIRECTIONAL,
 				  dma_attrs);
 
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 73af05d..18407a4 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -42,18 +42,19 @@
 struct ib_umem_odp;
 
 struct ib_umem {
-	struct ib_ucontext     *context;
-	struct mm_struct       *owning_mm;
-	size_t			length;
-	unsigned long		address;
-	int			page_shift;
+	struct ib_ucontext *context;
+	struct mm_struct *owning_mm;
+	size_t length;
+	unsigned long address;
+	int page_shift;
 	u32 writable : 1;
 	u32 hugetlb : 1;
 	u32 is_odp : 1;
-	struct work_struct	work;
+	struct work_struct work;
 	struct sg_table sg_head;
-	int             nmap;
-	int             npages;
+	int sg_nents;
+	int nmap;
+	int npages;
 };
 
 /* Returns the offset of the umem start relative to the first page. */
-- 
1.8.3.1




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux