[PATCH RFC 1/4] RDMA/umem: Minimize SG table entries

Squash contiguous regions of PAGE_SIZE pages into a single SG entry as
opposed to one SG entry per page. This reduces the SG table size and is
friendlier to the IOMMU.
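
The merging itself is done by sg_alloc_table_from_pages(), which starts
a new SG entry only when a page is not physically adjacent to the
previous one. As a rough illustration of that rule (userspace sketch
with made-up PFN values, not part of this patch):

/* Sketch of the coalescing rule: consecutive page-frame numbers
 * collapse into a single SG entry.  The PFN values below are made up.
 */
#include <stdio.h>
#include <stddef.h>

static unsigned int count_sg_entries(const unsigned long *pfns, size_t n)
{
	unsigned int nents = 0;
	size_t i;

	for (i = 0; i < n; i++)
		if (i == 0 || pfns[i] != pfns[i - 1] + 1)
			nents++;
	return nents;
}

int main(void)
{
	/* two physically contiguous runs: six pages + two pages */
	unsigned long pfns[] = { 0x100, 0x101, 0x102, 0x103, 0x104, 0x105,
				 0x300, 0x301 };
	size_t n = sizeof(pfns) / sizeof(pfns[0]);

	/* prints "8 pages -> 2 SG entries" */
	printf("%zu pages -> %u SG entries\n", n, count_sg_entries(pfns, n));
	return 0;
}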

Suggested-by: Jason Gunthorpe <jgg@xxxxxxxx>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx>
Signed-off-by: Shiraz Saleem <shiraz.saleem@xxxxxxxxx>
---
 drivers/infiniband/core/umem.c | 66 ++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 35 deletions(-)
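
Note: since an SG entry can now cover more than one page, per-page
walks over the umem need the SG page iterator rather than assuming one
page per entry; __ib_umem_release() below is converted that way. A
minimal sketch of the pattern (the walk_pages() name is only for
illustration):

#include <linux/scatterlist.h>

/* Visit every PAGE_SIZE chunk of a coalesced table; sg_page_iter_page()
 * hides how many pages each SG entry happens to span.
 */
static void walk_pages(struct sg_table *sgt)
{
	struct sg_page_iter sg_iter;

	for_each_sg_page(sgt->sgl, &sg_iter, sgt->orig_nents, 0) {
		struct page *page = sg_page_iter_page(&sg_iter);

		/* per-page work goes here */
		(void)page;
	}
}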

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index c6144df..486d6d7 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -39,6 +39,7 @@
 #include <linux/export.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
+#include <linux/pagemap.h>
 #include <rdma/ib_umem_odp.h>
 
 #include "uverbs.h"
@@ -46,18 +47,16 @@
 
 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 {
-	struct scatterlist *sg;
+	struct sg_page_iter sg_iter;
 	struct page *page;
-	int i;
 
 	if (umem->nmap > 0)
 		ib_dma_unmap_sg(dev, umem->sg_head.sgl,
-				umem->npages,
+				umem->sg_head.orig_nents,
 				DMA_BIDIRECTIONAL);
 
-	for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
-
-		page = sg_page(sg);
+	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_head.orig_nents, 0) {
+		page = sg_page_iter_page(&sg_iter);
 		if (!PageDirty(page) && umem->writable && dirty)
 			set_page_dirty_lock(page);
 		put_page(page);
@@ -92,7 +91,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	int ret;
 	int i;
 	unsigned long dma_attrs = 0;
-	struct scatterlist *sg, *sg_list_start;
 	unsigned int gup_flags = FOLL_WRITE;
 
 	if (dmasync)
@@ -138,7 +136,13 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	/* We assume the memory is from hugetlb until proved otherwise */
 	umem->hugetlb   = 1;
 
-	page_list = (struct page **) __get_free_page(GFP_KERNEL);
+	npages = ib_umem_num_pages(umem);
+	if (npages == 0 || npages > UINT_MAX) {
+		ret = -EINVAL;
+		goto umem_kfree;
+	}
+
+	page_list = kmalloc_array(npages, sizeof(*page_list), GFP_KERNEL);
 	if (!page_list) {
 		ret = -ENOMEM;
 		goto umem_kfree;
@@ -152,12 +156,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	if (!vma_list)
 		umem->hugetlb = 0;
 
-	npages = ib_umem_num_pages(umem);
-	if (npages == 0 || npages > UINT_MAX) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
 	down_write(&mm->mmap_sem);
@@ -172,50 +170,48 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
 	cur_base = addr & PAGE_MASK;
 
-	ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
-	if (ret)
-		goto vma;
-
 	if (!umem->writable)
 		gup_flags |= FOLL_FORCE;
 
-	sg_list_start = umem->sg_head.sgl;
-
 	while (npages) {
 		down_read(&mm->mmap_sem);
 		ret = get_user_pages_longterm(cur_base,
 				     min_t(unsigned long, npages,
 					   PAGE_SIZE / sizeof (struct page *)),
-				     gup_flags, page_list, vma_list);
+				     gup_flags, page_list + umem->npages, vma_list);
 		if (ret < 0) {
 			up_read(&mm->mmap_sem);
-			goto umem_release;
+			release_pages(page_list, umem->npages);
+			goto vma;
 		}
 
 		umem->npages += ret;
 		cur_base += ret * PAGE_SIZE;
 		npages   -= ret;
 
-		/* Continue to hold the mmap_sem as vma_list access
-		 * needs to be protected.
-		 */
-		for_each_sg(sg_list_start, sg, ret, i) {
+		for (i = 0; i < ret && umem->hugetlb; i++) {
 			if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
 				umem->hugetlb = 0;
-
-			sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
 		}
 		up_read(&mm->mmap_sem);
+	}
 
-		/* preparing for next loop */
-		sg_list_start = sg;
+	ret = sg_alloc_table_from_pages(&umem->sg_head,
+					page_list,
+					umem->npages,
+					0,
+					umem->npages << PAGE_SHIFT,
+					GFP_KERNEL);
+	if (ret) {
+		release_pages(page_list, umem->npages);
+		goto vma;
 	}
 
 	umem->nmap = ib_dma_map_sg_attrs(context->device,
-				  umem->sg_head.sgl,
-				  umem->npages,
-				  DMA_BIDIRECTIONAL,
-				  dma_attrs);
+					 umem->sg_head.sgl,
+					 umem->sg_head.orig_nents,
+					 DMA_BIDIRECTIONAL,
+					 dma_attrs);
 
 	if (!umem->nmap) {
 		ret = -ENOMEM;
@@ -234,7 +230,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 out:
 	if (vma_list)
 		free_page((unsigned long) vma_list);
-	free_page((unsigned long) page_list);
+	kfree(page_list);
 umem_kfree:
 	if (ret) {
 		mmdrop(umem->owning_mm);
-- 
2.8.3



