[PATCH v1 2/3] udmabuf: Add support for page migration out of movable zone or CMA

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Since udmabuf could potentially pin pages that reside in the
movable zone or CMA, and thereby break features such as memory
hot-unplug, it makes sense to migrate the pages out of these
areas. To accomplish this, we record the mapping and the index
of each page and then call check_and_migrate_movable_pages().

As check_and_migrate_movable_pages() unpins all the pages (and
also replaces the migrated pages in the mapping) upon successful
migration, we need to look up all the pages again from their
associated mapping, using the index we recorded earlier, and
re-pin them.

Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
Cc: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Peter Xu <peterx@xxxxxxxxxx>
Cc: Jason Gunthorpe <jgg@xxxxxxxxxx>
Cc: Gerd Hoffmann <kraxel@xxxxxxxxxx>
Cc: Dongwon Kim <dongwon.kim@xxxxxxxxx>
Cc: Junxiao Chang <junxiao.chang@xxxxxxxxx>
Suggested-by: David Hildenbrand <david@xxxxxxxxxx>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@xxxxxxxxx>
---
 drivers/dma-buf/udmabuf.c | 106 +++++++++++++++++++++++++++++++++++---
 1 file changed, 100 insertions(+), 6 deletions(-)

diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 1a41c4a069ea..63912c73d122 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -30,6 +30,12 @@ struct udmabuf {
 	struct sg_table *sg;
 	struct miscdevice *device;
 	pgoff_t *subpgoff;
+	struct udmabuf_backing_info *backing;
+};
+
+struct udmabuf_backing_info {
+	struct address_space *mapping;	/* mapping the page is looked up in */
+	pgoff_t mapidx;			/* page index within that mapping */
};
 
 static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
@@ -156,8 +162,10 @@ static void release_udmabuf(struct dma_buf *buf)
 		put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
 
 	for (pg = 0; pg < ubuf->pagecount; pg++)
-		put_page(ubuf->pages[pg]);
+		unpin_user_page(ubuf->pages[pg]);
+
 	kfree(ubuf->subpgoff);
+	kfree(ubuf->backing);
 	kfree(ubuf->pages);
 	kfree(ubuf);
 }
@@ -211,6 +219,76 @@ static const struct dma_buf_ops udmabuf_ops = {
 #define SEALS_WANTED (F_SEAL_SHRINK)
 #define SEALS_DENIED (F_SEAL_WRITE)
 
+static int udmabuf_pin_pages(struct udmabuf *ubuf)
+{
+	struct address_space *mapping;
+	struct folio *folio;
+	struct page *page;
+	pgoff_t pg, mapidx;
+	int ret;
+
+	for (pg = 0; pg < ubuf->pagecount; pg++) {
+		mapping = ubuf->backing[pg].mapping;
+		mapidx = ubuf->backing[pg].mapidx;
+
+		if (!ubuf->pages[pg]) {
+			page = find_get_page_flags(mapping, mapidx,
+						   FGP_ACCESSED);
+			if (!page) {
+				if (!shmem_mapping(mapping)) {
+					ret = -EINVAL;
+					goto err;
+				}
+
+				page = shmem_read_mapping_page(mapping,
+							       mapidx);
+				if (IS_ERR(page)) {
+					ret = PTR_ERR(page);
+					goto err;
+				}
+			}
+			ubuf->pages[pg] = page;
+		}
+
+		folio = page_folio(ubuf->pages[pg]);
+		if (folio_test_large(folio))
+			atomic_add(1, &folio->_pincount);
+		else
+			folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
+
+		/* Equivalent of FOLL_PIN is taken above, so drop our (udmabuf)
+		 * reference; migrate_pages() fails if it is still held.
+		 * NOTE(review): large folios only bump _pincount, adding no
+		 * reference -- confirm put_page()/unpin_user_page() balance.
+		 */
+		put_page(ubuf->pages[pg]);
+	}
+	return 0;
+
+err:
+	while (pg > 0 && ubuf->pages[--pg]) {
+		unpin_user_page(ubuf->pages[pg]);
+		ubuf->pages[pg] = NULL;
+	}
+	return ret;
+}
+
+static long udmabuf_migrate_pages(struct udmabuf *ubuf)
+{
+	long ret;
+
+	do { /* pin, then migrate; repeat while migration returns -EAGAIN */
+		ret = udmabuf_pin_pages(ubuf);
+		if (ret < 0)
+			break;
+
+		ret = check_and_migrate_movable_pages(ubuf->pagecount,
+						      ubuf->pages);
+	} while (ret == -EAGAIN);
+
+	return ret;
+}
+
 static long udmabuf_create(struct miscdevice *device,
 			   struct udmabuf_create_list *head,
 			   struct udmabuf_create_item *list)
@@ -224,7 +302,8 @@ static long udmabuf_create(struct miscdevice *device,
 	struct page *page, *hpage = NULL;
 	pgoff_t mapidx, chunkoff, maxchunks;
 	struct hstate *hpstate;
-	int seals, ret = -EINVAL;
+	long ret = -EINVAL;
+	int seals;
 	u32 i, flags;
 
 	ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
@@ -252,6 +331,13 @@ static long udmabuf_create(struct miscdevice *device,
 		goto err;
 	}
 
+	ubuf->backing = kmalloc_array(ubuf->pagecount, sizeof(*ubuf->backing),
+				      GFP_KERNEL);
+	if (!ubuf->backing) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
 	pgbuf = 0;
 	for (i = 0; i < head->count; i++) {
 		ret = -EBADFD;
@@ -298,7 +384,8 @@ static long udmabuf_create(struct miscdevice *device,
 				}
 				get_page(hpage);
 				ubuf->pages[pgbuf] = hpage;
-				ubuf->subpgoff[pgbuf++] = chunkoff << PAGE_SHIFT;
+				ubuf->subpgoff[pgbuf] = chunkoff << PAGE_SHIFT;
+				ubuf->backing[pgbuf].mapidx = mapidx;
 				if (++chunkoff == maxchunks) {
 					put_page(hpage);
 					hpage = NULL;
@@ -312,8 +399,10 @@ static long udmabuf_create(struct miscdevice *device,
 					ret = PTR_ERR(page);
 					goto err;
 				}
-				ubuf->pages[pgbuf++] = page;
+				ubuf->pages[pgbuf] = page;
+				ubuf->backing[pgbuf].mapidx = mapidx;
 			}
+			ubuf->backing[pgbuf++].mapping = mapping;
 		}
 		fput(memfd);
 		memfd = NULL;
@@ -323,6 +412,10 @@ static long udmabuf_create(struct miscdevice *device,
 		}
 	}
 
+	ret = udmabuf_migrate_pages(ubuf);
+	if (ret < 0)
+		goto err;
+
 	exp_info.ops  = &udmabuf_ops;
 	exp_info.size = ubuf->pagecount << PAGE_SHIFT;
 	exp_info.priv = ubuf;
@@ -341,11 +434,12 @@ static long udmabuf_create(struct miscdevice *device,
 	return dma_buf_fd(buf, flags);
 
 err:
-	while (pgbuf > 0)
-		put_page(ubuf->pages[--pgbuf]);
+	while (pgbuf > 0 && ubuf->pages[--pgbuf])
+		put_page(ubuf->pages[pgbuf]);
 	if (memfd)
 		fput(memfd);
 	kfree(ubuf->subpgoff);
+	kfree(ubuf->backing);
 	kfree(ubuf->pages);
 	kfree(ubuf);
 	return ret;
-- 
2.39.2




[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux