[PATCH v1 04/11] mm/migrate: convert add_page_for_migration() from follow_page() to folio_walk

David Hildenbrand <david@xxxxxxxxxx> · Fri, 2 Aug 2024 17:55:17 +0200

Let's use folio_walk instead, so we can avoid taking a folio reference
when we won't even be trying to migrate the folio and to get rid of
another follow_page()/FOLL_DUMP user. Use FW_ZEROPAGE so we can return
"-EFAULT" for it as documented.

We now perform the folio_likely_mapped_shared() check under PTL, which
is what we want: relying on the mapcount and friends after dropping the
PTL does not make too much sense, as the page can get unmapped
concurrently from this process.

Further, we perform the folio isolation under PTL, similar to how we
handle it for MADV_PAGEOUT.

The possible return values for follow_page() were confusing, especially
with FOLL_DUMP set. We'll handle it like documented in the man page:
 * -EFAULT: This is a zero page or the memory area is not mapped by the
    process.
 * -ENOENT: The page is not present.

We'll keep setting -ENOENT for ZONE_DEVICE. Maybe not the right thing to
do, but it likely doesn't really matter (just like for weird devmap,
whereby we fake "not present").

The other errors are left as is, and match the documentation in the man
page.

While at it, rename add_page_for_migration() to
add_folio_for_migration().

We'll lose the "secretmem" check, but that shouldn't really matter
because these folios cannot ever be migrated. Should vma_migratable()
refuse these VMAs? Maybe.

Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
---
 mm/migrate.c | 100 +++++++++++++++++++++++----------------------------
 1 file changed, 45 insertions(+), 55 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index b5365a434ba9..e1383d9cc944 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2112,76 +2112,66 @@ static int do_move_pages_to_node(struct list_head *pagelist, int node)
 	return err;
 }
 
+static int __add_folio_for_migration(struct folio *folio, int node,
+		struct list_head *pagelist, bool migrate_all)
+{
+	if (is_zero_folio(folio) || is_huge_zero_folio(folio))
+		return -EFAULT;
+
+	if (folio_is_zone_device(folio))
+		return -ENOENT;
+
+	if (folio_nid(folio) == node)
+		return 0;
+
+	if (folio_likely_mapped_shared(folio) && !migrate_all)
+		return -EACCES;
+
+	if (folio_test_hugetlb(folio)) {
+		if (isolate_hugetlb(folio, pagelist))
+			return 1;
+	} else if (folio_isolate_lru(folio)) {
+		list_add_tail(&folio->lru, pagelist);
+		node_stat_mod_folio(folio,
+			NR_ISOLATED_ANON + folio_is_file_lru(folio),
+			folio_nr_pages(folio));
+		return 1;
+	}
+	return -EBUSY;
+}
+
 /*
- * Resolves the given address to a struct page, isolates it from the LRU and
+ * Resolves the given address to a struct folio, isolates it from the LRU and
  * puts it to the given pagelist.
  * Returns:
- *     errno - if the page cannot be found/isolated
+ *     errno - if the folio cannot be found/isolated
  *     0 - when it doesn't have to be migrated because it is already on the
  *         target node
  *     1 - when it has been queued
  */
-static int add_page_for_migration(struct mm_struct *mm, const void __user *p,
+static int add_folio_for_migration(struct mm_struct *mm, const void __user *p,
 		int node, struct list_head *pagelist, bool migrate_all)
 {
 	struct vm_area_struct *vma;
-	unsigned long addr;
-	struct page *page;
+	struct folio_walk fw;
 	struct folio *folio;
-	int err;
+	unsigned long addr;
+	int err = -EFAULT;
 
 	mmap_read_lock(mm);
 	addr = (unsigned long)untagged_addr_remote(mm, p);
 
-	err = -EFAULT;
 	vma = vma_lookup(mm, addr);
-	if (!vma || !vma_migratable(vma))
-		goto out;
-
-	/* FOLL_DUMP to ignore special (like zero) pages */
-	page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
-
-	err = PTR_ERR(page);
-	if (IS_ERR(page))
-		goto out;
-
-	err = -ENOENT;
-	if (!page)
-		goto out;
-
-	folio = page_folio(page);
-	if (folio_is_zone_device(folio))
-		goto out_putfolio;
-
-	err = 0;
-	if (folio_nid(folio) == node)
-		goto out_putfolio;
-
-	err = -EACCES;
-	if (folio_likely_mapped_shared(folio) && !migrate_all)
-		goto out_putfolio;
-
-	err = -EBUSY;
-	if (folio_test_hugetlb(folio)) {
-		if (isolate_hugetlb(folio, pagelist))
-			err = 1;
-	} else {
-		if (!folio_isolate_lru(folio))
-			goto out_putfolio;
-
-		err = 1;
-		list_add_tail(&folio->lru, pagelist);
-		node_stat_mod_folio(folio,
-			NR_ISOLATED_ANON + folio_is_file_lru(folio),
-			folio_nr_pages(folio));
+	if (vma && vma_migratable(vma)) {
+		folio = folio_walk_start(&fw, vma, addr, FW_ZEROPAGE);
+		if (folio) {
+			err = __add_folio_for_migration(folio, node, pagelist,
+							migrate_all);
+			folio_walk_end(&fw, vma);
+		} else {
+			err = -ENOENT;
+		}
 	}
-out_putfolio:
-	/*
-	 * Either remove the duplicate refcount from folio_isolate_lru()
-	 * or drop the folio ref if it was not isolated.
-	 */
-	folio_put(folio);
-out:
 	mmap_read_unlock(mm);
 	return err;
 }
@@ -2275,8 +2265,8 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
 		 * Errors in the page lookup or isolation are not fatal and we simply
 		 * report them via status
 		 */
-		err = add_page_for_migration(mm, p, current_node, &pagelist,
-					     flags & MPOL_MF_MOVE_ALL);
+		err = add_folio_for_migration(mm, p, current_node, &pagelist,
+					      flags & MPOL_MF_MOVE_ALL);
 
 		if (err > 0) {
 			/* The page is successfully queued for migration */
-- 
2.45.2