[PATCH v3 2/5] rmap: move page unmap operation to dedicated function

Yin Fengwei <fengwei.yin@xxxxxxxxx> · Mon, 6 Mar 2023 17:22:56 +0800

No functional change. Just code reorganized.

Signed-off-by: Yin Fengwei <fengwei.yin@xxxxxxxxx>
---
 mm/rmap.c | 369 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 194 insertions(+), 175 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 508d141dacc5..013643122d0c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1538,17 +1538,204 @@ static bool try_to_unmap_one_hugetlb(struct folio *folio,
 	return ret;
 }
 
+static bool try_to_unmap_one_page(struct folio *folio,
+		struct vm_area_struct *vma, struct mmu_notifier_range range,
+		struct page_vma_mapped_walk pvmw, unsigned long address,
+		enum ttu_flags flags)
+{
+	bool anon_exclusive, ret = true;
+	struct page *subpage;
+	struct mm_struct *mm = vma->vm_mm;
+	pte_t pteval;
+
+	subpage = folio_page(folio,
+			pte_pfn(*pvmw.pte) - folio_pfn(folio));
+	anon_exclusive = folio_test_anon(folio) &&
+		PageAnonExclusive(subpage);
+
+	flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+	/* Nuke the page table entry. */
+	if (should_defer_flush(mm, flags)) {
+		/*
+		 * We clear the PTE but do not flush so potentially
+		 * a remote CPU could still be writing to the folio.
+		 * If the entry was previously clean then the
+		 * architecture must guarantee that a clear->dirty
+		 * transition on a cached TLB entry is written through
+		 * and traps if the PTE is unmapped.
+		 */
+		pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+
+		set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
+	} else {
+		pteval = ptep_clear_flush(vma, address, pvmw.pte);
+	}
+
+	/*
+	 * Now the pte is cleared. If this pte was uffd-wp armed,
+	 * we may want to replace a none pte with a marker pte if
+	 * it's file-backed, so we don't lose the tracking info.
+	 */
+	pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval);
+
+	/* Set the dirty flag on the folio now the pte is gone. */
+	if (pte_dirty(pteval))
+		folio_mark_dirty(folio);
+
+	/* Update high watermark before we lower rss */
+	update_hiwater_rss(mm);
+
+	if (PageHWPoison(subpage) && !(flags & TTU_HWPOISON)) {
+		pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
+		dec_mm_counter(mm, mm_counter(&folio->page));
+		set_pte_at(mm, address, pvmw.pte, pteval);
+	} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
+		/*
+		 * The guest indicated that the page content is of no
+		 * interest anymore. Simply discard the pte, vmscan
+		 * will take care of the rest.
+		 * A future reference will then fault in a new zero
+		 * page. When userfaultfd is active, we must not drop
+		 * this page though, as its main user (postcopy
+		 * migration) will not expect userfaults on already
+		 * copied pages.
+		 */
+		dec_mm_counter(mm, mm_counter(&folio->page));
+		/* We have to invalidate as we cleared the pte */
+		mmu_notifier_invalidate_range(mm, address,
+				address + PAGE_SIZE);
+	} else if (folio_test_anon(folio)) {
+		swp_entry_t entry = { .val = page_private(subpage) };
+		pte_t swp_pte;
+		/*
+		 * Store the swap location in the pte.
+		 * See handle_pte_fault() ...
+		 */
+		if (unlikely(folio_test_swapbacked(folio) !=
+					folio_test_swapcache(folio))) {
+			WARN_ON_ONCE(1);
+			ret = false;
+			/* We have to invalidate as we cleared the pte */
+			mmu_notifier_invalidate_range(mm, address,
+					address + PAGE_SIZE);
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+
+		/* MADV_FREE page check */
+		if (!folio_test_swapbacked(folio)) {
+			int ref_count, map_count;
+
+			/*
+			 * Synchronize with gup_pte_range():
+			 * - clear PTE; barrier; read refcount
+			 * - inc refcount; barrier; read PTE
+			 */
+			smp_mb();
+
+			ref_count = folio_ref_count(folio);
+			map_count = folio_mapcount(folio);
+
+			/*
+			 * Order reads for page refcount and dirty flag
+			 * (see comments in __remove_mapping()).
+			 */
+			smp_rmb();
+
+			/*
+			 * The only page refs must be one from isolation
+			 * plus the rmap(s) (dropped by discard:).
+			 */
+			if (ref_count == 1 + map_count &&
+					!folio_test_dirty(folio)) {
+				/* Invalidate as we cleared the pte */
+				mmu_notifier_invalidate_range(mm,
+						address, address + PAGE_SIZE);
+				dec_mm_counter(mm, MM_ANONPAGES);
+				goto discard;
+			}
+
+			/*
+			 * If the folio was redirtied, it cannot be
+			 * discarded. Remap the page to page table.
+			 */
+			set_pte_at(mm, address, pvmw.pte, pteval);
+			folio_set_swapbacked(folio);
+			ret = false;
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+
+		if (swap_duplicate(entry) < 0) {
+			set_pte_at(mm, address, pvmw.pte, pteval);
+			ret = false;
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+		if (arch_unmap_one(mm, vma, address, pteval) < 0) {
+			swap_free(entry);
+			set_pte_at(mm, address, pvmw.pte, pteval);
+			ret = false;
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+
+		/* See page_try_share_anon_rmap(): clear PTE first. */
+		if (anon_exclusive &&
+				page_try_share_anon_rmap(subpage)) {
+			swap_free(entry);
+			set_pte_at(mm, address, pvmw.pte, pteval);
+			ret = false;
+			page_vma_mapped_walk_done(&pvmw);
+			goto discard;
+		}
+		if (list_empty(&mm->mmlist)) {
+			spin_lock(&mmlist_lock);
+			if (list_empty(&mm->mmlist))
+				list_add(&mm->mmlist, &init_mm.mmlist);
+			spin_unlock(&mmlist_lock);
+		}
+		dec_mm_counter(mm, MM_ANONPAGES);
+		inc_mm_counter(mm, MM_SWAPENTS);
+		swp_pte = swp_entry_to_pte(entry);
+		if (anon_exclusive)
+			swp_pte = pte_swp_mkexclusive(swp_pte);
+		if (pte_soft_dirty(pteval))
+			swp_pte = pte_swp_mksoft_dirty(swp_pte);
+		if (pte_uffd_wp(pteval))
+			swp_pte = pte_swp_mkuffd_wp(swp_pte);
+		set_pte_at(mm, address, pvmw.pte, swp_pte);
+		/* Invalidate as we cleared the pte */
+		mmu_notifier_invalidate_range(mm, address,
+				address + PAGE_SIZE);
+	} else {
+		/*
+		 * This is a locked file-backed folio,
+		 * so it cannot be removed from the page
+		 * cache and replaced by a new folio before
+		 * mmu_notifier_invalidate_range_end, so no
+		 * concurrent thread might update its page table
+		 * to point at a new folio while a device is
+		 * still using this folio.
+		 *
+		 * See Documentation/mm/mmu_notifier.rst
+		 */
+		dec_mm_counter(mm, mm_counter_file(&folio->page));
+	}
+
+discard:
+	return ret;
+}
+
 /*
  * @arg: enum ttu_flags will be passed to this argument
  */
 static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 		     unsigned long address, void *arg)
 {
-	struct mm_struct *mm = vma->vm_mm;
 	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
-	pte_t pteval;
 	struct page *subpage;
-	bool anon_exclusive, ret = true;
+	bool ret = true;
 	struct mmu_notifier_range range;
 	enum ttu_flags flags = (enum ttu_flags)(long)arg;
 
@@ -1613,179 +1800,11 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 
 		subpage = folio_page(folio,
 					pte_pfn(*pvmw.pte) - folio_pfn(folio));
-		anon_exclusive = folio_test_anon(folio) &&
-				 PageAnonExclusive(subpage);
-
-		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
-		/* Nuke the page table entry. */
-		if (should_defer_flush(mm, flags)) {
-			/*
-			 * We clear the PTE but do not flush so potentially
-			 * a remote CPU could still be writing to the folio.
-			 * If the entry was previously clean then the
-			 * architecture must guarantee that a clear->dirty
-			 * transition on a cached TLB entry is written through
-			 * and traps if the PTE is unmapped.
-			 */
-			pteval = ptep_get_and_clear(mm, address, pvmw.pte);
-
-			set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
-		} else {
-			pteval = ptep_clear_flush(vma, address, pvmw.pte);
-		}
-
-		/*
-		 * Now the pte is cleared. If this pte was uffd-wp armed,
-		 * we may want to replace a none pte with a marker pte if
-		 * it's file-backed, so we don't lose the tracking info.
-		 */
-		pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval);
-
-		/* Set the dirty flag on the folio now the pte is gone. */
-		if (pte_dirty(pteval))
-			folio_mark_dirty(folio);
-
-		/* Update high watermark before we lower rss */
-		update_hiwater_rss(mm);
-
-		if (PageHWPoison(subpage) && (flags & TTU_HWPOISON)) {
-			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
-			dec_mm_counter(mm, mm_counter(&folio->page));
-			set_pte_at(mm, address, pvmw.pte, pteval);
-		} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
-			/*
-			 * The guest indicated that the page content is of no
-			 * interest anymore. Simply discard the pte, vmscan
-			 * will take care of the rest.
-			 * A future reference will then fault in a new zero
-			 * page. When userfaultfd is active, we must not drop
-			 * this page though, as its main user (postcopy
-			 * migration) will not expect userfaults on already
-			 * copied pages.
-			 */
-			dec_mm_counter(mm, mm_counter(&folio->page));
-			/* We have to invalidate as we cleared the pte */
-			mmu_notifier_invalidate_range(mm, address,
-						      address + PAGE_SIZE);
-		} else if (folio_test_anon(folio)) {
-			swp_entry_t entry = { .val = page_private(subpage) };
-			pte_t swp_pte;
-			/*
-			 * Store the swap location in the pte.
-			 * See handle_pte_fault() ...
-			 */
-			if (unlikely(folio_test_swapbacked(folio) !=
-					folio_test_swapcache(folio))) {
-				WARN_ON_ONCE(1);
-				ret = false;
-				/* We have to invalidate as we cleared the pte */
-				mmu_notifier_invalidate_range(mm, address,
-							address + PAGE_SIZE);
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
-
-			/* MADV_FREE page check */
-			if (!folio_test_swapbacked(folio)) {
-				int ref_count, map_count;
-
-				/*
-				 * Synchronize with gup_pte_range():
-				 * - clear PTE; barrier; read refcount
-				 * - inc refcount; barrier; read PTE
-				 */
-				smp_mb();
-
-				ref_count = folio_ref_count(folio);
-				map_count = folio_mapcount(folio);
-
-				/*
-				 * Order reads for page refcount and dirty flag
-				 * (see comments in __remove_mapping()).
-				 */
-				smp_rmb();
-
-				/*
-				 * The only page refs must be one from isolation
-				 * plus the rmap(s) (dropped by discard:).
-				 */
-				if (ref_count == 1 + map_count &&
-				    !folio_test_dirty(folio)) {
-					/* Invalidate as we cleared the pte */
-					mmu_notifier_invalidate_range(mm,
-						address, address + PAGE_SIZE);
-					dec_mm_counter(mm, MM_ANONPAGES);
-					goto discard;
-				}
-
-				/*
-				 * If the folio was redirtied, it cannot be
-				 * discarded. Remap the page to page table.
-				 */
-				set_pte_at(mm, address, pvmw.pte, pteval);
-				folio_set_swapbacked(folio);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
-
-			if (swap_duplicate(entry) < 0) {
-				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
-			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
-				swap_free(entry);
-				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
+		ret = try_to_unmap_one_page(folio, vma,
+						range, pvmw, address, flags);
+		if (!ret)
+			break;
 
-			/* See page_try_share_anon_rmap(): clear PTE first. */
-			if (anon_exclusive &&
-			    page_try_share_anon_rmap(subpage)) {
-				swap_free(entry);
-				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
-			}
-			if (list_empty(&mm->mmlist)) {
-				spin_lock(&mmlist_lock);
-				if (list_empty(&mm->mmlist))
-					list_add(&mm->mmlist, &init_mm.mmlist);
-				spin_unlock(&mmlist_lock);
-			}
-			dec_mm_counter(mm, MM_ANONPAGES);
-			inc_mm_counter(mm, MM_SWAPENTS);
-			swp_pte = swp_entry_to_pte(entry);
-			if (anon_exclusive)
-				swp_pte = pte_swp_mkexclusive(swp_pte);
-			if (pte_soft_dirty(pteval))
-				swp_pte = pte_swp_mksoft_dirty(swp_pte);
-			if (pte_uffd_wp(pteval))
-				swp_pte = pte_swp_mkuffd_wp(swp_pte);
-			set_pte_at(mm, address, pvmw.pte, swp_pte);
-			/* Invalidate as we cleared the pte */
-			mmu_notifier_invalidate_range(mm, address,
-						      address + PAGE_SIZE);
-		} else {
-			/*
-			 * This is a locked file-backed folio,
-			 * so it cannot be removed from the page
-			 * cache and replaced by a new folio before
-			 * mmu_notifier_invalidate_range_end, so no
-			 * concurrent thread might update its page table
-			 * to point at a new folio while a device is
-			 * still using this folio.
-			 *
-			 * See Documentation/mm/mmu_notifier.rst
-			 */
-			dec_mm_counter(mm, mm_counter_file(&folio->page));
-		}
-discard:
 		/*
 		 * No need to call mmu_notifier_invalidate_range() it has be
 		 * done above for all cases requiring it to happen under page
-- 
2.30.2