Re: [PATCH v3 5/5] try_to_unmap_one: batched remove rmap, update folio refcount

On 2023/3/6 17:22, Yin Fengwei wrote:
If unmapping one page fails, or the vma walk will skip the next pte,
or the vma walk will end on the next pte, do a batched rmap removal
and update the folio refcount.

Signed-off-by: Yin Fengwei <fengwei.yin@xxxxxxxxx>
---
  include/linux/rmap.h |  1 +
  mm/page_vma_mapped.c | 30 +++++++++++++++++++++++++++
  mm/rmap.c            | 48 ++++++++++++++++++++++++++++++++++----------
  3 files changed, 68 insertions(+), 11 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index d2569b42e21a..18193d1d5a8e 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -424,6 +424,7 @@ static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
  }
  bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
+bool pvmw_walk_skip_or_end_on_next(struct page_vma_mapped_walk *pvmw);
 
  /*
   * Used by swapoff to help locate where page is expected in vma.
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 4e448cfbc6ef..19e997dfb5c6 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -291,6 +291,36 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
  	return false;
  }
+/**
+ * pvmw_walk_skip_or_end_on_next - check whether the walk will skip or
+ *                                 end on the next pte
+ * @pvmw: pointer to struct page_vma_mapped_walk.
+ *
+ * This function can only be called with the correct pte lock held.
+ */
+bool pvmw_walk_skip_or_end_on_next(struct page_vma_mapped_walk *pvmw)
+{
+       unsigned long address = pvmw->address + PAGE_SIZE;
+
+       if (address >= vma_address_end(pvmw))
+               return true;

If vma_address_end() is exactly equal to the next address
(pvmw->address + PAGE_SIZE), does this mean we skip unmapping the last
page here? If so,

perhaps '>' could be used instead: 'if (address > vma_address_end(pvmw))'.

I may have misunderstood, please correct me.
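To make the boundary case concrete, here is a minimal user-space sketch
of the two comparisons (not kernel code; the exclusive-end semantics of
vma_address_end() and all addresses are assumptions for illustration):

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	/* Hypothetical range: ptes at 0x1000 and 0x2000, exclusive end 0x3000 */
	unsigned long vma_end = 0x3000;
	unsigned long cur = 0x2000;		/* pte currently being unmapped */
	unsigned long next = cur + PAGE_SIZE;	/* 0x3000: one past the range */

	printf(">= : %d\n", next >= vma_end);	/* 1: walk ends, batch is flushed */
	printf(">  : %d\n", next > vma_end);	/* 0: boundary not detected */
	return 0;
}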

+
+       if ((address & (PMD_SIZE - PAGE_SIZE)) == 0)
+               return true;
+
+       if (pte_none(*pvmw->pte))
+               return true;
+
+       pvmw->pte++;
+       if (!check_pte(pvmw)) {
+               pvmw->pte--;
+               return true;
+       }
+       pvmw->pte--;
+
+       return false;
+}
+
  /**
   * page_mapped_in_vma - check whether a page is really mapped in a VMA
   * @page: the page to test
diff --git a/mm/rmap.c b/mm/rmap.c
index bb3fcb8df579..a64e9cbb52dd 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1741,6 +1741,26 @@ static bool try_to_unmap_one_page(struct folio *folio,
  	return false;
  }
+static void folio_remove_rmap_and_update_count(struct folio *folio,
+		struct page *start, struct vm_area_struct *vma, int count)
+{
+	if (count == 0)
+		return;
+
+	/*
+	 * No need to call mmu_notifier_invalidate_range(); it has been
+	 * done above for all cases requiring it to happen under page
+	 * table lock before mmu_notifier_invalidate_range_end()
+	 *
+	 * See Documentation/mm/mmu_notifier.rst
+	 */
+	folio_remove_rmap_range(folio, start, count, vma,
+					folio_test_hugetlb(folio));
+	if (vma->vm_flags & VM_LOCKED)
+		mlock_drain_local();
+	folio_ref_sub(folio, count);
+}
+
  /*
   * @arg: enum ttu_flags will be passed to this argument
   */
@@ -1748,10 +1768,11 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
  		     unsigned long address, void *arg)
  {
  	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
-	struct page *subpage;
+	struct page *start = NULL;
  	bool ret = true;
  	struct mmu_notifier_range range;
  	enum ttu_flags flags = (enum ttu_flags)(long)arg;
+	int count = 0;
 	/*
  	 * When racing against e.g. zap_pte_range() on another cpu,
@@ -1812,26 +1833,31 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
  			break;
  		}
-		subpage = folio_page(folio,
+		if (!start)
+			start = folio_page(folio,
  					pte_pfn(*pvmw.pte) - folio_pfn(folio));
  		ret = try_to_unmap_one_page(folio, vma,
  						range, pvmw, address, flags);
  		if (!ret) {
+			folio_remove_rmap_and_update_count(folio,
+							start, vma, count);
  			page_vma_mapped_walk_done(&pvmw);
  			break;
  		}
+		count++;
 		/*
-		 * No need to call mmu_notifier_invalidate_range() it has be
-		 * done above for all cases requiring it to happen under page
-		 * table lock before mmu_notifier_invalidate_range_end()
-		 *
-		 * See Documentation/mm/mmu_notifier.rst
+		 * If the next pte will be skipped in page_vma_mapped_walk() or
+		 * the walk will end at it, do the batched rmap removal and
+		 * update the page refcount. We can't do it after
+		 * page_vma_mapped_walk() returns false because the pte lock
+		 * will no longer be held.
  		 */
-		page_remove_rmap(subpage, vma, false);
-		if (vma->vm_flags & VM_LOCKED)
-			mlock_drain_local();
-		folio_put(folio);
+		if (pvmw_walk_skip_or_end_on_next(&pvmw)) {
+			folio_remove_rmap_and_update_count(folio,
+							start, vma, count);
+			count = 0;
+			start = NULL;
+		}
  	}
 	mmu_notifier_invalidate_range_end(&range);
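
A side note on the PMD-boundary test above:
'(address & (PMD_SIZE - PAGE_SIZE)) == 0' works because PMD_SIZE and
PAGE_SIZE are powers of two, so the mask selects exactly the page-index
bits within one PMD. A minimal user-space sketch of the arithmetic
(4 KiB pages and 2 MiB PMDs are assumptions for illustration, not taken
from the patch):

#include <stdio.h>

#define PAGE_SIZE	0x1000UL	/* 4 KiB, hypothetical */
#define PMD_SIZE	0x200000UL	/* 2 MiB, hypothetical */

int main(void)
{
	/* PMD_SIZE - PAGE_SIZE masks the page-index bits inside one PMD */
	unsigned long mask = PMD_SIZE - PAGE_SIZE;	/* 0x1ff000 */

	/* Exactly PMD-aligned: the next pte lives in a new pte page */
	printf("%d\n", (0x400000UL & mask) == 0);	/* 1 */
	/* Inside a PMD: the next pte is in the same pte page */
	printf("%d\n", (0x401000UL & mask) == 0);	/* 0 */
	return 0;
}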



