Re: [PATCH v7 3/4] mm/rmap: integrate PMD-mapped folio splitting into pagewalk loop

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 13.06.24 10:45, David Hildenbrand wrote:
On 13.06.24 10:34, David Hildenbrand wrote:
On 10.06.24 14:06, Lance Yang wrote:
In preparation for supporting try_to_unmap_one() to unmap PMD-mapped
folios, start the pagewalk first, then call split_huge_pmd_address() to
split the folio.

Suggested-by: David Hildenbrand <david@xxxxxxxxxx>
Suggested-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
Signed-off-by: Lance Yang <ioworker0@xxxxxxxxx>
---
    include/linux/huge_mm.h |  6 ++++++
    mm/huge_memory.c        | 42 +++++++++++++++++++++--------------------
    mm/rmap.c               | 21 +++++++++++++++------
    3 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 088d66a54643..4670c6ee118b 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -415,6 +415,9 @@ static inline bool thp_migration_supported(void)
    	return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
    }
+void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
+			   pmd_t *pmd, bool freeze, struct folio *folio);
+
    #else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline bool folio_test_pmd_mappable(struct folio *folio)
@@ -477,6 +480,9 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
    		unsigned long address, bool freeze, struct folio *folio) {}
    static inline void split_huge_pmd_address(struct vm_area_struct *vma,
    		unsigned long address, bool freeze, struct folio *folio) {}
+static inline void split_huge_pmd_locked(struct vm_area_struct *vma,
+					 unsigned long address, pmd_t *pmd,
+					 bool freeze, struct folio *folio) {}
#define split_huge_pud(__vma, __pmd, __address) \
    	do { } while (0)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e6d26c2eb670..d2697cc8f9d4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2581,6 +2581,27 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
    	pmd_populate(mm, pmd, pgtable);
    }
+void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
+			   pmd_t *pmd, bool freeze, struct folio *folio)
+{
+	VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio));
+	VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE));
+	VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
+	VM_BUG_ON(freeze && !folio);

Curious: could we actually end up here without a folio right now? That
would mean, that try_to_unmap_one() would be called with folio==NULL.

+
+	/*
+	 * When the caller requests to set up a migration entry, we
+	 * require a folio to check the PMD against. Otherwise, there
+	 * is a risk of replacing the wrong folio.
+	 */
+	if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
+	    is_pmd_migration_entry(*pmd)) {
+		if (folio && folio != pmd_folio(*pmd))
+			return;
+		__split_huge_pmd_locked(vma, pmd, address, freeze);
+	}
+}
+
    void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
    		unsigned long address, bool freeze, struct folio *folio)
    {
@@ -2592,26 +2613,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
    				(address & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE);
    	mmu_notifier_invalidate_range_start(&range);
    	ptl = pmd_lock(vma->vm_mm, pmd);
-
-	/*
-	 * If caller asks to setup a migration entry, we need a folio to check
-	 * pmd against. Otherwise we can end up replacing wrong folio.
-	 */
-	VM_BUG_ON(freeze && !folio);
-	VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
-
-	if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
-	    is_pmd_migration_entry(*pmd)) {
-		/*
-		 * It's safe to call pmd_page when folio is set because it's
-		 * guaranteed that pmd is present.
-		 */
-		if (folio && folio != pmd_folio(*pmd))
-			goto out;
-		__split_huge_pmd_locked(vma, pmd, range.start, freeze);
-	}
-
-out:
+	split_huge_pmd_locked(vma, range.start, pmd, freeze, folio);
    	spin_unlock(ptl);
    	mmu_notifier_invalidate_range_end(&range);
    }
diff --git a/mm/rmap.c b/mm/rmap.c
index ddffa30c79fb..b77f88695588 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1640,9 +1640,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
    	if (flags & TTU_SYNC)
    		pvmw.flags = PVMW_SYNC;
- if (flags & TTU_SPLIT_HUGE_PMD)
-		split_huge_pmd_address(vma, address, false, folio);
-
    	/*
    	 * For THP, we have to assume the worse case ie pmd for invalidation.
    	 * For hugetlb, it could be much worse if we need to do pud
@@ -1668,9 +1665,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
    	mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
-		/* Unexpected PMD-mapped THP? */
-		VM_BUG_ON_FOLIO(!pvmw.pte, folio);
-
    		/*
    		 * If the folio is in an mlock()d vma, we must not swap it out.
    		 */
@@ -1682,6 +1676,21 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
    			goto walk_done_err;
    		}
+ if (!pvmw.pte && (flags & TTU_SPLIT_HUGE_PMD)) {
+			/*
+			 * We temporarily have to drop the PTL and start once
+			 * again from that now-PTE-mapped page table.
+			 */
+			split_huge_pmd_locked(vma, pvmw.address, pvmw.pmd,
+					      false, folio);
+			flags &= ~TTU_SPLIT_HUGE_PMD;
+			page_vma_mapped_walk_restart(&pvmw);

If, for some reason, split_huge_pmd_locked() would fail, we would keep
looping and never hit the VM_BUG_ON_FOLIO() below. Maybe we'd want to
let split_huge_pmd_locked() return whether splitting succeeded, and
handle that case differently?

I assume it could fail if we race with concurrent split? Or isn't that
possible?


(I assume we hold the PTL, so such races should not be possible)

--
Cheers,

David / dhildenb





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux