On 8 May 2024, at 1:43, Lance Yang wrote:

> On Tue, May 7, 2024 at 11:26 PM Zi Yan <ziy@xxxxxxxxxx> wrote:
>>
>> On 1 May 2024, at 0:26, Lance Yang wrote:
>>
>>> In preparation for supporting try_to_unmap_one() to unmap PMD-mapped
>>> folios, start the pagewalk first, then call split_huge_pmd_address()
>>> to split the folio.
>>>
>>> Suggested-by: David Hildenbrand <david@xxxxxxxxxx>
>>> Signed-off-by: Lance Yang <ioworker0@xxxxxxxxx>
>>> ---
>>>  include/linux/huge_mm.h | 20 ++++++++++++++++++++
>>>  mm/huge_memory.c        | 42 +++++++++++++++++++++--------------------
>>>  mm/rmap.c               | 24 +++++++++++++++++------
>>>  3 files changed, 60 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>> index c8d3ec116e29..38c4b5537715 100644
>>> --- a/include/linux/huge_mm.h
>>> +++ b/include/linux/huge_mm.h
>>> @@ -409,6 +409,20 @@ static inline bool thp_migration_supported(void)
>>>  	return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
>>>  }
>>>
>>> +void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
>>> +			   pmd_t *pmd, bool freeze, struct folio *folio);
>>> +
>>> +static inline void align_huge_pmd_range(struct vm_area_struct *vma,
>>> +					unsigned long *start,
>>> +					unsigned long *end)
>>> +{
>>> +	*start = ALIGN(*start, HPAGE_PMD_SIZE);
>>> +	*end = ALIGN_DOWN(*end, HPAGE_PMD_SIZE);
>>> +
>>> +	VM_WARN_ON_ONCE(vma->vm_start > *start);
>>> +	VM_WARN_ON_ONCE(vma->vm_end < *end);
>>> +}
>>> +
>>>  #else /* CONFIG_TRANSPARENT_HUGEPAGE */
>>>
>>>  static inline bool folio_test_pmd_mappable(struct folio *folio)
>>> @@ -471,6 +485,12 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
>>>  		unsigned long address, bool freeze, struct folio *folio) {}
>>>  static inline void split_huge_pmd_address(struct vm_area_struct *vma,
>>>  		unsigned long address, bool freeze, struct folio *folio) {}
>>> +static inline void split_huge_pmd_locked(struct vm_area_struct *vma,
>>> +					 unsigned long address, pmd_t *pmd,
>>> +					 bool freeze, struct folio *folio) {}
>>> +static inline void align_huge_pmd_range(struct vm_area_struct *vma,
>>> +					unsigned long *start,
>>> +					unsigned long *end) {}
>>>
>>>  #define split_huge_pud(__vma, __pmd, __address)	\
>>>  	do { } while (0)
>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>> index 8261b5669397..145505a1dd05 100644
>>> --- a/mm/huge_memory.c
>>> +++ b/mm/huge_memory.c
>>> @@ -2584,6 +2584,27 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>>>  	pmd_populate(mm, pmd, pgtable);
>>>  }
>>>
>>> +void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
>>> +			   pmd_t *pmd, bool freeze, struct folio *folio)
>>> +{
>>> +	VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio));
>>> +	VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE));
>>> +	VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
>>> +	VM_BUG_ON(freeze && !folio);
>>> +
>>> +	/*
>>> +	 * When the caller requests to set up a migration entry, we
>>> +	 * require a folio to check the PMD against. Otherwise, there
>>> +	 * is a risk of replacing the wrong folio.
>>> +	 */
>>> +	if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
>>> +	    is_pmd_migration_entry(*pmd)) {
>>> +		if (folio && folio != pmd_folio(*pmd))
>>> +			return;
>>> +		__split_huge_pmd_locked(vma, pmd, address, freeze);
>>> +	}
>>> +}
>>> +
>>>  void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
>>>  		unsigned long address, bool freeze, struct folio *folio)
>>>  {
>>> @@ -2595,26 +2616,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
>>>  			(address & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE);
>>>  	mmu_notifier_invalidate_range_start(&range);
>>>  	ptl = pmd_lock(vma->vm_mm, pmd);
>>> -
>>> -	/*
>>> -	 * If caller asks to setup a migration entry, we need a folio to check
>>> -	 * pmd against. Otherwise we can end up replacing wrong folio.
>>> -	 */
>>> -	VM_BUG_ON(freeze && !folio);
>>> -	VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
>>> -
>>> -	if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
>>> -	    is_pmd_migration_entry(*pmd)) {
>>> -		/*
>>> -		 * It's safe to call pmd_page when folio is set because it's
>>> -		 * guaranteed that pmd is present.
>>> -		 */
>>> -		if (folio && folio != pmd_folio(*pmd))
>>> -			goto out;
>>> -		__split_huge_pmd_locked(vma, pmd, range.start, freeze);
>>> -	}
>>> -
>>> -out:
>>> +	split_huge_pmd_locked(vma, range.start, pmd, freeze, folio);
>>>  	spin_unlock(ptl);
>>>  	mmu_notifier_invalidate_range_end(&range);
>>>  }
>>> diff --git a/mm/rmap.c b/mm/rmap.c
>>> index 7e2575d669a9..432601154583 100644
>>> --- a/mm/rmap.c
>>> +++ b/mm/rmap.c
>>> @@ -1636,9 +1636,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
>>>  	if (flags & TTU_SYNC)
>>>  		pvmw.flags = PVMW_SYNC;
>>>
>>> -	if (flags & TTU_SPLIT_HUGE_PMD)
>>> -		split_huge_pmd_address(vma, address, false, folio);
>>> -
>>>  	/*
>>>  	 * For THP, we have to assume the worse case ie pmd for invalidation.
>>>  	 * For hugetlb, it could be much worse if we need to do pud
>>> @@ -1650,6 +1647,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
>>>  	range.end = vma_address_end(&pvmw);
>>>  	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
>>>  				address, range.end);
>>> +	if (flags & TTU_SPLIT_HUGE_PMD)
>>> +		align_huge_pmd_range(vma, &range.start, &range.end);
>>>  	if (folio_test_hugetlb(folio)) {
>>>  		/*
>>>  		 * If sharing is possible, start and end will be adjusted
>>> @@ -1664,9 +1663,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
>>>  	mmu_notifier_invalidate_range_start(&range);
>>>
>>>  	while (page_vma_mapped_walk(&pvmw)) {
>>> -		/* Unexpected PMD-mapped THP? */
>>> -		VM_BUG_ON_FOLIO(!pvmw.pte, folio);
>>> -
>>>  		/*
>>>  		 * If the folio is in an mlock()d vma, we must not swap it out.
>>>  		 */
>>> @@ -1678,6 +1674,22 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
>>>  			goto walk_done_err;
>>>  		}
>>>
>>> +		if (!pvmw.pte && (flags & TTU_SPLIT_HUGE_PMD)) {
>>> +			/*
>>> +			 * We temporarily have to drop the PTL and start once
>>> +			 * again from that now-PTE-mapped page table.
>>> +			 */
>>> +			split_huge_pmd_locked(vma, range.start, pvmw.pmd, false,
>>> +					      folio);
>>
>> Just in case you miss it here: since you will no longer align
>> range.start, as Baolin mentioned in another email, and you have a VM_WARN_ONCE
>> in split_huge_pmd_locked(), you will need to align the input address now.
>
> Thanks for bringing that up!
>
> I did miss the alignment here when I decided to no longer align range.start
> in the other email - thanks!
>

No problem.

> Zi, could I move the alignment here?
> IIUC, we will not encounter a partially mapped THP here, and range.start
> and range.end should also not go beyond the VMA limits.
>
> align_huge_pmd_range(vma, &range.start, &range.end);
> split_huge_pmd_locked(vma, range.start, pvmw.pmd, false,
>                       folio);

I think you can just do

split_huge_pmd_locked(vma, ALIGN(range.start, HPAGE_PMD_SIZE), pvmw.pmd, false, folio);

since range will later be used by mmu_notifier_invalidate_range_end() and
changing it might cause secondary TLB invalidation issues.

>
> Thanks,
> Lance
>
>>
>>> +			pvmw.pmd = NULL;
>>> +			spin_unlock(pvmw.ptl);
>>> +			flags &= ~TTU_SPLIT_HUGE_PMD;
>>> +			continue;
>>> +		}
>>> +
>>> +		/* Unexpected PMD-mapped THP? */
>>> +		VM_BUG_ON_FOLIO(!pvmw.pte, folio);
>>> +
>>>  		pfn = pte_pfn(ptep_get(pvmw.pte));
>>>  		subpage = folio_page(folio, pfn - folio_pfn(folio));
>>>  		address = pvmw.address;
>>> --
>>> 2.33.1
>>
>>
>> --
>> Best Regards,
>> Yan, Zi

--
Best Regards,
Yan, Zi
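
For illustration only, a minimal sketch of how the pagewalk hunk in
try_to_unmap_one() might look if the ALIGN() suggestion above were applied
as-is, i.e. aligning only the address handed to split_huge_pmd_locked() while
leaving range.start untouched for the later
mmu_notifier_invalidate_range_end(&range). This is a sketch of the suggestion
discussed in this thread, not necessarily the code that was ultimately merged:

		if (!pvmw.pte && (flags & TTU_SPLIT_HUGE_PMD)) {
			/*
			 * Split the PMD-mapped folio in place, drop the PTL,
			 * and restart the walk from the now-PTE-mapped page
			 * table. Only the address passed down is PMD-aligned;
			 * range itself is not modified, so the invalidation
			 * range seen by secondary MMUs stays unchanged.
			 */
			split_huge_pmd_locked(vma,
					      ALIGN(range.start, HPAGE_PMD_SIZE),
					      pvmw.pmd, false, folio);
			pvmw.pmd = NULL;
			spin_unlock(pvmw.ptl);
			flags &= ~TTU_SPLIT_HUGE_PMD;
			continue;
		}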