On Wed, Sep 02, 2020 at 02:06:17PM -0400, Zi Yan wrote: > From: Zi Yan <ziy@xxxxxxxxxx> > > Add PUD-level TLB flush ops and teach page_vma_mapped_talk about 1GB > THPs. > > Signed-off-by: Zi Yan <ziy@xxxxxxxxxx> > --- > arch/x86/include/asm/pgtable.h | 3 +++ > arch/x86/mm/pgtable.c | 13 +++++++++++++ > include/linux/mmu_notifier.h | 13 +++++++++++++ > include/linux/pgtable.h | 14 ++++++++++++++ > include/linux/rmap.h | 1 + > mm/page_vma_mapped.c | 33 +++++++++++++++++++++++++++++---- > mm/rmap.c | 12 +++++++++--- > 7 files changed, 82 insertions(+), 7 deletions(-) > > diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h > index 26255cac78c0..15334f5ba172 100644 > --- a/arch/x86/include/asm/pgtable.h > +++ b/arch/x86/include/asm/pgtable.h > @@ -1127,6 +1127,9 @@ extern int pudp_test_and_clear_young(struct vm_area_struct *vma, > extern int pmdp_clear_flush_young(struct vm_area_struct *vma, > unsigned long address, pmd_t *pmdp); > > +#define __HAVE_ARCH_PUDP_CLEAR_YOUNG_FLUSH > +extern int pudp_clear_flush_young(struct vm_area_struct *vma, > + unsigned long address, pud_t *pudp); > > #define pmd_write pmd_write > static inline int pmd_write(pmd_t pmd) > diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c > index 7be73aee6183..e4a2dffcc418 100644 > --- a/arch/x86/mm/pgtable.c > +++ b/arch/x86/mm/pgtable.c > @@ -633,6 +633,19 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma, > > return young; > } > +int pudp_clear_flush_young(struct vm_area_struct *vma, > + unsigned long address, pud_t *pudp) > +{ > + int young; > + > + VM_BUG_ON(address & ~HPAGE_PUD_MASK); > + > + young = pudp_test_and_clear_young(vma, address, pudp); > + if (young) > + flush_tlb_range(vma, address, address + HPAGE_PUD_SIZE); > + > + return young; > +} > #endif > > /** > diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h > index b8200782dede..4ffa179e654f 100644 > --- a/include/linux/mmu_notifier.h > +++ b/include/linux/mmu_notifier.h > @@ 
-557,6 +557,19 @@ static inline void mmu_notifier_range_init_migrate( > __young; \ > }) > > +#define pudp_clear_flush_young_notify(__vma, __address, __pudp) \ > +({ \ > + int __young; \ > + struct vm_area_struct *___vma = __vma; \ > + unsigned long ___address = __address; \ > + __young = pudp_clear_flush_young(___vma, ___address, __pudp); \ > + __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ > + ___address, \ > + ___address + \ > + PUD_SIZE); \ > + __young; \ > +}) > + > #define ptep_clear_young_notify(__vma, __address, __ptep) \ > ({ \ > int __young; \ > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > index 255275d5b73e..8ef358c386af 100644 > --- a/include/linux/pgtable.h > +++ b/include/linux/pgtable.h > @@ -240,6 +240,20 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, > #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ > #endif > > +#ifndef __HAVE_ARCH_PUDP_CLEAR_YOUNG_FLUSH > +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD > +extern int pudp_clear_flush_young(struct vm_area_struct *vma, > + unsigned long address, pud_t *pudp); > +#else > +int pudp_clear_flush_young(struct vm_area_struct *vma, > + unsigned long address, pud_t *pudp) > +{ > + BUILD_BUG(); > + return 0; > +} > +#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ > +#endif > + > #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR > static inline pte_t ptep_get_and_clear(struct mm_struct *mm, > unsigned long address, > diff --git a/include/linux/rmap.h b/include/linux/rmap.h > index 3a6adfa70fb0..0af61dd193d2 100644 > --- a/include/linux/rmap.h > +++ b/include/linux/rmap.h > @@ -206,6 +206,7 @@ struct page_vma_mapped_walk { > struct page *page; > struct vm_area_struct *vma; > unsigned long address; > + pud_t *pud; > pmd_t *pmd; > pte_t *pte; > spinlock_t *ptl; > diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c > index 5e77b269c330..d9d39ec06e21 100644 > --- a/mm/page_vma_mapped.c > +++ b/mm/page_vma_mapped.c > @@ -145,9 +145,12 @@ bool 
page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) > struct page *page = pvmw->page; > pgd_t *pgd; > p4d_t *p4d; > - pud_t *pud; > + pud_t pude; > pmd_t pmde; > > + if (!pvmw->pte && !pvmw->pmd && pvmw->pud) > + return not_found(pvmw); > + > /* The only possible pmd mapping has been handled on last iteration */ > if (pvmw->pmd && !pvmw->pte) > return not_found(pvmw); > @@ -174,10 +177,31 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) > p4d = p4d_offset(pgd, pvmw->address); > if (!p4d_present(*p4d)) > return false; > - pud = pud_offset(p4d, pvmw->address); > - if (!pud_present(*pud)) > + pvmw->pud = pud_offset(p4d, pvmw->address); > + > + /* > + * Make sure the pud value isn't cached in a register by the > + * compiler and used as a stale value after we've observed a > + * subsequent update. > + */ > + pude = READ_ONCE(*pvmw->pud); > + if (pud_trans_huge(pude)) { > + pvmw->ptl = pud_lock(mm, pvmw->pud); > + if (likely(pud_trans_huge(*pvmw->pud))) { > + if (pvmw->flags & PVMW_MIGRATION) > + return not_found(pvmw); > + if (pud_page(*pvmw->pud) != page) > + return not_found(pvmw); > + return true; > + } else { > + /* THP pud was split under us: handle on pmd level */ > + spin_unlock(pvmw->ptl); > + pvmw->ptl = NULL; Hm. What makes you sure the pmd table is established here? I have not looked at PUD THP handling of MADV_DONTNEED yet, but for a PMD THP the entry can become pmd_none() at any point (unless ptl is locked). > + } > + } else if (!pud_present(pude)) > return false; > - pvmw->pmd = pmd_offset(pud, pvmw->address); > + > + pvmw->pmd = pmd_offset(pvmw->pud, pvmw->address); > /* > * Make sure the pmd value isn't cached in a register by the > * compiler and used as a stale value after we've observed a > @@ -213,6 +237,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) > } else if (!pmd_present(pmde)) { > return false; > } > + > if (!map_pte(pvmw)) > goto next_pte; > while (1) { > diff --git a/mm/rmap.c b/mm/rmap.c Why? 
> index 10195a2421cf..77cec0658b76 100644 > --- a/mm/rmap.c > +++ b/mm/rmap.c > @@ -803,9 +803,15 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma, > referenced++; > } > } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { > - if (pmdp_clear_flush_young_notify(vma, address, > - pvmw.pmd)) > - referenced++; > + if (pvmw.pmd) { > + if (pmdp_clear_flush_young_notify(vma, address, > + pvmw.pmd)) > + referenced++; > + } else if (pvmw.pud) { > + if (pudp_clear_flush_young_notify(vma, address, > + pvmw.pud)) > + referenced++; > + } > } else { > /* unexpected pmd-mapped page? */ > WARN_ON_ONCE(1); > -- > 2.28.0 > > -- Kirill A. Shutemov