With the new thp refcounting we don't need to mark the PMD splitting. Drop the code to handle this. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxxxxxxx> --- arch/powerpc/include/asm/kvm_book3s_64.h | 6 -- arch/powerpc/include/asm/pgtable-ppc64.h | 29 ++------ arch/powerpc/mm/hugepage-hash64.c | 3 - arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/mm/pgtable_64.c | 111 ++++++++++++------------------- mm/gup.c | 2 +- 6 files changed, 52 insertions(+), 101 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 2d81e202bdcc..9a96fe3caa48 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -298,12 +298,6 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing, cpu_relax(); continue; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - /* If hugepage and is trans splitting return None */ - if (unlikely(hugepage && - pmd_trans_splitting(pte_pmd(old_pte)))) - return __pte(0); -#endif /* If pte is not present return None */ if (unlikely(!(old_pte & _PAGE_PRESENT))) return __pte(0); diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 843cb35e6add..655dde8e9683 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -361,11 +361,6 @@ void pgtable_cache_init(void); #endif /* __ASSEMBLY__ */ /* - * THP pages can't be special. So use the _PAGE_SPECIAL - */ -#define _PAGE_SPLITTING _PAGE_SPECIAL - -/* * We need to differentiate between explicit huge page and THP huge * page, since THP huge page also need to track real subpage details */ @@ -375,8 +370,7 @@ void pgtable_cache_init(void); * set of bits not changed in pmd_modify. */ #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | \ - _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \ - _PAGE_THP_HUGE) + _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_THP_HUGE) #ifndef __ASSEMBLY__ /* @@ -458,13 +452,6 @@ static inline int pmd_trans_huge(pmd_t pmd) return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE); } -static inline int pmd_trans_splitting(pmd_t pmd) -{ - if (pmd_trans_huge(pmd)) - return pmd_val(pmd) & _PAGE_SPLITTING; - return 0; -} - extern int has_transparent_hugepage(void); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -517,12 +504,6 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd) return pmd; } -static inline pmd_t pmd_mksplitting(pmd_t pmd) -{ - pmd_val(pmd) |= _PAGE_SPLITTING; - return pmd; -} - #define __HAVE_ARCH_PMD_SAME static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) { @@ -577,8 +558,12 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0); } -#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH -extern void pmdp_splitting_flush(struct vm_area_struct *vma, +#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH_NOTIFY +extern void pmdp_splitting_flush_notify(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_COLLAPSE_FLUSH +extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #define __HAVE_ARCH_PGTABLE_DEPOSIT diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 86686514ae13..078f7207afd2 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -39,9 +39,6 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, /* If PMD busy, retry the access */ if (unlikely(old_pmd & _PAGE_BUSY)) return 0; - /* If PMD is trans splitting retry the access */ - if (unlikely(old_pmd & _PAGE_SPLITTING)) - return 0; /* If PMD permissions don't match, take page fault */ if (unlikely(access & ~old_pmd)) return 1; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index f30ae0f7f570..dfd7db0cfbee 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -1008,7 +1008,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift * hpte invalidate * */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) + if (pmd_none(pmd)) return NULL; if (pmd_huge(pmd) || pmd_large(pmd)) { diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 91bb8836825a..fa49e2ff042b 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -36,6 +36,7 @@ #include <linux/memblock.h> #include <linux/slab.h> #include <linux/hugetlb.h> +#include <linux/mmu_notifier.h> #include <asm/pgalloc.h> #include <asm/page.h> @@ -557,45 +558,9 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - pmd_t pmd; - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - if (pmd_trans_huge(*pmdp)) { - pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp); - } else { - /* - * khugepaged calls this for normal pmd - */ - pmd = *pmdp; - pmd_clear(pmdp); - /* - * Wait for all pending hash_page to finish. This is needed - * in case of subpage collapse. When we collapse normal pages - * to hugepage, we first clear the pmd, then invalidate all - * the PTE entries. The assumption here is that any low level - * page fault will see a none pmd and take the slow path that - * will wait on mmap_sem. But we could very well be in a - * hash_page with local ptep pointer value. Such a hash page - * can result in adding new HPTE entries for normal subpages. - * That means we could be modifying the page content as we - * copy them to a huge page. So wait for parallel hash_page - * to finish before invalidating HPTE entries. We can do this - * by sending an IPI to all the cpus and executing a dummy - * function there. - */ - kick_all_cpus_sync(); - /* - * Now invalidate the hpte entries in the range - * covered by pmd. This make sure we take a - * fault and will find the pmd as none, which will - * result in a major fault which takes mmap_sem and - * hence wait for collapse to complete. Without this - * the __collapse_huge_page_copy can result in copying - * the old content. - */ - flush_tlb_pmd_range(vma->vm_mm, &pmd, address); - } - return pmd; + VM_BUG_ON(!pmd_trans_huge(*pmdp)); + return pmdp_get_and_clear(vma->vm_mm, address, pmdp); } int pmdp_test_and_clear_young(struct vm_area_struct *vma, @@ -622,49 +587,59 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma, * We mark the pmd splitting and invalidate all the hpte * entries for this hugepage. */ -void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) +void pmdp_splitting_flush_notify(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) { - unsigned long old, tmp; - VM_BUG_ON(address & ~HPAGE_PMD_MASK); #ifdef CONFIG_DEBUG_VM WARN_ON(!pmd_trans_huge(*pmdp)); assert_spin_locked(&vma->vm_mm->page_table_lock); #endif + trace_hugepage_splitting(address, *pmdp); + pmdp_clear_flush_notify(vma, address, pmdp); + /* + * This ensures that generic code that rely on IRQ disabling + * to prevent a parallel THP PMD split work as expected. + */ + kick_all_cpus_sync(); +} -#ifdef PTE_ATOMIC_UPDATES +pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + pmd_t pmd; - __asm__ __volatile__( - "1: ldarx %0,0,%3\n\ - andi. %1,%0,%6\n\ - bne- 1b \n\ - ori %1,%0,%4 \n\ - stdcx. %1,0,%3 \n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*pmdp) - : "r" (pmdp), "i" (_PAGE_SPLITTING), "m" (*pmdp), "i" (_PAGE_BUSY) - : "cc" ); -#else - old = pmd_val(*pmdp); - *pmdp = __pmd(old | _PAGE_SPLITTING); -#endif + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + pmd = *pmdp; + pmd_clear(pmdp); /* - * If we didn't had the splitting flag set, go and flush the - * HPTE entries. + * Wait for all pending hash_page to finish. This is needed + * in case of subpage collapse. When we collapse normal pages + * to hugepage, we first clear the pmd, then invalidate all + * the PTE entries. The assumption here is that any low level + * page fault will see a none pmd and take the slow path that + * will wait on mmap_sem. But we could very well be in a + * hash_page with local ptep pointer value. Such a hash page + * can result in adding new HPTE entries for normal subpages. + * That means we could be modifying the page content as we + * copy them to a huge page. So wait for parallel hash_page + * to finish before invalidating HPTE entries. We can do this + * by sending an IPI to all the cpus and executing a dummy + * function there. */ - trace_hugepage_splitting(address, old); - if (!(old & _PAGE_SPLITTING)) { - /* We need to flush the hpte */ - if (old & _PAGE_HASHPTE) - hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old); - } + kick_all_cpus_sync(); /* - * This ensures that generic code that rely on IRQ disabling - * to prevent a parallel THP split work as expected. + * Now invalidate the hpte entries in the range + * covered by pmd. This make sure we take a + * fault and will find the pmd as none, which will + * result in a major fault which takes mmap_sem and + * hence wait for collapse to complete. Without this + * the __collapse_huge_page_copy can result in copying + * the old content. */ - kick_all_cpus_sync(); + flush_tlb_pmd_range(vma->vm_mm, &pmd, address); + return pmd; } /* diff --git a/mm/gup.c b/mm/gup.c index 0cebfa76fd0c..8375781b76f0 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1215,7 +1215,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, pmd_t pmd = ACCESS_ONCE(*pmdp); next = pmd_addr_end(addr, end); - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) + if (pmd_none(pmd)) return 0; if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) { -- 2.1.4 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>