The patch titled Subject: powerpc/64s: use contiguous PMD/PUD instead of HUGEPD has been added to the -mm mm-unstable branch. Its filename is powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd.patch This patch will later appear in the mm-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Christophe Leroy <christophe.leroy@xxxxxxxxxx> Subject: powerpc/64s: use contiguous PMD/PUD instead of HUGEPD Date: Tue, 2 Jul 2024 15:51:33 +0200 On book3s/64, the only user of hugepd is hash in 4k mode. All other setups (hash-64, radix-4, radix-64) use leaf PMD/PUD. Rework hash-4k to use contiguous PMD and PUD instead. In that setup there are only two huge page sizes: 16M and 16G. 16M sits at PMD level and 16G at PUD level. pte_update doesn't know page size, lets use the same trick as hpte_need_flush() to get page size from segment properties. That's not the most efficient way but let's do that until callers of pte_update() provide page size instead of just a huge flag. Link: https://lkml.kernel.org/r/7448f60a9b3efd396595f4f735d1e0babc5ae379.1719928057.git.christophe.leroy@xxxxxxxxxx Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxxxxxx> Acked-by: Michael Ellerman <mpe@xxxxxxxxxxxxxx> (powerpc) Cc: Jason Gunthorpe <jgg@xxxxxxxxxx> Cc: Nicholas Piggin <npiggin@xxxxxxxxx> Cc: Oscar Salvador <osalvador@xxxxxxx> Cc: Peter Xu <peterx@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 15 ---- arch/powerpc/include/asm/book3s/64/hash.h | 40 +++++++++-- arch/powerpc/include/asm/book3s/64/hugetlb.h | 38 ---------- arch/powerpc/include/asm/book3s/64/pgtable-4k.h | 47 ------------- arch/powerpc/include/asm/book3s/64/pgtable-64k.h | 20 ----- arch/powerpc/include/asm/book3s/64/pgtable.h | 22 +++++- arch/powerpc/include/asm/hugetlb.h | 4 + arch/powerpc/include/asm/nohash/hugetlb-e500.h | 4 - arch/powerpc/include/asm/page.h | 8 -- arch/powerpc/mm/book3s64/hash_utils.c | 11 +-- arch/powerpc/mm/book3s64/hugetlbpage.c | 10 ++ arch/powerpc/mm/book3s64/pgtable.c | 12 --- arch/powerpc/mm/hugetlbpage.c | 26 ------- arch/powerpc/mm/pgtable.c | 2 arch/powerpc/platforms/Kconfig.cputype | 1 15 files changed, 74 insertions(+), 186 deletions(-) --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h~powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd +++ a/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -74,21 +74,6 @@ #define remap_4k_pfn(vma, addr, pfn, prot) \ remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot)) -#ifdef CONFIG_HUGETLB_PAGE -static inline int hash__hugepd_ok(hugepd_t hpd) -{ - unsigned long hpdval = hpd_val(hpd); - /* - * if it is not a pte and have hugepd shift mask - * set, then it is a hugepd directory pointer - */ - if (!(hpdval & _PAGE_PTE) && (hpdval & _PAGE_PRESENT) && - ((hpdval & HUGEPD_SHIFT_MASK) != 0)) - return true; - return false; -} -#endif - /* * 4K PTE format is different from 64K PTE format. Saving the hash_slot is just * a matter of returning the PTE bits that need to be modified. On 64K PTE, --- a/arch/powerpc/include/asm/book3s/64/hash.h~powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd +++ a/arch/powerpc/include/asm/book3s/64/hash.h @@ -4,6 +4,7 @@ #ifdef __KERNEL__ #include <asm/asm-const.h> +#include <asm/book3s/64/slice.h> /* * Common bits between 4K and 64K pages in a linux-style PTE. @@ -161,14 +162,10 @@ extern void hpte_need_flush(struct mm_st pte_t *ptep, unsigned long pte, int huge); unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags); /* Atomic PTE updates */ -static inline unsigned long hash__pte_update(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep, unsigned long clr, - unsigned long set, - int huge) +static inline unsigned long hash__pte_update_one(pte_t *ptep, unsigned long clr, + unsigned long set) { __be64 old_be, tmp_be; - unsigned long old; __asm__ __volatile__( "1: ldarx %0,0,%3 # pte_update\n\ @@ -182,11 +179,40 @@ static inline unsigned long hash__pte_up : "r" (ptep), "r" (cpu_to_be64(clr)), "m" (*ptep), "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set)) : "cc" ); + + return be64_to_cpu(old_be); +} + +static inline unsigned long hash__pte_update(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, unsigned long clr, + unsigned long set, + int huge) +{ + unsigned long old; + + old = hash__pte_update_one(ptep, clr, set); + + if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && huge) { + unsigned int psize = get_slice_psize(mm, addr); + int nb, i; + + if (psize == MMU_PAGE_16M) + nb = SZ_16M / PMD_SIZE; + else if (psize == MMU_PAGE_16G) + nb = SZ_16G / PUD_SIZE; + else + nb = 1; + + WARN_ON_ONCE(nb == 1); /* Should never happen */ + + for (i = 1; i < nb; i++) + hash__pte_update_one(ptep + i, clr, set); + } /* huge pages use the old page table lock */ if (!huge) assert_pte_locked(mm, addr); - old = be64_to_cpu(old_be); if (old & H_PAGE_HASHPTE) hpte_need_flush(mm, addr, ptep, old, huge); --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h~powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd +++ a/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -49,9 +49,6 @@ static inline bool gigantic_page_runtime return true; } -/* hugepd entry valid bit */ -#define HUGEPD_VAL_BITS (0x8000000000000000UL) - #define huge_ptep_modify_prot_start huge_ptep_modify_prot_start extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); @@ -60,29 +57,7 @@ extern pte_t huge_ptep_modify_prot_start extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t new_pte); -/* - * This should work for other subarchs too. But right now we use the - * new format only for 64bit book3s - */ -static inline pte_t *hugepd_page(hugepd_t hpd) -{ - BUG_ON(!hugepd_ok(hpd)); - /* - * We have only four bits to encode, MMU page size - */ - BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf); - return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK); -} - -static inline unsigned int hugepd_mmu_psize(hugepd_t hpd) -{ - return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2; -} -static inline unsigned int hugepd_shift(hugepd_t hpd) -{ - return mmu_psize_to_shift(hugepd_mmu_psize(hpd)); -} static inline void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { @@ -90,19 +65,6 @@ static inline void flush_hugetlb_page(st return radix__flush_hugetlb_page(vma, vmaddr); } -static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, - unsigned int pdshift) -{ - unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd); - - return hugepd_page(hpd) + idx; -} - -static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) -{ - *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS | (shift_to_mmu_psize(pshift) << 2)); -} - void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); static inline int check_and_get_huge_psize(int shift) --- a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H -#define _ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H -/* - * hash 4k can't share hugetlb and also doesn't support THP - */ -#ifndef __ASSEMBLY__ -#ifdef CONFIG_HUGETLB_PAGE -/* - * With radix , we have hugepage ptes in the pud and pmd entries. We don't - * need to setup hugepage directory for them. Our pte and page directory format - * enable us to have this enabled. - */ -static inline int hugepd_ok(hugepd_t hpd) -{ - if (radix_enabled()) - return 0; - return hash__hugepd_ok(hpd); -} -#define is_hugepd(hpd) (hugepd_ok(hpd)) - -/* - * 16M and 16G huge page directory tables are allocated from slab cache - * - */ -#define H_16M_CACHE_INDEX (PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE - 24) -#define H_16G_CACHE_INDEX \ - (PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + H_PUD_INDEX_SIZE - 34) - -static inline int get_hugepd_cache_index(int index) -{ - switch (index) { - case H_16M_CACHE_INDEX: - return HTLB_16M_INDEX; - case H_16G_CACHE_INDEX: - return HTLB_16G_INDEX; - default: - BUG(); - } - /* should not reach */ -} - -#endif /* CONFIG_HUGETLB_PAGE */ - -#endif /* __ASSEMBLY__ */ - -#endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H */ --- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h~powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd +++ a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h @@ -5,26 +5,6 @@ #ifndef __ASSEMBLY__ #ifdef CONFIG_HUGETLB_PAGE -/* - * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't - * need to setup hugepage directory for them. Our pte and page directory format - * enable us to have this enabled. - */ -static inline int hugepd_ok(hugepd_t hpd) -{ - return 0; -} - -#define is_hugepd(pdep) 0 - -/* - * This should never get called - */ -static __always_inline int get_hugepd_cache_index(int index) -{ - BUILD_BUG(); -} - #endif /* CONFIG_HUGETLB_PAGE */ static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr, --- a/arch/powerpc/include/asm/book3s/64/pgtable.h~powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd +++ a/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -274,6 +274,24 @@ static inline bool pud_leaf(pud_t pud) { return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); } + +#define pmd_leaf_size pmd_leaf_size +static inline unsigned long pmd_leaf_size(pmd_t pmd) +{ + if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !radix_enabled()) + return SZ_16M; + else + return PMD_SIZE; +} + +#define pud_leaf_size pud_leaf_size +static inline unsigned long pud_leaf_size(pud_t pud) +{ + if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !radix_enabled()) + return SZ_16G; + else + return PUD_SIZE; +} #endif /* __ASSEMBLY__ */ #include <asm/book3s/64/hash.h> @@ -285,11 +303,9 @@ static inline bool pud_leaf(pud_t pud) #define MAX_PHYSMEM_BITS R_MAX_PHYSMEM_BITS #endif - +/* hash 4k can't share hugetlb and also doesn't support THP */ #ifdef CONFIG_PPC_64K_PAGES #include <asm/book3s/64/pgtable-64k.h> -#else -#include <asm/book3s/64/pgtable-4k.h> #endif #include <asm/barrier.h> --- a/arch/powerpc/include/asm/hugetlb.h~powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd +++ a/arch/powerpc/include/asm/hugetlb.h @@ -37,6 +37,10 @@ void hugetlb_free_pgd_range(struct mmu_g unsigned long ceiling); #endif +#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, + pte_t pte, unsigned long sz); + #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) --- a/arch/powerpc/include/asm/nohash/hugetlb-e500.h~powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd +++ a/arch/powerpc/include/asm/nohash/hugetlb-e500.h @@ -2,10 +2,6 @@ #ifndef _ASM_POWERPC_NOHASH_HUGETLB_E500_H #define _ASM_POWERPC_NOHASH_HUGETLB_E500_H -#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, - pte_t pte, unsigned long sz); - void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); static inline int check_and_get_huge_psize(int shift) --- a/arch/powerpc/include/asm/page.h~powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd +++ a/arch/powerpc/include/asm/page.h @@ -269,14 +269,6 @@ static inline const void *pfn_to_kaddr(u #define is_kernel_addr(x) ((x) >= TASK_SIZE) #endif -#ifdef CONFIG_PPC_BOOK3S_64 -/* - * Book3S 64 stores real addresses in the hugepd entries to - * avoid overlaps with _PAGE_PRESENT and _PAGE_PTE. - */ -#define HUGEPD_ADDR_MASK (0x0ffffffffffffffful & ~HUGEPD_SHIFT_MASK) -#endif /* CONFIG_PPC_BOOK3S_64 */ - /* * Some number of bits at the level of the page table that points to * a hugepte are used to encode the size. This masks those bits. --- a/arch/powerpc/mm/book3s64/hash_utils.c~powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd +++ a/arch/powerpc/mm/book3s64/hash_utils.c @@ -1233,10 +1233,6 @@ void __init hash__early_init_mmu(void) __pmd_table_size = H_PMD_TABLE_SIZE; __pud_table_size = H_PUD_TABLE_SIZE; __pgd_table_size = H_PGD_TABLE_SIZE; - /* - * 4k use hugepd format, so for hash set then to - * zero - */ __pmd_val_bits = HASH_PMD_VAL_BITS; __pud_val_bits = HASH_PUD_VAL_BITS; __pgd_val_bits = HASH_PGD_VAL_BITS; @@ -1546,6 +1542,13 @@ int hash_page_mm(struct mm_struct *mm, u goto bail; } + if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !radix_enabled()) { + if (hugeshift == PMD_SHIFT && psize == MMU_PAGE_16M) + hugeshift = mmu_psize_defs[MMU_PAGE_16M].shift; + if (hugeshift == PUD_SHIFT && psize == MMU_PAGE_16G) + hugeshift = mmu_psize_defs[MMU_PAGE_16G].shift; + } + /* * Add _PAGE_PRESENT to the required access perm. If there are parallel * updates to the pte that can possibly clear _PAGE_PTE, catch that too. --- a/arch/powerpc/mm/book3s64/hugetlbpage.c~powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd +++ a/arch/powerpc/mm/book3s64/hugetlbpage.c @@ -53,6 +53,16 @@ int __hash_page_huge(unsigned long ea, u /* If PTE permissions don't match, take page fault */ if (unlikely(!check_pte_access(access, old_pte))) return 1; + /* + * If hash-4k, hugepages use seeral contiguous PxD entries + * so bail out and let mm make the page young or dirty + */ + if (IS_ENABLED(CONFIG_PPC_4K_PAGES)) { + if (!(old_pte & _PAGE_ACCESSED)) + return 1; + if ((access & _PAGE_WRITE) && !(old_pte & _PAGE_DIRTY)) + return 1; + } /* * Try to lock the PTE, add ACCESSED and DIRTY if it was --- a/arch/powerpc/mm/book3s64/pgtable.c~powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd +++ a/arch/powerpc/mm/book3s64/pgtable.c @@ -461,18 +461,6 @@ static inline void pgtable_free(void *ta case PUD_INDEX: __pud_free(table); break; -#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE) - /* 16M hugepd directory at pud level */ - case HTLB_16M_INDEX: - BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0); - kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table); - break; - /* 16G hugepd directory at the pgd level */ - case HTLB_16G_INDEX: - BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0); - kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table); - break; -#endif /* We don't free pgd table via RCU callback */ default: BUG(); --- a/arch/powerpc/mm/hugetlbpage.c~powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd +++ a/arch/powerpc/mm/hugetlbpage.c @@ -592,40 +592,14 @@ static int __init hugetlbpage_init(void) for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { unsigned shift; - unsigned pdshift; if (!mmu_psize_defs[psize].shift) continue; shift = mmu_psize_to_shift(psize); -#ifdef CONFIG_PPC_BOOK3S_64 - if (shift > PGDIR_SHIFT) - continue; - else if (shift > PUD_SHIFT) - pdshift = PGDIR_SHIFT; - else if (shift > PMD_SHIFT) - pdshift = PUD_SHIFT; - else - pdshift = PMD_SHIFT; -#else - if (shift < PUD_SHIFT) - pdshift = PMD_SHIFT; - else if (shift < PGDIR_SHIFT) - pdshift = PUD_SHIFT; - else - pdshift = PGDIR_SHIFT; -#endif - if (add_huge_page_size(1ULL << shift) < 0) continue; - /* - * if we have pdshift and shift value same, we don't - * use pgt cache for hugepd. - */ - if (pdshift > shift) { - pgtable_cache_add(pdshift - shift); - } configured = true; } --- a/arch/powerpc/mm/pgtable.c~powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd +++ a/arch/powerpc/mm/pgtable.c @@ -331,7 +331,7 @@ void set_huge_pte_at(struct mm_struct *m __set_huge_pte_at(pmdp, ptep, pte_val(pte)); } } -#elif defined(CONFIG_PPC_E500) +#else void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz) { --- a/arch/powerpc/platforms/Kconfig.cputype~powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd +++ a/arch/powerpc/platforms/Kconfig.cputype @@ -98,7 +98,6 @@ config PPC_BOOK3S_64 select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_SPLIT_PMD_PTLOCK select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE - select ARCH_HAS_HUGEPD if HUGETLB_PAGE select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_NUMA_BALANCING select HAVE_MOVE_PMD _ Patches currently in -mm which might be from christophe.leroy@xxxxxxxxxx are mm-define-__pte_leaf_size-to-also-take-a-pmd-entry.patch mm-provide-mm_struct-and-address-to-huge_ptep_get.patch powerpc-mm-remove-_page_psize.patch powerpc-mm-fix-__find_linux_pte-on-32-bits-with-pmd-leaf-entries.patch powerpc-mm-allow-hugepages-without-hugepd.patch powerpc-8xx-fix-size-given-to-set_huge_pte_at.patch powerpc-8xx-rework-support-for-8m-pages-using-contiguous-pte-entries.patch powerpc-8xx-simplify-struct-mmu_psize_def.patch powerpc-e500-remove-enc-and-ind-fields-from-struct-mmu_psize_def.patch powerpc-e500-switch-to-64-bits-pgd-on-85xx-32-bits.patch powerpc-e500-encode-hugepage-size-in-pte-bits.patch powerpc-e500-dont-pre-check-write-access-on-data-tlb-error.patch powerpc-e500-free-r10-for-find_pte.patch powerpc-e500-use-contiguous-pmd-instead-of-hugepd.patch powerpc-64s-use-contiguous-pmd-pud-instead-of-hugepd.patch powerpc-mm-remove-hugepd-leftovers.patch mm-remove-config_arch_has_hugepd.patch