From: Christophe Leroy <christophe.leroy@xxxxxxxxxx> Subject: mm/vmalloc: enable mapping of huge pages at pte level in vmap On some architectures like powerpc, there are huge pages that are mapped at pte level. Enable it in vmap. For that, architectures can provide arch_vmap_pte_range_map_size() that returns the size of pages to map at pte level. Link: https://lkml.kernel.org/r/fb3ccc73377832ac6708181ec419128a2f98ce36.1620795204.git.christophe.leroy@xxxxxxxxxx Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxxxxxx> Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Cc: Mike Kravetz <mike.kravetz@xxxxxxxxxx> Cc: Mike Rapoport <rppt@xxxxxxxxxx> Cc: Nicholas Piggin <npiggin@xxxxxxxxx> Cc: Paul Mackerras <paulus@xxxxxxxxx> Cc: Uladzislau Rezki <uladzislau.rezki@xxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/vmalloc.h | 8 ++++++++ mm/vmalloc.c | 21 ++++++++++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) --- a/include/linux/vmalloc.h~mm-vmalloc-enable-mapping-of-huge-pages-at-pte-level-in-vmap +++ a/include/linux/vmalloc.h @@ -104,6 +104,14 @@ static inline bool arch_vmap_pmd_support } #endif +#ifndef arch_vmap_pte_range_map_size +static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end, + u64 pfn, unsigned int max_page_shift) +{ + return PAGE_SIZE; +} +#endif + /* * Highlevel APIs for driver use */ --- a/mm/vmalloc.c~mm-vmalloc-enable-mapping-of-huge-pages-at-pte-level-in-vmap +++ a/mm/vmalloc.c @@ -36,6 +36,7 @@ #include <linux/overflow.h> #include <linux/pgtable.h> #include <linux/uaccess.h> +#include <linux/hugetlb.h> #include <asm/tlbflush.h> #include <asm/shmparam.h> @@ -83,10 +84,11 @@ static void free_work(struct work_struct /*** Page table manipulation functions ***/ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot, - pgtbl_mod_mask *mask) + unsigned int max_page_shift, pgtbl_mod_mask *mask) { pte_t *pte; u64 pfn; + unsigned long size = PAGE_SIZE; pfn = phys_addr >> PAGE_SHIFT; pte = pte_alloc_kernel_track(pmd, addr, mask); @@ -94,9 +96,22 @@ static int vmap_pte_range(pmd_t *pmd, un return -ENOMEM; do { BUG_ON(!pte_none(*pte)); + +#ifdef CONFIG_HUGETLB_PAGE + size = arch_vmap_pte_range_map_size(addr, end, pfn, max_page_shift); + if (size != PAGE_SIZE) { + pte_t entry = pfn_pte(pfn, prot); + + entry = pte_mkhuge(entry); + entry = arch_make_huge_pte(entry, ilog2(size), 0); + set_huge_pte_at(&init_mm, addr, pte, entry); + pfn += PFN_DOWN(size); + continue; + } +#endif set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot)); pfn++; - } while (pte++, addr += PAGE_SIZE, addr != end); + } while (pte += PFN_DOWN(size), addr += size, addr != end); *mask |= PGTBL_PTE_MODIFIED; return 0; } @@ -145,7 +160,7 @@ static int vmap_pmd_range(pud_t *pud, un continue; } - if (vmap_pte_range(pmd, addr, next, phys_addr, prot, mask)) + if (vmap_pte_range(pmd, addr, next, phys_addr, prot, max_page_shift, mask)) return -ENOMEM; } while (pmd++, phys_addr += (next - addr), addr = next, addr != end); return 0; _