> On Mar 30, 2022, at 4:40 PM, Edgecombe, Rick P <rick.p.edgecombe@xxxxxxxxx> wrote:
> 
> On Wed, 2022-03-30 at 15:56 -0700, Song Liu wrote:
>> With HAVE_ARCH_HUGE_VMALLOC_FLAG, users of __vmalloc_node_range() could
>> use VM_TRY_HUGE_VMAP to (try to) allocate PMD_SIZE pages for
>> size >= PMD_SIZE cases. Similar to HAVE_ARCH_HUGE_VMALLOC, the use can
>> disable huge page by specifying nohugeiomap in kernel command line.
>>
>> The first user of VM_TRY_HUGE_VMAP will be bpf_prog_pack.
>>
>> Signed-off-by: Song Liu <song@xxxxxxxxxx>
>> ---
>>  arch/Kconfig            |  9 +++++++++
>>  include/linux/vmalloc.h |  9 +++++++--
>>  mm/vmalloc.c            | 28 +++++++++++++++++++---------
>>  3 files changed, 35 insertions(+), 11 deletions(-)
>>
>> diff --git a/arch/Kconfig b/arch/Kconfig
>> index 33e06966f248..23b6e92aebaa 100644
>> --- a/arch/Kconfig
>> +++ b/arch/Kconfig
>> @@ -864,6 +864,15 @@ config HAVE_ARCH_HUGE_VMALLOC
>>         depends on HAVE_ARCH_HUGE_VMAP
>>         bool
>>
>> +#
>> +# HAVE_ARCH_HUGE_VMALLOC_FLAG allows users of __vmalloc_node_range to allocate
>> +# huge page without HAVE_ARCH_HUGE_VMALLOC. To allocate huge pages, the user
>> +# need to call __vmalloc_node_range with VM_TRY_HUGE_VMAP.
>> +#
>> +config HAVE_ARCH_HUGE_VMALLOC_FLAG
>> +       depends on HAVE_ARCH_HUGE_VMAP
>> +       bool
>> +
>>  config ARCH_WANT_HUGE_PMD_SHARE
>>         bool
>>
>> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
>> index 3b1df7da402d..a48d0690b66f 100644
>> --- a/include/linux/vmalloc.h
>> +++ b/include/linux/vmalloc.h
>> @@ -35,6 +35,11 @@ struct notifier_block;          /* in notifier.h */
>>  #define VM_DEFER_KMEMLEAK       0
>>  #endif
>>
>> +#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG
>> +#define VM_TRY_HUGE_VMAP        0x00001000      /* Allow for huge pages on HAVE_ARCH_HUGE_VMALLOC_FLAG arch's */
>> +#else
>> +#define VM_TRY_HUGE_VMAP        0
>> +#endif
>>  /* bits [20..32] reserved for arch specific ioremap internals */
>>
>>  /*
>> @@ -51,7 +56,7 @@ struct vm_struct {
>>         unsigned long           size;
>>         unsigned long           flags;
>>         struct page             **pages;
>> -#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
>> +#if (defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC) || defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG))
>>         unsigned int            page_order;
>>  #endif
>>         unsigned int            nr_pages;
>> @@ -225,7 +230,7 @@ static inline bool is_vm_area_hugepages(const void *addr)
>>          * prevents that. This only indicates the size of the physical page
>>          * allocated in the vmalloc layer.
>>          */
>> -#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
>> +#if (defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC) || defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG))
> 
> Since HAVE_ARCH_HUGE_VMALLOC_FLAG depends on HAVE_ARCH_HUGE_VMAP, I
> don't think you need both here.

I think we still need this one (_VMALLOC || _VMALLOC_FLAG)? Note that
this is not _VMAP || _VMALLOC_FLAG.

> 
>>         return find_vm_area(addr)->page_order > 0;
>>  #else
>>         return false;
>> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
>> index e163372d3967..179200bce285 100644
>> --- a/mm/vmalloc.c
>> +++ b/mm/vmalloc.c
>> @@ -46,7 +46,7 @@
>>  #include "internal.h"
>>  #include "pgalloc-track.h"
>>
>> -#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
>> +#if (defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC) || defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG))
> 
> Same as above. And this one could be just CONFIG_HAVE_ARCH_HUGE_VMAP?
> 
>>  static unsigned int __ro_after_init ioremap_max_page_shift = BITS_PER_LONG - 1;
>>
>>  static int __init set_nohugeiomap(char *str)
>> @@ -55,11 +55,11 @@ static int __init set_nohugeiomap(char *str)
>>         return 0;
>>  }
>>  early_param("nohugeiomap", set_nohugeiomap);
>> -#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
>> +#else /* CONFIG_HAVE_ARCH_HUGE_VMAP || CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG */
>>  static const unsigned int ioremap_max_page_shift = PAGE_SHIFT;
>> -#endif  /* CONFIG_HAVE_ARCH_HUGE_VMAP */
>> +#endif  /* CONFIG_HAVE_ARCH_HUGE_VMAP || CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG*/
> 
> Same here, and for the rest below. I think having
> HAVE_ARCH_HUGE_VMALLOC_FLAG depend on HAVE_ARCH_HUGE_VMAP like you did
> is nice because you don't need special logic for most of the huge
> page parts. It should shrink this patch.
> 
>>
>> -#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
>> +#if (defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC) || defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG))
>>  static bool __ro_after_init vmap_allow_huge = true;
>>
>>  static int __init set_nohugevmalloc(char *str)
>> @@ -582,8 +582,9 @@ int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
>>
>>         WARN_ON(page_shift < PAGE_SHIFT);
>>
>> -       if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
>> -                       page_shift == PAGE_SHIFT)
>> +       if ((!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) &&
>> +            !IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG)) ||
>> +           (page_shift == PAGE_SHIFT))
>>                 return vmap_small_pages_range_noflush(addr, end, prot, pages);
>>
>>         for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
>> @@ -2252,7 +2253,7 @@ static struct vm_struct *vmlist __initdata;
>>
>>  static inline unsigned int vm_area_page_order(struct vm_struct *vm)
>>  {
>> -#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
>> +#if (defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC) || defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG))
>>         return vm->page_order;
>>  #else
>>         return 0;
>> @@ -2261,7 +2262,7 @@ static inline unsigned int vm_area_page_order(struct vm_struct *vm)
>>
>>  static inline void set_vm_area_page_order(struct vm_struct *vm, unsigned int order)
>>  {
>> -#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
>> +#if (defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC) || defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG))
>>         vm->page_order = order;
>>  #else
>>         BUG_ON(order != 0);
>> @@ -3056,6 +3057,15 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
>>         return NULL;
>>  }
>>
>> +static bool vmalloc_try_huge_page(unsigned long vm_flags)
>> +{
>> +       if (!vmap_allow_huge || (vm_flags & VM_NO_HUGE_VMAP))
>> +               return false;
>> +
>> +       /* VM_TRY_HUGE_VMAP only works for CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG */
>> +       return vm_flags & VM_TRY_HUGE_VMAP;
>> +}
>> +
> 
> It won't return true in the case of just CONFIG_HAVE_ARCH_HUGE_VMALLOC
> and vmap_allow_huge. If you have CONFIG_HAVE_ARCH_HUGE_VMALLOC, but not
> CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG like powerpc, it should still try
> huge pages like before.

Yeah, I missed this one. Will fix in the next version (rough sketch at
the end of this mail).

> 
>>  /**
>>   * __vmalloc_node_range - allocate virtually contiguous memory
>>   * @size:                 allocation size
>> @@ -3106,7 +3116,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
>>                 return NULL;
>>         }
>>
>> -       if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP)) {
>> +       if (vmalloc_try_huge_page(vm_flags)) {
>>                 unsigned long size_per_node;
>>
>>                 /*
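
For the vmalloc_try_huge_page() issue above, the untested sketch below is
roughly what I have in mind for the next version: arch'es that select
HAVE_ARCH_HUGE_VMALLOC keep the current opt-out behavior, and the explicit
VM_TRY_HUGE_VMAP flag is only required for the HAVE_ARCH_HUGE_VMALLOC_FLAG-only
case. Something like:

static bool vmalloc_try_huge_page(unsigned long vm_flags)
{
        if (!vmap_allow_huge || (vm_flags & VM_NO_HUGE_VMAP))
                return false;

        /*
         * Arch'es with HAVE_ARCH_HUGE_VMALLOC (e.g. powerpc) still try
         * huge pages by default, as they did before this patch.
         */
        if (IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC))
                return true;

        /*
         * Otherwise, huge pages are only used when the caller asked for
         * them explicitly (HAVE_ARCH_HUGE_VMALLOC_FLAG arch'es).
         */
        return vm_flags & VM_TRY_HUGE_VMAP;
}

Does this match what you had in mind?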