On Wed, 2022-03-30 at 15:56 -0700, Song Liu wrote:
> With HAVE_ARCH_HUGE_VMALLOC_FLAG, users of __vmalloc_node_range() could
> use VM_TRY_HUGE_VMAP to (try to) allocate PMD_SIZE pages for
> size >= PMD_SIZE cases. Similar to HAVE_ARCH_HUGE_VMALLOC, the user can
> disable huge page by specifying nohugeiomap in kernel command line.
>
> The first user of VM_TRY_HUGE_VMAP will be bpf_prog_pack.
>
> Signed-off-by: Song Liu <song@xxxxxxxxxx>
> ---
>  arch/Kconfig            |  9 +++++++++
>  include/linux/vmalloc.h |  9 +++++++--
>  mm/vmalloc.c            | 28 +++++++++++++++++++---------
>  3 files changed, 35 insertions(+), 11 deletions(-)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 33e06966f248..23b6e92aebaa 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -864,6 +864,15 @@ config HAVE_ARCH_HUGE_VMALLOC
>  	depends on HAVE_ARCH_HUGE_VMAP
>  	bool
>
> +#
> +# HAVE_ARCH_HUGE_VMALLOC_FLAG allows users of __vmalloc_node_range to allocate
> +# huge page without HAVE_ARCH_HUGE_VMALLOC. To allocate huge pages, the user
> +# need to call __vmalloc_node_range with VM_TRY_HUGE_VMAP.
> +#
> +config HAVE_ARCH_HUGE_VMALLOC_FLAG
> +	depends on HAVE_ARCH_HUGE_VMAP
> +	bool
> +
>  config ARCH_WANT_HUGE_PMD_SHARE
>  	bool
>
> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
> index 3b1df7da402d..a48d0690b66f 100644
> --- a/include/linux/vmalloc.h
> +++ b/include/linux/vmalloc.h
> @@ -35,6 +35,11 @@ struct notifier_block;		/* in notifier.h */
>  #define VM_DEFER_KMEMLEAK	0
>  #endif
>
> +#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG
> +#define VM_TRY_HUGE_VMAP	0x00001000	/* Allow for huge pages on HAVE_ARCH_HUGE_VMALLOC_FLAG arch's */
> +#else
> +#define VM_TRY_HUGE_VMAP	0
> +#endif
>  /* bits [20..32] reserved for arch specific ioremap internals */
>
>  /*
> @@ -51,7 +56,7 @@ struct vm_struct {
>  	unsigned long		size;
>  	unsigned long		flags;
>  	struct page		**pages;
> -#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
> +#if (defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC) || defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG))
>  	unsigned int		page_order;
>  #endif
>  	unsigned int		nr_pages;
> @@ -225,7 +230,7 @@ static inline bool is_vm_area_hugepages(const void *addr)
>  	 * prevents that. This only indicates the size of the physical page
>  	 * allocated in the vmalloc layer.
>  	 */
> -#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
> +#if (defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC) || defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG))

Since HAVE_ARCH_HUGE_VMALLOC_FLAG depends on HAVE_ARCH_HUGE_VMAP, I
don't think you need both here.

>  	return find_vm_area(addr)->page_order > 0;
>  #else
>  	return false;
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index e163372d3967..179200bce285 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -46,7 +46,7 @@
>  #include "internal.h"
>  #include "pgalloc-track.h"
>
> -#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
> +#if (defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC) || defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG))

Same as above.

>  static unsigned int __ro_after_init ioremap_max_page_shift = BITS_PER_LONG - 1;
>
>  static int __init set_nohugeiomap(char *str)
> @@ -55,11 +55,11 @@ static int __init set_nohugeiomap(char *str)
>  	return 0;
>  }
>  early_param("nohugeiomap", set_nohugeiomap);
> -#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
> +#else /* CONFIG_HAVE_ARCH_HUGE_VMAP || CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG */
>  static const unsigned int ioremap_max_page_shift = PAGE_SHIFT;
> -#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
> +#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP || CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG */

Same here, and for the rest below.
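For example (untested, and just to illustrate the point), I believe the
ioremap_max_page_shift block could keep its original guard untouched,
since any arch that sets CONFIG_HAVE_ARCH_HUGE_VMALLOC or
CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG already gets
CONFIG_HAVE_ARCH_HUGE_VMAP through the Kconfig dependency:

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
static unsigned int __ro_after_init ioremap_max_page_shift = BITS_PER_LONG - 1;

static int __init set_nohugeiomap(char *str)
{
	ioremap_max_page_shift = PAGE_SHIFT;
	return 0;
}
early_param("nohugeiomap", set_nohugeiomap);
#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
static const unsigned int ioremap_max_page_shift = PAGE_SHIFT;
#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */

That is, no change to this chunk should be needed at all.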
I think having HAVE_ARCH_HUGE_VMALLOC_FLAG depend on HAVE_ARCH_HUGE_VMAP
like you did is nice, because you don't need special logic for most of
the huge page parts. It should shrink this patch.

>
> -#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
> +#if (defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC) || defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG))
>  static bool __ro_after_init vmap_allow_huge = true;
>
>  static int __init set_nohugevmalloc(char *str)
> @@ -582,8 +582,9 @@ int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
>
>  	WARN_ON(page_shift < PAGE_SHIFT);
>
> -	if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
> -			page_shift == PAGE_SHIFT)
> +	if ((!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) &&
> +	     !IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG)) ||
> +	    (page_shift == PAGE_SHIFT))
>  		return vmap_small_pages_range_noflush(addr, end, prot, pages);
>
>  	for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
> @@ -2252,7 +2253,7 @@ static struct vm_struct *vmlist __initdata;
>
>  static inline unsigned int vm_area_page_order(struct vm_struct *vm)
>  {
> -#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
> +#if (defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC) || defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG))
>  	return vm->page_order;
>  #else
>  	return 0;
> @@ -2261,7 +2262,7 @@ static inline unsigned int vm_area_page_order(struct vm_struct *vm)
>
>  static inline void set_vm_area_page_order(struct vm_struct *vm, unsigned int order)
>  {
> -#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
> +#if (defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC) || defined(CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG))
>  	vm->page_order = order;
>  #else
>  	BUG_ON(order != 0);
> @@ -3056,6 +3057,15 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
>  	return NULL;
>  }
>
> +static bool vmalloc_try_huge_page(unsigned long vm_flags)
> +{
> +	if (!vmap_allow_huge || (vm_flags & VM_NO_HUGE_VMAP))
> +		return false;
> +
> +	/* VM_TRY_HUGE_VMAP only works for CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG */
> +	return vm_flags & VM_TRY_HUGE_VMAP;
> +}
> +

It won't return true in the case of just CONFIG_HAVE_ARCH_HUGE_VMALLOC
and vmap_allow_huge. If you have CONFIG_HAVE_ARCH_HUGE_VMALLOC, but not
CONFIG_HAVE_ARCH_HUGE_VMALLOC_FLAG, like powerpc, it should still try
huge pages like before. See the sketch at the end of this mail.

>  /**
>   * __vmalloc_node_range - allocate virtually contiguous memory
>   * @size:		allocation size
> @@ -3106,7 +3116,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
>  		return NULL;
>  	}
>
> -	if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP)) {
> +	if (vmalloc_try_huge_page(vm_flags)) {
>  		unsigned long size_per_node;
>
>  		/*
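Something like the below (completely untested, just to sketch what I
mean) would keep the old opportunistic behavior on
HAVE_ARCH_HUGE_VMALLOC arches while making huge pages opt-in through
the new flag everywhere else:

static bool vmalloc_try_huge_page(unsigned long vm_flags)
{
	if (!vmap_allow_huge || (vm_flags & VM_NO_HUGE_VMAP))
		return false;

	/* HAVE_ARCH_HUGE_VMALLOC arches (e.g. powerpc) try huge pages as before */
	if (IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC))
		return true;

	/* otherwise, callers must opt in with VM_TRY_HUGE_VMAP */
	return vm_flags & VM_TRY_HUGE_VMAP;
}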