Huge page backed vmalloc memory could benefit performance in many cases. However, some users of vmalloc may not be ready to handle huge pages for various reasons: hardware constraints, potential pages split, etc. VM_NO_HUGE_VMAP was introduced to allow vmalloc users to opt-out huge pages. However, it is not easy to track down all the users that require the opt-out, as the allocation are passed different stacks and may cause issues in different layers. To address this issue, replace VM_NO_HUGE_VMAP with an opt-in flag, VM_ALLOW_HUGE_VMAP, so that users that benefit from huge pages could ask specificially. Also, replace vmalloc_no_huge() with opt-in helpers vmalloc_huge(), and __vmalloc_huge(). Fixes: fac54e2bfb5b ("x86/Kconfig: Select HAVE_ARCH_HUGE_VMALLOC with HAVE_ARCH_HUGE_VMAP") Link: https://lore.kernel.org/netdev/14444103-d51b-0fb3-ee63-c3f182f0b546@xxxxxxxxxxxxx/" Signed-off-by: Song Liu <song@xxxxxxxxxx> --- arch/Kconfig | 6 ++---- arch/powerpc/kernel/module.c | 2 +- arch/s390/kvm/pv.c | 2 +- include/linux/vmalloc.h | 5 +++-- mm/vmalloc.c | 34 ++++++++++++++++++++++++++++------ 5 files changed, 35 insertions(+), 14 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 29b0167c088b..31c4fdc4a4ba 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -854,10 +854,8 @@ config HAVE_ARCH_HUGE_VMAP # # Archs that select this would be capable of PMD-sized vmaps (i.e., -# arch_vmap_pmd_supported() returns true), and they must make no assumptions -# that vmalloc memory is mapped with PAGE_SIZE ptes. The VM_NO_HUGE_VMAP flag -# can be used to prohibit arch-specific allocations from using hugepages to -# help with this (e.g., modules may require it). +# arch_vmap_pmd_supported() returns true). The VM_ALLOW_HUGE_VMAP flag +# must be used to enable allocations to use hugepages. # config HAVE_ARCH_HUGE_VMALLOC depends on HAVE_ARCH_HUGE_VMAP diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 40a583e9d3c7..97a76a8619fb 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -101,7 +101,7 @@ __module_alloc(unsigned long size, unsigned long start, unsigned long end, bool * too. */ return __vmalloc_node_range(size, 1, start, end, gfp, prot, - VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP, + VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 7f7c0d6af2ce..8afede243903 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -142,7 +142,7 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm) * using large pages for the virtual memory area. * This is a hardware limitation. */ - kvm->arch.pv.stor_var = vmalloc_no_huge(vlen); + kvm->arch.pv.stor_var = vmalloc(vlen); if (!kvm->arch.pv.stor_var) goto out_err; return 0; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 3b1df7da402d..20205c4e3b23 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -26,7 +26,7 @@ struct notifier_block; /* in notifier.h */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ #define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */ #define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ -#define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */ +#define VM_ALLOW_HUGE_VMAP 0x00000400 /* Allow for huge pages on archs with HAVE_ARCH_HUGE_VMALLOC */ #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ !defined(CONFIG_KASAN_VMALLOC) @@ -153,7 +153,8 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, const void *caller) __alloc_size(1); void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) __alloc_size(1); -void *vmalloc_no_huge(unsigned long size) __alloc_size(1); +void *vmalloc_huge(unsigned long size) __alloc_size(1); +void *__vmalloc_huge(unsigned long size, gfp_t gfp_mask) __alloc_size(1); extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index e163372d3967..1dac30c0ea41 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3106,7 +3106,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, return NULL; } - if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP)) { + if (vmap_allow_huge && (vm_flags & VM_ALLOW_HUGE_VMAP)) { unsigned long size_per_node; /* @@ -3273,21 +3273,43 @@ void *vmalloc(unsigned long size) EXPORT_SYMBOL(vmalloc); /** - * vmalloc_no_huge - allocate virtually contiguous memory using small pages + * vmalloc_huge - allocate virtually contiguous memory, allow huge pages * @size: allocation size * - * Allocate enough non-huge pages to cover @size from the page level + * Allocate enough pages to cover @size from the page level + * allocator and map them into contiguous kernel virtual space. + * If @size is greater than or equal to PMD_SIZE, allow using + * huge pages for the memory + * + * Return: pointer to the allocated memory or %NULL on error + */ +void *vmalloc_huge(unsigned long size) +{ + return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, + GFP_KERNEL, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP, + NUMA_NO_NODE, __builtin_return_address(0)); +} +EXPORT_SYMBOL_GPL(vmalloc_huge); + +/** + * __vmalloc_huge - allocate virtually contiguous memory, allow huge pages + * @size: allocation size + * @gfp_mask: flags for the page level allocator + * + * Allocate enough pages to cover @size from the page level * allocator and map them into contiguous kernel virtual space. + * If @size is greater than or equal to PMD_SIZE, allow using + * huge pages for the memory * * Return: pointer to the allocated memory or %NULL on error */ -void *vmalloc_no_huge(unsigned long size) +void *__vmalloc_huge(unsigned long size, gfp_t gfp_mask) { return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL, VM_NO_HUGE_VMAP, + gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL(vmalloc_no_huge); +EXPORT_SYMBOL_GPL(__vmalloc_huge); /** * vzalloc - allocate virtually contiguous memory with zero fill -- 2.30.2