The area->pages array stores the struct pages allocated for vmalloc
mappings. The allocated memory can be hugepage if the arch has
HAVE_ARCH_HUGE_VMALLOC set, while area->pages itself does not have to
be hugepage backed.

Suppose that we want to vmalloc 1026M of memory: then area->pages is
2052K in size, which is larger than PMD_SIZE when the pagesize is 4K.
Currently, 4096K will be allocated for area->pages, wherein 2044K is
wasted.

This introduces __vmalloc_node_no_huge, and makes area->pages backed
by small pages, because allocating hugepages for area->pages is
unnecessary and vulnerable to abuse.

Signed-off-by: Xu Yu <xuyu@xxxxxxxxxxxxxxxxx>
---
 include/linux/vmalloc.h |  2 ++
 mm/vmalloc.c            | 15 ++++++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 6e022cc712e6..e93f39eb46a5 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -150,6 +150,8 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
 			const void *caller) __alloc_size(1);
 void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
 		int node, const void *caller) __alloc_size(1);
+void *__vmalloc_node_no_huge(unsigned long size, unsigned long align,
+		gfp_t gfp_mask, int node, const void *caller) __alloc_size(1);
 void *vmalloc_no_huge(unsigned long size) __alloc_size(1);

 extern void vfree(const void *addr);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d2a00ad4e1dd..0bdbb96d3e3f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2925,17 +2925,18 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	unsigned long size = get_vm_area_size(area);
 	unsigned long array_size;
 	unsigned int nr_small_pages = size >> PAGE_SHIFT;
+	unsigned int max_small_pages = ALIGN(size, 1UL << page_shift) >> PAGE_SHIFT;
 	unsigned int page_order;

-	array_size = (unsigned long)nr_small_pages * sizeof(struct page *);
+	array_size = (unsigned long)max_small_pages * sizeof(struct page *);
 	gfp_mask |= __GFP_NOWARN;
 	if (!(gfp_mask & (GFP_DMA | GFP_DMA32)))
 		gfp_mask |= __GFP_HIGHMEM;

 	/* Please note that the recursion is strictly bounded. */
 	if (array_size > PAGE_SIZE) {
-		area->pages = __vmalloc_node(array_size, 1, nested_gfp, node,
-					area->caller);
+		area->pages = __vmalloc_node_no_huge(array_size, 1, nested_gfp,
+					node, area->caller);
 	} else {
 		area->pages = kmalloc_node(array_size, nested_gfp, node);
 	}
@@ -3114,6 +3115,14 @@ void *__vmalloc_node(unsigned long size, unsigned long align,
 	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
 				gfp_mask, PAGE_KERNEL, 0, node, caller);
 }
+
+void *__vmalloc_node_no_huge(unsigned long size, unsigned long align,
+		gfp_t gfp_mask, int node, const void *caller)
+{
+	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
+			gfp_mask, PAGE_KERNEL, VM_NO_HUGE_VMAP, node, caller);
+}
+
 /*
  * This is only for performance analysis of vmalloc and stress purpose.
  * It is required by vmalloc test module, therefore do not use it other
--
2.20.1.2432.ga663e714