The patch titled Subject: mm/sparse: abstract sparse buffer allocations has been added to the -mm tree. Its filename is mm-sparse-abstract-sparse-buffer-allocations.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/mm-sparse-abstract-sparse-buffer-allocations.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/mm-sparse-abstract-sparse-buffer-allocations.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx> Subject: mm/sparse: abstract sparse buffer allocations Patch series "sparse_init rewrite", v5. In sparse_init() we allocate two large buffers to temporarily hold usemap and memmap for the whole machine. However, we can avoid doing that if we change sparse_init() to operate on a per-node basis instead of doing it on the whole machine beforehand. As shown by Baoquan http://lkml.kernel.org/r/20180628062857.29658-1-bhe@xxxxxxxxxx The buffers are large enough to stop the machine from booting on small memory systems. Another benefit of these changes is that they also obsolete CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER. This patch (of 5): When struct pages are allocated for sparse-vmemmap VA layout, we first try to allocate one large buffer, and then, if that fails, allocate struct pages for each section as we go. The code that allocates the buffer uses global variables and is spread across several call sites. 
Clean up the code by introducing three functions to handle the global buffer: sparse_buffer_init() initialize the buffer sparse_buffer_fini() free the remaining part of the buffer sparse_buffer_alloc() allocate from the buffer, and return NULL if the buffer is empty Define these functions in sparse.c instead of sparse-vmemmap.c because later we will use them for non-vmemmap sparse allocations as well. Link: http://lkml.kernel.org/r/20180712203730.8703-2-pasha.tatashin@xxxxxxxxxx Signed-off-by: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx> Cc: Steven Sistare <steven.sistare@xxxxxxxxxx> Cc: Daniel Jordan <daniel.m.jordan@xxxxxxxxxx> Cc: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: Jan Kara <jack@xxxxxxx> Cc: Jérôme Glisse <jglisse@xxxxxxxxxx> Cc: Souptick Joarder <jrdr.linux@xxxxxxxxx> Cc: Baoquan He <bhe@xxxxxxxxxx> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> Cc: Vlastimil Babka <vbabka@xxxxxxx> Cc: Wei Yang <richard.weiyang@xxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxx> Cc: David Rientjes <rientjes@xxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Oscar Salvador <osalvador@xxxxxxxxxxxxxxxxxx> Cc: Abdul Haleem <abdhalee@xxxxxxxxxxxxxxxxxx> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- diff -puN include/linux/mm.h~mm-sparse-abstract-sparse-buffer-allocations include/linux/mm.h --- a/include/linux/mm.h~mm-sparse-abstract-sparse-buffer-allocations +++ a/include/linux/mm.h @@ -2655,6 +2655,10 @@ void sparse_mem_maps_populate_node(struc unsigned long map_count, int nodeid); +unsigned long __init section_map_size(void); +void sparse_buffer_init(unsigned long size, int nid); +void sparse_buffer_fini(void); +void *sparse_buffer_alloc(unsigned long size); struct page *sparse_mem_map_populate(unsigned long pnum, int nid, struct vmem_altmap *altmap); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); diff -puN 
mm/sparse.c~mm-sparse-abstract-sparse-buffer-allocations mm/sparse.c --- a/mm/sparse.c~mm-sparse-abstract-sparse-buffer-allocations +++ a/mm/sparse.c @@ -400,7 +400,14 @@ static void __init sparse_early_usemaps_ } } -#ifndef CONFIG_SPARSEMEM_VMEMMAP +#ifdef CONFIG_SPARSEMEM_VMEMMAP +unsigned long __init section_map_size(void) + +{ + return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE); +} + +#else struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid, struct vmem_altmap *altmap) { @@ -457,6 +464,42 @@ void __init sparse_mem_maps_populate_nod } #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ +static void *sparsemap_buf __meminitdata; +static void *sparsemap_buf_end __meminitdata; + +void __init sparse_buffer_init(unsigned long size, int nid) +{ + BUG_ON(sparsemap_buf); + sparsemap_buf = + memblock_virt_alloc_try_nid_raw(size, PAGE_SIZE, + __pa(MAX_DMA_ADDRESS), + BOOTMEM_ALLOC_ACCESSIBLE, nid); + sparsemap_buf_end = sparsemap_buf + size; +} + +void __init sparse_buffer_fini(void) +{ + unsigned long size = sparsemap_buf_end - sparsemap_buf; + + if (sparsemap_buf && size > 0) + memblock_free_early(__pa(sparsemap_buf), size); + sparsemap_buf = NULL; +} + +void * __meminit sparse_buffer_alloc(unsigned long size) +{ + void *ptr = NULL; + + if (sparsemap_buf) { + ptr = (void *)ALIGN((unsigned long)sparsemap_buf, size); + if (ptr + size > sparsemap_buf_end) + ptr = NULL; + else + sparsemap_buf = ptr + size; + } + return ptr; +} + #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER static void __init sparse_early_mem_maps_alloc_node(void *data, unsigned long pnum_begin, diff -puN mm/sparse-vmemmap.c~mm-sparse-abstract-sparse-buffer-allocations mm/sparse-vmemmap.c --- a/mm/sparse-vmemmap.c~mm-sparse-abstract-sparse-buffer-allocations +++ a/mm/sparse-vmemmap.c @@ -43,12 +43,9 @@ static void * __ref __earlyonly_bootmem_ unsigned long goal) { return memblock_virt_alloc_try_nid_raw(size, align, goal, - BOOTMEM_ALLOC_ACCESSIBLE, node); + BOOTMEM_ALLOC_ACCESSIBLE, 
node); } -static void *vmemmap_buf; -static void *vmemmap_buf_end; - void * __meminit vmemmap_alloc_block(unsigned long size, int node) { /* If the main allocator is up use that, fallback to bootmem. */ @@ -76,18 +73,10 @@ void * __meminit vmemmap_alloc_block(uns /* need to make sure size is all the same during early stage */ void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) { - void *ptr; - - if (!vmemmap_buf) - return vmemmap_alloc_block(size, node); - - /* take the from buf */ - ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size); - if (ptr + size > vmemmap_buf_end) - return vmemmap_alloc_block(size, node); - - vmemmap_buf = ptr + size; + void *ptr = sparse_buffer_alloc(size); + if (!ptr) + ptr = vmemmap_alloc_block(size, node); return ptr; } @@ -279,19 +268,9 @@ void __init sparse_mem_maps_populate_nod unsigned long map_count, int nodeid) { unsigned long pnum; - unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; - void *vmemmap_buf_start; int nr_consumed_maps = 0; - size = ALIGN(size, PMD_SIZE); - vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count, - PMD_SIZE, __pa(MAX_DMA_ADDRESS)); - - if (vmemmap_buf_start) { - vmemmap_buf = vmemmap_buf_start; - vmemmap_buf_end = vmemmap_buf_start + size * map_count; - } - + sparse_buffer_init(section_map_size() * map_count, nodeid); for (pnum = pnum_begin; pnum < pnum_end; pnum++) { if (!present_section_nr(pnum)) continue; @@ -303,12 +282,5 @@ void __init sparse_mem_maps_populate_nod pr_err("%s: sparsemem memory map backing failed some memory will not be available\n", __func__); } - - if (vmemmap_buf_start) { - /* need to free left buf */ - memblock_free_early(__pa(vmemmap_buf), - vmemmap_buf_end - vmemmap_buf); - vmemmap_buf = NULL; - vmemmap_buf_end = NULL; - } + sparse_buffer_fini(); } _ Patches currently in -mm which might be from pasha.tatashin@xxxxxxxxxx are mm-skip-invalid-pages-block-at-a-time-in-zero_resv_unresv.patch 
mm-sparse-abstract-sparse-buffer-allocations.patch mm-sparse-use-the-new-sparse-buffer-functions-in-non-vmemmap.patch mm-sparse-move-buffer-init-fini-to-the-common-place.patch mm-sparse-add-new-sparse_init_nid-and-sparse_init.patch mm-sparse-delete-old-sprase_init-and-enable-new-one.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html