Introduce memblock early memory allocation APIs which allow to support LPAE extension on 32 bits archs. More over, this is the next step to get rid of NO_BOOTMEM memblock wrapper(nobootmem.c) and directly use memblock APIs. The proposed interface will became active if both CONFIG_HAVE_MEMBLOCK and CONFIG_NO_BOOTMEM are specified by arch. In case !CONFIG_NO_BOOTMEM, the memblock() wrappers will fallback to the existing bootmem apis so that arch's noy converted to NO_BOOTMEM continue to work as is. The meaning of MEMBLOCK_ALLOC_ACCESSIBLE and MEMBLOCK_ALLOC_ANYWHERE is kept same. TODO: Now the free_all_bootmem() function is used as is from NO_BOOTMEM allocator. Can be moved to memblock file once we remove the no-bootmem.c Cc: Yinghai Lu <yinghai@xxxxxxxxxx> Cc: Tejun Heo <tj@xxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Grygorii Strashko <grygorii.strashko@xxxxxx> Signed-off-by: Santosh Shilimkar <santosh.shilimkar@xxxxxx> --- include/linux/bootmem.h | 72 ++++++++++++++++++++++++++++++ mm/memblock.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+) diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 55d52fb..33b27bb 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -141,6 +141,78 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) + +#if defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM) + +/* FIXME: use MEMBLOCK_ALLOC_* variants here */ +#define BOOTMEM_ALLOC_ACCESSIBLE 0 +#define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0) + +/* FIXME: Move to memblock.h at a point where we remove nobootmem.c */ +void *memblock_early_alloc_try_nid_nopanic(int nid, phys_addr_t size, + phys_addr_t align, phys_addr_t from, phys_addr_t max_addr); +void *memblock_early_alloc_try_nid(int nid, phys_addr_t size, + phys_addr_t align, phys_addr_t from, phys_addr_t max_addr); +void __memblock_free_early(phys_addr_t base, phys_addr_t size); +void __memblock_free_late(phys_addr_t base, phys_addr_t size); + +#define memblock_early_alloc(x) \ + memblock_early_alloc_try_nid(MAX_NUMNODES, x, SMP_CACHE_BYTES, \ + BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE) +#define memblock_early_alloc_align(x, align) \ + memblock_early_alloc_try_nid(MAX_NUMNODES, x, align, \ + BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE) +#define memblock_early_alloc_nopanic(x) \ + memblock_early_alloc_try_nid_nopanic(MAX_NUMNODES, x, SMP_CACHE_BYTES, \ + BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE) +#define memblock_early_alloc_pages(x) \ + memblock_early_alloc_try_nid(MAX_NUMNODES, x, PAGE_SIZE, \ + BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE) +#define memblock_early_alloc_pages_nopanic(x) \ + memblock_early_alloc_try_nid_nopanic(MAX_NUMNODES, x, PAGE_SIZE, \ + BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE) +#define memblock_early_alloc_node(nid, x) \ + memblock_early_alloc_try_nid(nid, x, SMP_CACHE_BYTES, \ + BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE) +#define memblock_early_alloc_node_nopanic(nid, x) \ + memblock_early_alloc_try_nid_nopanic(nid, x, SMP_CACHE_BYTES, \ + BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE) + +#define memblock_free_early(x, s) __memblock_free_early(x, s) +#define memblock_free_early_nid(nid, x, s) __memblock_free_early(x, s) +#define memblock_free_late(x, s) __memblock_free_late(x, s) + +#else + +/* Fall back to all the existing bootmem APIs */ +#define memblock_early_alloc(x) \ + __alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) +#define memblock_early_alloc_align(x, align) \ + __alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT) +#define memblock_early_alloc_nopanic(x) \ + __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) +#define memblock_early_alloc_pages(x) \ + __alloc_bootmem(MAX_NUMNODES, x, PAGE_SIZE) +#define memblock_early_alloc_pages_nopanic(x) \ + __alloc_bootmem_nopanic(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) +#define memblock_early_alloc_node(nid, x) \ + __alloc_bootmem_node(NODE_DATA(nid), x, SMP_CACHE_BYTES, \ + BOOTMEM_LOW_LIMIT) +#define memblock_early_alloc_node_nopanic(nid, x) \ + __alloc_bootmem_node_nopanic(NODE_DATA(nid), x, SMP_CACHE_BYTES, \ + BOOTMEM_LOW_LIMIT) +#define memblock_early_alloc_try_nid(nid, size, align, from, max_addr) \ + __alloc_bootmem_node_high(NODE_DATA(nid), size, align, from) +#define memblock_early_alloc_try_nid_nopanic(nid, size, align, from, max_addr) \ + ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, align, \ + from, max_addr) +#define memblock_free_early(x, s) free_bootmem(x, s) +#define memblock_free_early_nid(nid, x, s) \ + free_bootmem_node(NODE_DATA(nid), x, s) +#define memblock_free_late(x, s) free_bootmem_late(x, s) + +#endif /* defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM) */ + #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP extern void *alloc_remap(int nid, unsigned long size); #else diff --git a/mm/memblock.c b/mm/memblock.c index 0ac412a..c67f4bb 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -20,6 +20,8 @@ #include <linux/seq_file.h> #include <linux/memblock.h> +#include "internal.h" + static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; @@ -822,6 +824,118 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); } +static void * __init _memblock_early_alloc_try_nid_nopanic(int nid, + phys_addr_t size, phys_addr_t align, + phys_addr_t from, phys_addr_t max_addr) +{ + phys_addr_t alloc; + void *ptr; + + if (WARN_ON_ONCE(slab_is_available())) { + if (nid == MAX_NUMNODES) + return kzalloc(size, GFP_NOWAIT); + else + return kzalloc_node(size, GFP_NOWAIT, nid); + } + + if (WARN_ON(!align)) + align = __alignof__(long long); + + /* align @size to avoid excessive fragmentation on reserved array */ + size = round_up(size, align); + +again: + alloc = memblock_find_in_range_node(from, max_addr, size, align, nid); + if (alloc) + goto done; + + if (nid != MAX_NUMNODES) { + alloc = + memblock_find_in_range_node(from, max_addr, size, + align, MAX_NUMNODES); + if (alloc) + goto done; + } + + if (from) { + from = 0; + goto again; + } else { + goto error; + } + +done: + memblock_reserve(alloc, size); + ptr = phys_to_virt(alloc); + memset(ptr, 0, size); + + /* + * The min_count is set to 0 so that bootmem allocated blocks + * are never reported as leaks. + */ + kmemleak_alloc(ptr, size, 0, 0); + + return ptr; + +error: + return NULL; +} + +void * __init memblock_early_alloc_try_nid_nopanic(int nid, + phys_addr_t size, phys_addr_t align, + phys_addr_t from, phys_addr_t max_addr) +{ + memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n", + __func__, (u64)size, (u64)align, nid, (u64)from, + (u64)max_addr, (void *)_RET_IP_); + return _memblock_early_alloc_try_nid_nopanic(nid, size, + align, from, max_addr); +} + +void * __init memblock_early_alloc_try_nid(int nid, + phys_addr_t size, phys_addr_t align, + phys_addr_t from, phys_addr_t max_addr) +{ + void *ptr; + + memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n", + __func__, (u64)size, (u64)align, nid, (u64)from, + (u64)max_addr, (void *)_RET_IP_); + ptr = _memblock_early_alloc_try_nid_nopanic(nid, size, + align, from, max_addr); + if (ptr) + return ptr; + + panic("%s: Failed to allocate %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx\n", + __func__, (u64)size, (u64)align, nid, (u64)from, (u64)max_addr); + return NULL; +} + +void __init __memblock_free_early(phys_addr_t base, phys_addr_t size) +{ + memblock_dbg("%s: [%#016llx-%#016llx] %pF\n", + __func__, (u64)base, (u64)base + size, + (void *)_RET_IP_); + kmemleak_free_part(__va(base), size); + __memblock_remove(&memblock.reserved, base, size); +} + +void __init __memblock_free_late(phys_addr_t base, phys_addr_t size) +{ + u64 cursor, end; + + memblock_dbg("%s: [%#016llx-%#016llx] %pF\n", + __func__, (u64)base, (u64)base + size, + (void *)_RET_IP_); + kmemleak_free_part(__va(base), size); + cursor = PFN_UP(base); + end = PFN_DOWN(base + size); + + for (; cursor < end; cursor++) { + __free_pages_bootmem(pfn_to_page(cursor), 0); + totalram_pages++; + } +} /* * Remaining API functions -- 1.7.9.5 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>