'struct mem_section_usage' combines the existing 'pageblock_flags' bitmap with a new 'map_active' bitmap. The new bitmap enables the memory hot{plug,remove} implementation to act on incremental sub-divisions of a section. The primary impetus for this functionality is to support platforms that mix "System RAM" and "Persistent Memory" within a single section. We want to be able to hotplug "Persistent Memory" to extend a partially populated section and share that section between ZONE_DEVICE and ZONE_NORMAL/MOVABLE memory. This introduces a pointer to the new 'map_active' bitmap through struct mem_section, but otherwise should not change any behavior. Cc: Michal Hocko <mhocko@xxxxxxxx> Cc: Vlastimil Babka <vbabka@xxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: Logan Gunthorpe <logang@xxxxxxxxxxxx> Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: Stephen Bates <stephen.bates@xxxxxxxxxxxxx> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> --- include/linux/mmzone.h | 21 +++++++++- mm/memory_hotplug.c | 4 +- mm/page_alloc.c | 2 - mm/sparse.c | 98 ++++++++++++++++++++++++++---------------------- 4 files changed, 75 insertions(+), 50 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ffa9503d5be5..82a1af3afa04 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1070,6 +1070,19 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK) #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) +#define SECTION_ACTIVE_SIZE ((1UL << SECTION_SIZE_BITS) / BITS_PER_LONG) +#define SECTION_ACTIVE_MASK (~(SECTION_ACTIVE_SIZE - 1)) + +struct mem_section_usage { + /* + * SECTION_ACTIVE_SIZE portions of the section that are populated in + * the memmap + */ + unsigned long map_active; + /* See declaration of similar field in struct zone */ + unsigned long pageblock_flags[0]; +}; + struct page; 
struct page_ext; struct mem_section { @@ -1087,8 +1100,7 @@ struct mem_section { */ unsigned long section_mem_map; - /* See declaration of similar field in struct zone */ - unsigned long *pageblock_flags; + struct mem_section_usage *usage; #ifdef CONFIG_PAGE_EXTENSION /* * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use @@ -1119,6 +1131,11 @@ extern struct mem_section *mem_section[NR_SECTION_ROOTS]; extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; #endif +static inline unsigned long *section_to_usemap(struct mem_section *ms) +{ + return ms->usage->pageblock_flags; +} + static inline struct mem_section *__nr_to_section(unsigned long nr) { if (!mem_section[SECTION_NR_TO_ROOT(nr)]) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e1751ca002fa..07accab8441d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -235,7 +235,7 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) for (i = 0; i < mapsize; i++, page++) get_page_bootmem(section_nr, page, SECTION_INFO); - usemap = __nr_to_section(section_nr)->pageblock_flags; + usemap = section_to_usemap(__nr_to_section(section_nr)); page = virt_to_page(usemap); mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; @@ -261,7 +261,7 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION); - usemap = __nr_to_section(section_nr)->pageblock_flags; + usemap = section_to_usemap(__nr_to_section(section_nr)); page = virt_to_page(usemap); mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6cbde310abed..50858eef1cc4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -357,7 +357,7 @@ static inline unsigned long *get_pageblock_bitmap(struct page *page, unsigned long pfn) { #ifdef CONFIG_SPARSEMEM - return __pfn_to_section(pfn)->pageblock_flags; + return section_to_usemap(__pfn_to_section(pfn)); #else return 
page_zone(page)->pageblock_flags; #endif /* CONFIG_SPARSEMEM */ diff --git a/mm/sparse.c b/mm/sparse.c index db6bf3c97ea2..d0d4c005dc60 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -233,15 +233,15 @@ struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pn static int __meminit sparse_init_one_section(struct mem_section *ms, unsigned long pnum, struct page *mem_map, - unsigned long *pageblock_bitmap) + struct mem_section_usage *usage) { if (!present_section(ms)) return -EINVAL; ms->section_mem_map &= ~SECTION_MAP_MASK; ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) | - SECTION_HAS_MEM_MAP; - ms->pageblock_flags = pageblock_bitmap; + SECTION_HAS_MEM_MAP; + ms->usage = usage; return 1; } @@ -255,9 +255,13 @@ unsigned long usemap_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -static unsigned long *__kmalloc_section_usemap(void) +static struct mem_section_usage *__alloc_section_usage(void) { - return kmalloc(usemap_size(), GFP_KERNEL); + struct mem_section_usage *usage; + + usage = kzalloc(sizeof(*usage) + usemap_size(), GFP_KERNEL); + /* TODO: allocate the map_active bitmap */ + return usage; } #endif /* CONFIG_MEMORY_HOTPLUG */ @@ -293,7 +297,8 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, return p; } -static void __init check_usemap_section_nr(int nid, unsigned long *usemap) +static void __init check_usemap_section_nr(int nid, + struct mem_section_usage *usage) { unsigned long usemap_snr, pgdat_snr; static unsigned long old_usemap_snr = NR_MEM_SECTIONS; @@ -301,7 +306,7 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) struct pglist_data *pgdat = NODE_DATA(nid); int usemap_nid; - usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT); + usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT); pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); if (usemap_snr == pgdat_snr) return; @@ -336,7 +341,8 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, 
return memblock_virt_alloc_node_nopanic(size, pgdat->node_id); } -static void __init check_usemap_section_nr(int nid, unsigned long *usemap) +static void __init check_usemap_section_nr(int nid, + struct mem_section_usage *usage) { } #endif /* CONFIG_MEMORY_HOTREMOVE */ @@ -344,26 +350,27 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) static void __init sparse_early_usemaps_alloc_node(void *data, unsigned long pnum_begin, unsigned long pnum_end, - unsigned long usemap_count, int nodeid) + unsigned long usage_count, int nodeid) { - void *usemap; + void *usage; unsigned long pnum; - unsigned long **usemap_map = (unsigned long **)data; - int size = usemap_size(); + struct mem_section_usage **usage_map = data; + int size = sizeof(struct mem_section_usage) + usemap_size(); - usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), - size * usemap_count); - if (!usemap) { + usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), + size * usage_count); + if (!usage) { pr_warn("%s: allocation failed\n", __func__); return; } + memset(usage, 0, size * usage_count); for (pnum = pnum_begin; pnum < pnum_end; pnum++) { if (!present_section_nr(pnum)) continue; - usemap_map[pnum] = usemap; - usemap += size; - check_usemap_section_nr(nodeid, usemap_map[pnum]); + usage_map[pnum] = usage; + usage += size; + check_usemap_section_nr(nodeid, usage_map[pnum]); } } @@ -468,7 +475,7 @@ void __weak __meminit vmemmap_populate_print_last(void) /** * alloc_usemap_and_memmap - memory alloction for pageblock flags and vmemmap - * @map: usemap_map for pageblock flags or mmap_map for vmemmap + * @map: usage_map for mem_section_usage or mmap_map for vmemmap */ static void __init alloc_usemap_and_memmap(void (*alloc_func) (void *, unsigned long, unsigned long, @@ -521,10 +528,9 @@ static void __init alloc_usemap_and_memmap(void (*alloc_func) */ void __init sparse_init(void) { + struct mem_section_usage *usage, **usage_map; unsigned long pnum; 
struct page *map; - unsigned long *usemap; - unsigned long **usemap_map; int size; #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER int size2; @@ -539,21 +545,21 @@ void __init sparse_init(void) /* * map is using big page (aka 2M in x86 64 bit) - * usemap is less one page (aka 24 bytes) + * usage is less one page (aka 24 bytes) * so alloc 2M (with 2M align) and 24 bytes in turn will * make next 2M slip to one more 2M later. * then in big system, the memory will have a lot of holes... * here try to allocate 2M pages continuously. * * powerpc need to call sparse_init_one_section right after each - * sparse_early_mem_map_alloc, so allocate usemap_map at first. + * sparse_early_mem_map_alloc, so allocate usage_map at first. */ - size = sizeof(unsigned long *) * NR_MEM_SECTIONS; - usemap_map = memblock_virt_alloc(size, 0); - if (!usemap_map) - panic("can not allocate usemap_map\n"); + size = sizeof(struct mem_section_usage *) * NR_MEM_SECTIONS; + usage_map = memblock_virt_alloc(size, 0); + if (!usage_map) + panic("can not allocate usage_map\n"); alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node, - (void *)usemap_map); + (void *)usage_map); #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER size2 = sizeof(struct page *) * NR_MEM_SECTIONS; @@ -568,8 +574,8 @@ void __init sparse_init(void) if (!present_section_nr(pnum)) continue; - usemap = usemap_map[pnum]; - if (!usemap) + usage = usage_map[pnum]; + if (!usage) continue; #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER @@ -581,7 +587,7 @@ void __init sparse_init(void) continue; sparse_init_one_section(__nr_to_section(pnum), pnum, map, - usemap); + usage); } vmemmap_populate_print_last(); @@ -589,7 +595,7 @@ void __init sparse_init(void) #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER memblock_free_early(__pa(map_map), size2); #endif - memblock_free_early(__pa(usemap_map), size); + memblock_free_early(__pa(usage_map), size); } #ifdef CONFIG_MEMORY_HOTPLUG @@ -693,9 +699,9 @@ int __meminit sparse_add_one_section(struct zone 
*zone, unsigned long start_pfn) { unsigned long section_nr = pfn_to_section_nr(start_pfn); struct pglist_data *pgdat = zone->zone_pgdat; + static struct mem_section_usage *usage; struct mem_section *ms; struct page *memmap; - unsigned long *usemap; unsigned long flags; int ret; @@ -709,8 +715,8 @@ int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn) memmap = kmalloc_section_memmap(section_nr, pgdat->node_id); if (!memmap) return -ENOMEM; - usemap = __kmalloc_section_usemap(); - if (!usemap) { + usage = __alloc_section_usage(); + if (!usage) { __kfree_section_memmap(memmap); return -ENOMEM; } @@ -727,12 +733,12 @@ int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn) ms->section_mem_map |= SECTION_MARKED_PRESENT; - ret = sparse_init_one_section(ms, section_nr, memmap, usemap); + ret = sparse_init_one_section(ms, section_nr, memmap, usage); out: pgdat_resize_unlock(pgdat, &flags); if (ret <= 0) { - kfree(usemap); + kfree(usage); __kfree_section_memmap(memmap); } return ret; @@ -760,19 +766,20 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) } #endif -static void free_section_usemap(struct page *memmap, unsigned long *usemap) +static void free_section_usage(struct page *memmap, + struct mem_section_usage *usage) { struct page *usemap_page; - if (!usemap) + if (!usage) return; - usemap_page = virt_to_page(usemap); + usemap_page = virt_to_page(usage->pageblock_flags); /* * Check to see if allocation came from hot-plug-add */ if (PageSlab(usemap_page) || PageCompound(usemap_page)) { - kfree(usemap); + kfree(usage); if (memmap) __kfree_section_memmap(memmap); return; @@ -790,23 +797,24 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap) void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, unsigned long map_offset) { + unsigned long flags; struct page *memmap = NULL; - unsigned long *usemap = NULL, flags; + struct mem_section_usage *usage = 
NULL; struct pglist_data *pgdat = zone->zone_pgdat; pgdat_resize_lock(pgdat, &flags); if (ms->section_mem_map) { - usemap = ms->pageblock_flags; + usage = ms->usage; memmap = sparse_decode_mem_map(ms->section_mem_map, __section_nr(ms)); ms->section_mem_map = 0; - ms->pageblock_flags = NULL; + ms->usage = NULL; } pgdat_resize_unlock(pgdat, &flags); clear_hwpoisoned_pages(memmap + map_offset, PAGES_PER_SECTION - map_offset); - free_section_usemap(memmap, usemap); + free_section_usage(memmap, usage); } #endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */ -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href="mailto:dont@xxxxxxxxx">email@xxxxxxxxx</a>