The patch titled Subject: mm/memory_hotplug: embed vmem_altmap details in memory block has been added to the -mm mm-unstable branch. Its filename is mm-memory_hotplug-embed-vmem_altmap-details-in-memory-block.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-memory_hotplug-embed-vmem_altmap-details-in-memory-block.patch This patch will later appear in the mm-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: "Aneesh Kumar K.V" <aneesh.kumar@xxxxxxxxxxxxx> Subject: mm/memory_hotplug: embed vmem_altmap details in memory block Date: Tue, 8 Aug 2023 14:45:01 +0530 With memmap on memory, some architecture needs more details w.r.t altmap such as base_pfn, end_pfn, etc to unmap vmemmap memory. Instead of computing them again when we remove a memory block, embed vmem_altmap details in struct memory_block if we are using memmap on memory block feature. Link: https://lkml.kernel.org/r/20230808091501.287660-7-aneesh.kumar@xxxxxxxxxxxxx Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx> Acked-by: Michal Hocko <mhocko@xxxxxxxx> Acked-by: David Hildenbrand <david@xxxxxxxxxx> Cc: Christophe Leroy <christophe.leroy@xxxxxxxxxx> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Cc: Nicholas Piggin <npiggin@xxxxxxxxx> Cc: Oscar Salvador <osalvador@xxxxxxx> Cc: Vishal Verma <vishal.l.verma@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- drivers/base/memory.c | 27 ++++++++++++------- include/linux/memory.h | 8 +---- mm/memory_hotplug.c | 54 +++++++++++++++++++++++---------------- 3 files changed, 52 insertions(+), 37 deletions(-) --- a/drivers/base/memory.c~mm-memory_hotplug-embed-vmem_altmap-details-in-memory-block +++ a/drivers/base/memory.c @@ -105,7 +105,8 @@ EXPORT_SYMBOL(unregister_memory_notifier static void memory_block_release(struct device *dev) { struct memory_block *mem = to_memory_block(dev); - + /* Verify that the altmap is freed */ + WARN_ON(mem->altmap); kfree(mem); } @@ -183,7 +184,7 @@ static int memory_block_online(struct me { unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; - unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages; + unsigned long nr_vmemmap_pages = 0; struct zone *zone; int ret; @@ -200,6 +201,9 @@ static int memory_block_online(struct me * stage helps to keep accounting easier to follow - e.g vmemmaps * belong to the same zone as the memory they backed. */ + if (mem->altmap) + nr_vmemmap_pages = mem->altmap->free; + if (nr_vmemmap_pages) { ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone); if (ret) @@ -230,7 +234,7 @@ static int memory_block_offline(struct m { unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; - unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages; + unsigned long nr_vmemmap_pages = 0; int ret; if (!mem->zone) @@ -240,6 +244,9 @@ static int memory_block_offline(struct m * Unaccount before offlining, such that unpopulated zone and kthreads * can properly be torn down in offline_pages(). */ + if (mem->altmap) + nr_vmemmap_pages = mem->altmap->free; + if (nr_vmemmap_pages) adjust_present_page_count(pfn_to_page(start_pfn), mem->group, -nr_vmemmap_pages); @@ -726,7 +733,7 @@ void memory_block_add_nid(struct memory_ #endif static int add_memory_block(unsigned long block_id, unsigned long state, - unsigned long nr_vmemmap_pages, + struct vmem_altmap *altmap, struct memory_group *group) { struct memory_block *mem; @@ -744,7 +751,7 @@ static int add_memory_block(unsigned lon mem->start_section_nr = block_id * sections_per_block; mem->state = state; mem->nid = NUMA_NO_NODE; - mem->nr_vmemmap_pages = nr_vmemmap_pages; + mem->altmap = altmap; INIT_LIST_HEAD(&mem->group_next); #ifndef CONFIG_NUMA @@ -783,14 +790,14 @@ static int __init add_boot_memory_block( if (section_count == 0) return 0; return add_memory_block(memory_block_id(base_section_nr), - MEM_ONLINE, 0, NULL); + MEM_ONLINE, NULL, NULL); } static int add_hotplug_memory_block(unsigned long block_id, - unsigned long nr_vmemmap_pages, + struct vmem_altmap *altmap, struct memory_group *group) { - return add_memory_block(block_id, MEM_OFFLINE, nr_vmemmap_pages, group); + return add_memory_block(block_id, MEM_OFFLINE, altmap, group); } static void remove_memory_block(struct memory_block *memory) @@ -818,7 +825,7 @@ static void remove_memory_block(struct m * Called under device_hotplug_lock. */ int create_memory_block_devices(unsigned long start, unsigned long size, - unsigned long vmemmap_pages, + struct vmem_altmap *altmap, struct memory_group *group) { const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); @@ -832,7 +839,7 @@ int create_memory_block_devices(unsigned return -EINVAL; for (block_id = start_block_id; block_id != end_block_id; block_id++) { - ret = add_hotplug_memory_block(block_id, vmemmap_pages, group); + ret = add_hotplug_memory_block(block_id, altmap, group); if (ret) break; } --- a/include/linux/memory.h~mm-memory_hotplug-embed-vmem_altmap-details-in-memory-block +++ a/include/linux/memory.h @@ -77,11 +77,7 @@ struct memory_block { */ struct zone *zone; struct device dev; - /* - * Number of vmemmap pages. These pages - * lay at the beginning of the memory block. - */ - unsigned long nr_vmemmap_pages; + struct vmem_altmap *altmap; struct memory_group *group; /* group (if any) for this block */ struct list_head group_next; /* next block inside memory group */ #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) @@ -147,7 +143,7 @@ static inline int hotplug_memory_notifie extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); int create_memory_block_devices(unsigned long start, unsigned long size, - unsigned long vmemmap_pages, + struct vmem_altmap *altmap, struct memory_group *group); void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); --- a/mm/memory_hotplug.c~mm-memory_hotplug-embed-vmem_altmap-details-in-memory-block +++ a/mm/memory_hotplug.c @@ -1439,7 +1439,11 @@ int __ref add_memory_resource(int nid, s if (mhp_flags & MHP_MEMMAP_ON_MEMORY) { if (mhp_supports_memmap_on_memory(size)) { mhp_altmap.free = memory_block_memmap_on_memory_pages(); - params.altmap = &mhp_altmap; + params.altmap = kmalloc(sizeof(struct vmem_altmap), GFP_KERNEL); + if (!params.altmap) + goto error; + + memcpy(params.altmap, &mhp_altmap, sizeof(mhp_altmap)); } /* fallback to not using altmap */ } @@ -1447,13 +1451,13 @@ int __ref add_memory_resource(int nid, s /* call arch's memory hotadd */ ret = arch_add_memory(nid, start, size, ¶ms); if (ret < 0) - goto error; + goto error_free; /* create memory block devices after memory was added */ - ret = create_memory_block_devices(start, size, mhp_altmap.free, group); + ret = create_memory_block_devices(start, size, params.altmap, group); if (ret) { arch_remove_memory(start, size, NULL); - goto error; + goto error_free; } if (new_node) { @@ -1490,6 +1494,8 @@ int __ref add_memory_resource(int nid, s walk_memory_blocks(start, size, NULL, online_memory_block); return ret; +error_free: + kfree(params.altmap); error: if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) memblock_remove(start, size); @@ -2056,12 +2062,18 @@ static int check_memblock_offlined_cb(st return 0; } -static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg) +static int test_has_altmap_cb(struct memory_block *mem, void *arg) { + struct memory_block **mem_ptr = (struct memory_block **)arg; /* - * If not set, continue with the next block. + * return the memblock if we have altmap + * and break callback. */ - return mem->nr_vmemmap_pages; + if (mem->altmap) { + *mem_ptr = mem; + return 1; + } + return 0; } static int check_cpu_on_node(int nid) @@ -2136,10 +2148,9 @@ EXPORT_SYMBOL(try_offline_node); static int __ref try_remove_memory(u64 start, u64 size) { - struct vmem_altmap mhp_altmap = {}; - struct vmem_altmap *altmap = NULL; - unsigned long nr_vmemmap_pages; + struct memory_block *mem; int rc = 0, nid = NUMA_NO_NODE; + struct vmem_altmap *altmap = NULL; BUG_ON(check_hotplug_memory_range(start, size)); @@ -2161,25 +2172,20 @@ static int __ref try_remove_memory(u64 s * the same granularity it was added - a single memory block. */ if (mhp_memmap_on_memory()) { - nr_vmemmap_pages = walk_memory_blocks(start, size, NULL, - get_nr_vmemmap_pages_cb); - if (nr_vmemmap_pages) { + rc = walk_memory_blocks(start, size, &mem, test_has_altmap_cb); + if (rc) { if (size != memory_block_size_bytes()) { pr_warn("Refuse to remove %#llx - %#llx," "wrong granularity\n", start, start + size); return -EINVAL; } - + altmap = mem->altmap; /* - * Let remove_pmd_table->free_hugepage_table do the - * right thing if we used vmem_altmap when hot-adding - * the range. + * Mark altmap NULL so that we can add a debug + * check on memblock free. */ - mhp_altmap.base_pfn = PHYS_PFN(start); - mhp_altmap.free = nr_vmemmap_pages; - mhp_altmap.alloc = nr_vmemmap_pages; - altmap = &mhp_altmap; + mem->altmap = NULL; } } @@ -2196,6 +2202,12 @@ static int __ref try_remove_memory(u64 s arch_remove_memory(start, size, altmap); + /* Verify that all vmemmap pages have actually been freed. */ + if (altmap) { + WARN(altmap->alloc, "Altmap not fully unmapped"); + kfree(altmap); + } + if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) { memblock_phys_free(start, size); memblock_remove(start, size); _ Patches currently in -mm which might be from aneesh.kumar@xxxxxxxxxxxxx are mm-hugepage-pud-allow-arch-specific-helper-function-to-check-huge-page-pud-support.patch mm-hugepage-pud-allow-arch-specific-helper-function-to-check-huge-page-pud-support-fix.patch mm-change-pudp_huge_get_and_clear_full-take-vm_area_struct-as-arg.patch mm-vmemmap-improve-vmemmap_can_optimize-and-allow-architectures-to-override.patch mm-vmemmap-allow-architectures-to-override-how-vmemmap-optimization-works.patch mm-add-pud_same-similar-to-__have_arch_p4d_same.patch mm-huge-pud-use-transparent-huge-pud-helpers-only-with-config_transparent_hugepage.patch mm-vmemmap-optimization-split-hugetlb-and-devdax-vmemmap-optimization.patch powerpc-mm-trace-convert-trace-event-to-trace-event-class.patch powerpc-book3s64-mm-enable-transparent-pud-hugepage.patch powerpc-book3s64-vmemmap-switch-radix-to-use-a-different-vmemmap-handling-function.patch powerpc-book3s64-vmemmap-switch-radix-to-use-a-different-vmemmap-handling-function-fix.patch powerpc-book3s64-vmemmap-switch-radix-to-use-a-different-vmemmap-handling-function-fix-2.patch powerpc-book3s64-radix-add-support-for-vmemmap-optimization-for-radix.patch powerpc-book3s64-radix-add-support-for-vmemmap-optimization-for-radix-fix.patch powerpc-book3s64-radix-remove-mmu_vmemmap_psize.patch powerpc-book3s64-radix-remove-mmu_vmemmap_psize-fix.patch powerpc-book3s64-radix-add-debug-message-to-give-more-details-of-vmemmap-allocation.patch mm-memory_hotplug-simplify-arch_mhp_memmap_on_memory_enable-kconfig.patch mm-memory_hotplug-allow-memmap-on-memory-hotplug-request-to-fallback.patch mm-memory_hotplug-allow-architecture-to-override-memmap-on-memory-support-check.patch mm-memory_hotplug-support-memmap_on_memory-when-memmap-is-not-aligned-to-pageblocks.patch powerpc-book3s64-memhotplug-enable-memmap-on-memory-for-radix.patch mm-memory_hotplug-embed-vmem_altmap-details-in-memory-block.patch