Only memory added via add_memory() and friends will need memory block devices - only memory to be used via the buddy allocator and to be onlined/offlined by user space in memory block granularity. Move creation of memory block devices out of arch_add_memory(). Create all devices after arch_add_memory() has succeeded. We can later drop the want_memblock parameter, because it is now effectively stale. Only after memory block devices have been added can memory be onlined by user space. This implies that memory is not visible to user space at all before arch_add_memory() has succeeded. Issue 1: __add_pages() does not remove pages in case something went wrong. If this is the case, we would now no longer create memory block devices for such "partially added memory". So the memory would not be usable/onlinable. Bad? Or related to issue 2 (e.g. fix __add_pages() to remove any parts that were added in case of an error). Functions that fail and don't clean up are not that nice. Issue 2: In case we can't add memory block devices, and we don't have HOTREMOVE, we can't remove the pages via arch_remove_memory(). Maybe we should try to get rid of CONFIG_MEMORY_HOTREMOVE, so we can handle all failures in a nice way? Or at least always allow arch_remove_memory() and friends, i.e. a subset of CONFIG_MEMORY_HOTREMOVE. Signed-off-by: David Hildenbrand <david@xxxxxxxxxx> --- drivers/base/memory.c | 67 +++++++++++++++++++++++++----------------- include/linux/memory.h | 2 +- mm/memory_hotplug.c | 17 +++++++---- 3 files changed, 53 insertions(+), 33 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index d9ebb89816f7..847b33061e2e 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -701,44 +701,57 @@ static int add_memory_block(int base_section_nr) return 0; } -/* - * need an interface for the VM to add new memory regions, - * but without onlining it. 
- */ -int hotplug_memory_register(int nid, struct mem_section *section) +static void unregister_memory(struct memory_block *memory) { - int ret = 0; + BUG_ON(memory->dev.bus != &memory_subsys); + + /* drop the ref. we got via find_memory_block() */ + put_device(&memory->dev); + device_unregister(&memory->dev); +} + +int hotplug_memory_register(unsigned long start, unsigned long size) +{ + unsigned long block_nr_pages = memory_block_size_bytes() >> PAGE_SHIFT; + unsigned long start_pfn = PFN_DOWN(start); + unsigned long end_pfn = start_pfn + (size >> PAGE_SHIFT); + unsigned long pfn; struct memory_block *mem; + int ret = 0; - mutex_lock(&mem_sysfs_mutex); + BUG_ON(!IS_ALIGNED(start, memory_block_size_bytes())); + BUG_ON(!IS_ALIGNED(size, memory_block_size_bytes())); - mem = find_memory_block(section); - if (mem) { - mem->section_count++; - put_device(&mem->dev); - } else { - ret = init_memory_block(&mem, section, MEM_OFFLINE); + mutex_lock(&mem_sysfs_mutex); + for (pfn = start_pfn; pfn != end_pfn; pfn += block_nr_pages) { + mem = find_memory_block(__pfn_to_section(pfn)); + if (mem) { + WARN_ON_ONCE(false); + put_device(&mem->dev); + continue; + } + ret = init_memory_block(&mem, __pfn_to_section(pfn), + MEM_OFFLINE); if (ret) - goto out; - mem->section_count++; + break; + mem->section_count = memory_block_size_bytes() / + MIN_MEMORY_BLOCK_SIZE; + } + if (ret) { + end_pfn = pfn; + for (pfn = start_pfn; pfn != end_pfn; pfn += block_nr_pages) { + mem = find_memory_block(__pfn_to_section(pfn)); + if (!mem) + continue; + mem->section_count = 0; + unregister_memory(mem); + } } - -out: mutex_unlock(&mem_sysfs_mutex); return ret; } #ifdef CONFIG_MEMORY_HOTREMOVE -static void -unregister_memory(struct memory_block *memory) -{ - BUG_ON(memory->dev.bus != &memory_subsys); - - /* drop the ref. 
we got in remove_memory_section() */ - put_device(&memory->dev); - device_unregister(&memory->dev); -} - static int remove_memory_section(struct mem_section *section) { struct memory_block *mem; diff --git a/include/linux/memory.h b/include/linux/memory.h index a6ddefc60517..e275dc775834 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -111,7 +111,7 @@ extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); -int hotplug_memory_register(int nid, struct mem_section *section); +int hotplug_memory_register(unsigned long start, unsigned long size); #ifdef CONFIG_MEMORY_HOTREMOVE extern int unregister_memory_section(struct mem_section *); #endif diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 680dcc67f9d5..13ee0a26e034 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -260,11 +260,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, ret = sparse_add_one_section(nid, phys_start_pfn, altmap); if (ret < 0) return ret; - - if (!want_memblock) - return 0; - - return hotplug_memory_register(nid, __pfn_to_section(phys_start_pfn)); + return 0; } /* @@ -1125,6 +1121,17 @@ int __ref add_memory_resource(int nid, struct resource *res) if (ret < 0) goto error; + /* create memory block devices after memory was added */ + ret = hotplug_memory_register(start, size); +#ifdef CONFIG_MEMORY_HOTREMOVE + if (ret) { + arch_remove_memory(nid, start, size, NULL); + goto error; + } +#else + WARN_ON(ret); +#endif + if (new_node) { /* If sysfs file of new node can't be created, cpu on the node * can't be hot-added. There is no rollback way now. -- 2.17.2