Implement MEM_PHYS_ONLINE and MEM_PHYS_OFFLINE memory notifiers on s390 Implementation of MEM_PHYS_ONLINE Memory Notifier: * Transition the memory block to an accessible/online state using the sclp assign command. * Execute __add_pages() for the memory block, enabling a self-contained memory map range. For boot-time memory, vmemmap mapping is carried out through sparse_init(). Implementation of MEM_PHYS_OFFLINE Memory Notifier: * Execute __remove_pages() exclusively for the memory block (applicable where a self-contained memory map was possible before). * Shift the memory block to an inaccessible/offline state using the sclp unassign command. Additional Implementation Considerations: * When MHP_MEMMAP_ON_MEMORY is disabled, the system retains the old behavior. This means the memory map is allocated from default memory, and struct vmemmap pages are populated during the standby memory detection phase. * With MHP_MEMMAP_ON_MEMORY enabled (allowing self-contained memory map), the memory map is allocated using the self-contained memory map range. Struct vmemmap pages are populated during the memory hotplug phase. * If MACHINE_HAS_EDAT1 is unavailable, MHP_MEMMAP_ON_MEMORY is automatically disabled. This ensures that vmemmap pagetables do not consume additional memory from the default memory allocator. * The MEM_GOING_ONLINE notifier has been modified to perform no operation, as MEM_PHYS_ONLINE already executes the sclp assign command. * The MEM_CANCEL_ONLINE notifier now performs no operation, as MEM_PHYS_OFFLINE already executes the sclp unassign command. * The call to __add_pages() in arch_add_memory() with altmap support is skipped. This operation is deferred and will be performed later in the MEM_PHYS_ONLINE notifier. 
Reviewed-by: Gerald Schaefer <gerald.schaefer@xxxxxxxxxxxxx> Signed-off-by: Sumanth Korikkar <sumanthk@xxxxxxxxxxxxx> --- arch/s390/mm/init.c | 16 +++++++++++++++- drivers/s390/char/sclp_cmd.c | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8d9a60ccb777..db505ed590b2 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -288,6 +288,12 @@ int arch_add_memory(int nid, u64 start, u64 size, rc = vmem_add_mapping(start, size); if (rc) return rc; + /* + * If MHP_MEMMAP_ON_MEMORY is enabled, perform __add_pages() during memory + * onlining phase + */ + if (params->altmap) + return 0; rc = __add_pages(nid, start_pfn, size_pages, params); if (rc) @@ -300,7 +306,15 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - __remove_pages(start_pfn, nr_pages, altmap); + /* + * On s390, currently arch_remove_memory() will be called during error + * handling of add_memory_resource(). When MHP_MEMMAP_ON_MEMORY is + * enabled, __add_pages() is performed later during the memory onlining + * phase. 
Hence, __remove_pages() should not be called here in that + * case, but only later during memory offline phase + */ + if (!altmap) + __remove_pages(start_pfn, nr_pages, NULL); vmem_remove_mapping(start, size); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 11c428f4c7cf..12f3d4af7e4e 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -18,6 +18,7 @@ #include <linux/mm.h> #include <linux/mmzone.h> #include <linux/memory.h> +#include <linux/memory_hotplug.h> #include <linux/module.h> #include <asm/ctlreg.h> #include <asm/chpid.h> @@ -26,6 +27,7 @@ #include <asm/sclp.h> #include <asm/numa.h> #include <asm/facility.h> +#include <asm/page-states.h> #include "sclp.h" @@ -319,6 +321,8 @@ static bool contains_standby_increment(unsigned long start, unsigned long end) static int sclp_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) { + struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) }; + struct memory_block *memory_block; unsigned long start, size; struct memory_notify *arg; unsigned char id; @@ -330,6 +334,11 @@ static int sclp_mem_notifier(struct notifier_block *nb, mutex_lock(&sclp_mem_mutex); for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1) sclp_attach_storage(id); + memory_block = find_memory_block(pfn_to_section_nr(arg->start_pfn)); + if (!memory_block) { + rc = -EINVAL; + goto out; + } switch (action) { case MEM_GOING_OFFLINE: /* @@ -344,17 +353,34 @@ static int sclp_mem_notifier(struct notifier_block *nb, case MEM_CANCEL_OFFLINE: break; case MEM_GOING_ONLINE: + break; + case MEM_PHYS_ONLINE: rc = sclp_mem_change_state(start, size, 1); + if (rc || !memory_block->altmap) + goto out; + params.altmap = memory_block->altmap; + rc = __add_pages(0, arg->start_pfn, arg->nr_pages, &params); + if (rc) + sclp_mem_change_state(start, size, 0); + /* + * Set CMMA state to nodat here, since the struct page memory + * at the beginning 
of the memory block will not go through the + * buddy allocator later. + */ + __arch_set_page_nodat((void *)start, memory_block->altmap->free); break; case MEM_CANCEL_ONLINE: - sclp_mem_change_state(start, size, 0); - break; case MEM_OFFLINE: + break; + case MEM_PHYS_OFFLINE: + if (memory_block->altmap) + __remove_pages(arg->start_pfn, arg->nr_pages, memory_block->altmap); sclp_mem_change_state(start, size, 0); break; default: break; } +out: mutex_unlock(&sclp_mem_mutex); return rc ? NOTIFY_BAD : NOTIFY_OK; } @@ -400,7 +426,8 @@ static void __init add_memory_merged(u16 rn) if (!size) goto skip_add; for (addr = start; addr < start + size; addr += block_size) - add_memory(0, addr, block_size, MHP_NONE); + add_memory(0, addr, block_size, + MACHINE_HAS_EDAT1 ? MHP_MEMMAP_ON_MEMORY : MHP_NONE); skip_add: first_rn = rn; num = 1; -- 2.41.0