Provide a new interface for dynamic configuration and deconfiguration of hotplug memory, allowing for mixed altmap and non-altmap support. It is a follow-up on the discussion with David: https://lore.kernel.org/all/ee492da8-74b4-4a97-8b24-73e07257f01d@xxxxxxxxxx/ As mentioned in the discussion, advantages of the new interface are: * Users can dynamically specify which memory ranges should have altmap support, rather than having it statically enabled or disabled for all hot-plugged memory. * In the long term, users could specify a memory range, including multiple blocks, and whether they want altmap support for that range. This could allow for altmap block grouping, or even variable-sized blocks, in the future, i.e. "grouping" memory blocks that share the same altmap located on the first memory blocks in the group, thereby reducing fragmentation due to altmap. To leverage these advantages: Create a sysfs interface /sys/bus/memory/devices/configure_memory, which performs runtime (de)configuration of memory with altmap or non-altmap support. The interface validates the memory ranges against the architecture-specific memory configuration and performs add_memory()/remove_memory(). Dynamic (de)configuration of memory is made configurable via CONFIG_RUNTIME_MEMORY_CONFIGURATION. Usage format for the new interface: echo config_mode,memoryrange,altmap_mode > /sys/bus/memory/devices/configure_memory E.g. to configure a range with altmap: echo 1,0x200000000-0x20fffffff,1 > /sys/bus/memory/devices/configure_memory This interface could not only help to make s390 more flexible and similar to other architectures (wrt adding hotplug memory in advance); it might also make it possible to provide dynamically configured altmap support for others. E.g. instead of directly doing an add_memory() in the ACPI handler, with the static altmap setting, one could instead defer that to the new interface, which allows dynamic altmap configuration. 
Reviewed-by: Gerald Schaefer <gerald.schaefer@xxxxxxxxxxxxx> Signed-off-by: Sumanth Korikkar <sumanthk@xxxxxxxxxxxxx> --- drivers/base/memory.c | 124 +++++++++++++++++++++++++++++++++++++++++ include/linux/memory.h | 6 ++ mm/Kconfig | 16 ++++++ 3 files changed, 146 insertions(+) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 67858eeb92ed..f024444b3301 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -631,6 +631,127 @@ int __weak arch_get_memory_phys_device(unsigned long start_pfn) return 0; } +#ifdef CONFIG_RUNTIME_MEMORY_CONFIGURATION +enum { + REMOVE_MEMORY = 0, + ADD_MEMORY, + MAX_CONFIGURE_MODE +}; + +enum { + NOALTMAP = 0, + ALTMAP, + MAX_ALTMAP_MODE +}; + +/* + * Return true when the memory range is valid. + * + * Architecture specific code can override the below function and validate the + * memory range against its possible memory configurations. + */ +bool __weak arch_validate_memory_range(unsigned long long start, + unsigned long long end) +{ + return false; +} + +/* + * Format: + * echo config_mode,memoryrange,altmap_mode > + * /sys/bus/memory/devices/configure_memory + * + * config_mode: + * value: 1 - add_memory, 0 - remove_memory + * + * range: + * 0x<start address>-0x<end address> + * Where start address is aligned to memory block size and end address + * represents last byte in the range. + * example: 0x200000000-0x20fffffff + * + * altmap_mode: + * value: 1 - altmap support, 0 - no altmap support + */ +static ssize_t configure_memory_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + s64 start, end, block_size, range; + u32 config_mode, altmap_mode; + int num, nid, ret = -EINVAL; + struct memory_block *mem; + + num = sscanf(buf, "%u,0x%llx-0x%llx,%u", &config_mode, &start, &end, &altmap_mode); + if (num != 4) + goto out; + + if (config_mode >= MAX_CONFIGURE_MODE || altmap_mode >= MAX_ALTMAP_MODE) + goto out; + + altmap_mode = altmap_mode ? 
MHP_MEMMAP_ON_MEMORY | + MHP_OFFLINE_INACCESSIBLE : MHP_NONE; + + block_size = memory_block_size_bytes(); + + if (!IS_ALIGNED(start, block_size) || !IS_ALIGNED(end + 1, block_size)) + goto out; + + if (start < 0 || end < 0 || start >= end) + goto out; + + if (!arch_validate_memory_range(start, end)) + goto out; + + ret = lock_device_hotplug_sysfs(); + if (ret) + goto out; + + if (config_mode == ADD_MEMORY) { + for (range = start; range < end + 1; range += block_size) { + mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(range))); + if (mem) { + pr_info("Memory already configured - (start:0x%llx)\n", range); + ret = -EEXIST; + put_device(&mem->dev); + goto out_unlock; + } + nid = memory_add_physaddr_to_nid(range); + ret = __add_memory(nid, range, block_size, altmap_mode); + if (ret) { + pr_info("Memory addition failed - (start:0x%llx)\n", range); + goto out_unlock; + } + } + } else if (config_mode == REMOVE_MEMORY) { + for (range = start; range < end + 1; range += block_size) { + mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(range))); + if (!mem) { + pr_info("Memory not configured - (start:0x%llx)\n", range); + ret = -EINVAL; + goto out_unlock; + } + if (mem->state != MEM_OFFLINE) { + pr_info("Memory removal failed - (start:0x%llx) not offline\n", + range); + put_device(&mem->dev); + ret = -EBUSY; + goto out_unlock; + } else { + /* drop the ref just got via find_memory_block() */ + put_device(&mem->dev); + } + __remove_memory(range, block_size); + } + } +out_unlock: + unlock_device_hotplug(); +out: + return ret ? ret : count; +} +static DEVICE_ATTR_WO(configure_memory); +#endif /* CONFIG_RUNTIME_MEMORY_CONFIGURATION */ + /* * A reference for the returned memory block device is acquired. 
* @@ -941,6 +1062,9 @@ static struct attribute *memory_root_attrs[] = { &dev_attr_auto_online_blocks.attr, #ifdef CONFIG_CRASH_HOTPLUG &dev_attr_crash_hotplug.attr, +#endif +#ifdef CONFIG_RUNTIME_MEMORY_CONFIGURATION + &dev_attr_configure_memory.attr, #endif NULL }; diff --git a/include/linux/memory.h b/include/linux/memory.h index c0afee5d126e..88b2b374bc44 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -85,6 +85,12 @@ struct memory_block { #endif }; +#ifdef CONFIG_RUNTIME_MEMORY_CONFIGURATION +bool arch_validate_memory_range(unsigned long long start, unsigned long long end); +ssize_t arch_get_memory_max_configurable(void); +int memory_create_sysfs_max_configurable(void); +#endif /* CONFIG_RUNTIME_MEMORY_CONFIGURATION */ + int arch_get_memory_phys_device(unsigned long start_pfn); unsigned long memory_block_size_bytes(void); int set_memory_block_size_order(unsigned int order); diff --git a/mm/Kconfig b/mm/Kconfig index 84000b016808..2aec2fc3fb25 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -576,6 +576,22 @@ config MHP_MEMMAP_ON_MEMORY depends on MEMORY_HOTPLUG && SPARSEMEM_VMEMMAP depends on ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE +config RUNTIME_MEMORY_CONFIGURATION + bool "Dynamic configuration and deconfiguration of memory" + def_bool n + depends on MEMORY_HOTPLUG && SPARSEMEM_VMEMMAP + help + This option provides support to perform dynamic configuration and + deconfiguration of memory with altmap or non-altmap support + (/sys/bus/memory/devices/configure_memory). The interface validates + the configuration and deconfiguration of memory ranges against + architecture specific configuration and performs add_memory() with + altmap or non-altmap support and remove_memory() respectively. + + Say Y here if the architecture supports validating dynamically + (de)configured memory against architecture specific memory + configurations. + endif # MEMORY_HOTPLUG config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE -- 2.45.2