Provide a sysfs interface to hot remove memory. This patch updates the sysfs interface for hot add of memory to also provide a sysfs interface to hot remove memory. The use of this interface is controlled with the ARCH_MEMORY_PROBE config option, currently used by x86 and powerpc. This patch also updates the name of this option to CONFIG_ARCH_MEMORY_PROBE_RELEASE to indicate that it controls the probe and release sysfs interfaces. Signed-off-by: Nathan Fontenot <nfont@xxxxxxxxxxxxxxxxxx> --- Documentation/memory-hotplug.txt | 34 ++++++++++++---- arch/powerpc/Kconfig | 2 arch/x86/Kconfig | 2 drivers/base/memory.c | 81 ++++++++++++++++++++++++++++++++++----- 4 files changed, 100 insertions(+), 19 deletions(-) Index: linux/drivers/base/memory.c =================================================================== --- linux.orig/drivers/base/memory.c +++ linux/drivers/base/memory.c @@ -129,22 +129,30 @@ static ssize_t show_mem_end_phys_index(s return sprintf(buf, "%08lx\n", phys_index); } +static int is_memblock_removable(unsigned long start_section_nr) +{ + unsigned long pfn; + int i, ret = 1; + + for (i = 0; i < sections_per_block; i++) { + pfn = section_nr_to_pfn(start_section_nr + i); + ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION); + } + + return ret; +} + /* * Show whether the section of memory is likely to be hot-removable */ static ssize_t show_mem_removable(struct device *dev, struct device_attribute *attr, char *buf) { - unsigned long i, pfn; - int ret = 1; + int ret; struct memory_block *mem = container_of(dev, struct memory_block, dev); - for (i = 0; i < sections_per_block; i++) { - pfn = section_nr_to_pfn(mem->start_section_nr + i); - ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION); - } - + ret = is_memblock_removable(mem->start_section_nr); return sprintf(buf, "%d\n", ret); } @@ -421,7 +429,7 @@ static DEVICE_ATTR(block_size_bytes, 044 * as well as ppc64 will do all of their discovery in userspace * and will require this interface. 
*/ -#ifdef CONFIG_ARCH_MEMORY_PROBE +#ifdef CONFIG_ARCH_MEMORY_PROBE_RELEASE static ssize_t memory_probe_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -444,6 +452,60 @@ memory_probe_store(struct device *dev, s } static DEVICE_ATTR(probe, S_IWUSR, NULL, memory_probe_store); + +static int is_memblock_offline(struct memory_block *mem, void *arg) +{ + if (mem->state == MEM_ONLINE) + return 1; + + return 0; +} + +static ssize_t +memory_release_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + u64 phys_addr; + int nid, ret = 0; + unsigned long block_size, pfn; + unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block; + + lock_device_hotplug(); + + ret = kstrtoull(buf, 0, &phys_addr); + if (ret) + goto out; + + if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) { + ret = -EINVAL; + goto out; + } + + block_size = get_memory_block_size(); + nid = memory_add_physaddr_to_nid(phys_addr); + + /* Ensure memory is offline and removable before removing it. */ + ret = walk_memory_range(PFN_DOWN(phys_addr), + PFN_UP(phys_addr + block_size - 1), NULL, + is_memblock_offline); + if (!ret) { + pfn = phys_addr >> PAGE_SHIFT; + ret = !is_memblock_removable(pfn_to_section_nr(pfn)); + } + + if (ret) { + ret = -EINVAL; + goto out; + } + + remove_memory(nid, phys_addr, block_size); + +out: + unlock_device_hotplug(); + return ret ? 
ret : count; +} + +static DEVICE_ATTR(release, S_IWUSR, NULL, memory_release_store); #endif #ifdef CONFIG_MEMORY_FAILURE @@ -694,8 +756,9 @@ bool is_memblock_offlined(struct memory_ } static struct attribute *memory_root_attrs[] = { -#ifdef CONFIG_ARCH_MEMORY_PROBE +#ifdef CONFIG_ARCH_MEMORY_PROBE_RELEASE &dev_attr_probe.attr, + &dev_attr_release.attr, #endif #ifdef CONFIG_MEMORY_FAILURE Index: linux/arch/powerpc/Kconfig =================================================================== --- linux.orig/arch/powerpc/Kconfig +++ linux/arch/powerpc/Kconfig @@ -438,7 +438,7 @@ config SYS_SUPPORTS_HUGETLBFS source "mm/Kconfig" -config ARCH_MEMORY_PROBE +config ARCH_MEMORY_PROBE_RELEASE def_bool y depends on MEMORY_HOTPLUG Index: linux/arch/x86/Kconfig =================================================================== --- linux.orig/arch/x86/Kconfig +++ linux/arch/x86/Kconfig @@ -1343,7 +1343,7 @@ config ARCH_SELECT_MEMORY_MODEL def_bool y depends on ARCH_SPARSEMEM_ENABLE -config ARCH_MEMORY_PROBE +config ARCH_MEMORY_PROBE_RELEASE def_bool y depends on X86_64 && MEMORY_HOTPLUG Index: linux/Documentation/memory-hotplug.txt =================================================================== --- linux.orig/Documentation/memory-hotplug.txt +++ linux/Documentation/memory-hotplug.txt @@ -17,7 +17,9 @@ be changed often. 3. sysfs files for memory hotplug 4. Physical memory hot-add phase 4.1 Hardware(Firmware) Support - 4.2 Notify memory hot-add event by hand + 4.2 Notify memory hot-add and hot-remove event by hand + 4.2.1 Probe interface + 4.2.2 Release interface 5. Logical Memory hot-add phase 5.1. State of memory 5.2. How to online memory @@ -69,7 +71,7 @@ management tables, and makes sysfs files If firmware supports notification of connection of new memory to OS, this phase is triggered automatically. ACPI can notify this event. If not, -"probe" operation by system administration is used instead. +"probe" and "release" operations by system administration are used instead. 
(see Section 4.). Logical Memory Hotplug phase is to change memory state into @@ -208,20 +210,23 @@ calls hotplug code for all of objects wh If memory device is found, memory hotplug code will be called. -4.2 Notify memory hot-add event by hand +4.2 Notify memory hot-add and hot-remove event by hand ------------ In some environments, especially virtualized environment, firmware will not notify memory hotplug event to the kernel. For such environment, "probe" -interface is supported. This interface depends on CONFIG_ARCH_MEMORY_PROBE. +and "release" interfaces are supported. These interfaces depend on +CONFIG_ARCH_MEMORY_PROBE_RELEASE. -Now, CONFIG_ARCH_MEMORY_PROBE is supported only by powerpc but it does not -contain highly architecture codes. Please add config if you need "probe" -interface. +Now, CONFIG_ARCH_MEMORY_PROBE_RELEASE is supported by powerpc and x86_64 but it does +not contain highly architecture codes. Please add config if you need "probe" +and "release" interfaces. +4.2.1 "probe" interface +------------ Probe interface is located at /sys/devices/system/memory/probe -You can tell the physical address of new memory to the kernel by +You can tell the physical address of new memory to hot-add to the kernel by % echo start_address_of_new_memory > /sys/devices/system/memory/probe @@ -230,6 +235,19 @@ memory range is hot-added. In this case, current implementation). You'll have to online memory by yourself. Please see "How to online memory" in this text. +4.2.2 "release" interface +------------ +Release interface is located at +/sys/devices/system/memory/release + +You can tell the physical address of memory to hot-remove from the kernel by + +% echo start_address_of_memory > /sys/devices/system/memory/release + +Then, [start_address_of_memory, start_address_of_memory + section_size) +memory range is hot-removed. 
You will need to ensure all of the memory in +this range has been offlined prior to using this interface; please see +"How to offline memory" in this text. ------------------------------ -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href="mailto:dont@xxxxxxxxx"> email@xxxxxxxxx </a>