From: David Hildenbrand <david@xxxxxxxxxx> Subject: mm/memory_hotplug: MEMHP_MERGE_RESOURCE to specify merging of System RAM resources Some add_memory*() users add memory in small, contiguous memory blocks. Examples include virtio-mem, hyper-v balloon, and the XEN balloon. This can quickly result in a lot of memory resources, whereby the actual resource boundaries are not of interest (e.g., it might be relevant for DIMMs, exposed via /proc/iomem to user space). We really want to merge added resources in this scenario where possible. Let's provide a flag (MEMHP_MERGE_RESOURCE) to specify that a resource either created within add_memory*() or passed via add_memory_resource() shall be marked mergeable and merged with applicable siblings. To implement that, we need a kernel/resource interface to mark selected System RAM resources mergeable (IORESOURCE_SYSRAM_MERGEABLE) and trigger merging. Note: We really want to merge after the whole operation succeeded, not directly when adding a resource to the resource tree (it would break add_memory_resource() and require splitting resources again when the operation failed - e.g., due to -ENOMEM). Link: https://lkml.kernel.org/r/20200911103459.10306-6-david@xxxxxxxxxx Signed-off-by: David Hildenbrand <david@xxxxxxxxxx> Reviewed-by: Pankaj Gupta <pankaj.gupta.linux@xxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxx> Cc: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: Jason Gunthorpe <jgg@xxxxxxxx> Cc: Kees Cook <keescook@xxxxxxxxxxxx> Cc: Ard Biesheuvel <ardb@xxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: "K. Y. Srinivasan" <kys@xxxxxxxxxxxxx> Cc: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx> Cc: Stephen Hemminger <sthemmin@xxxxxxxxxxxxx> Cc: Wei Liu <wei.liu@xxxxxxxxxx> Cc: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx> Cc: Juergen Gross <jgross@xxxxxxxx> Cc: Stefano Stabellini <sstabellini@xxxxxxxxxx> Cc: Roger Pau Monné <roger.pau@xxxxxxxxxx> Cc: Julien Grall <julien@xxxxxxx> Cc: Baoquan He <bhe@xxxxxxxxxx> Cc: Wei Yang <richardw.yang@xxxxxxxxxxxxxxx> Cc: Anton Blanchard <anton@xxxxxxxxxx> Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx> Cc: Christian Borntraeger <borntraeger@xxxxxxxxxx> Cc: Dave Jiang <dave.jiang@xxxxxxxxx> Cc: Eric Biederman <ebiederm@xxxxxxxxxxxx> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> Cc: Heiko Carstens <hca@xxxxxxxxxxxxx> Cc: Jason Wang <jasowang@xxxxxxxxxx> Cc: Len Brown <lenb@xxxxxxxxxx> Cc: Leonardo Bras <leobras.c@xxxxxxxxx> Cc: Libor Pechacek <lpechacek@xxxxxxx> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Cc: "Michael S. Tsirkin" <mst@xxxxxxxxxx> Cc: Nathan Lynch <nathanl@xxxxxxxxxxxxx> Cc: "Oliver O'Halloran" <oohall@xxxxxxxxx> Cc: Paul Mackerras <paulus@xxxxxxxxx> Cc: Pingfan Liu <kernelfans@xxxxxxxxx> Cc: "Rafael J. Wysocki" <rjw@xxxxxxxxxxxxx> Cc: Vasily Gorbik <gor@xxxxxxxxxxxxx> Cc: Vishal Verma <vishal.l.verma@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/ioport.h | 4 ++ include/linux/memory_hotplug.h | 7 +++ kernel/resource.c | 60 +++++++++++++++++++++++++++++++ mm/memory_hotplug.c | 7 +++ 4 files changed, 78 insertions(+) --- a/include/linux/ioport.h~mm-memory_hotplug-memhp_merge_resource-to-specify-merging-of-system-ram-resources +++ a/include/linux/ioport.h @@ -60,6 +60,7 @@ struct resource { /* IORESOURCE_SYSRAM specific bits. */ #define IORESOURCE_SYSRAM_DRIVER_MANAGED 0x02000000 /* Always detected via a driver. */ +#define IORESOURCE_SYSRAM_MERGEABLE 0x04000000 /* Resource can be merged. */ #define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ @@ -253,6 +254,9 @@ extern void __release_region(struct reso extern void release_mem_region_adjustable(struct resource *, resource_size_t, resource_size_t); #endif +#ifdef CONFIG_MEMORY_HOTPLUG +extern void merge_system_ram_resource(struct resource *res); +#endif /* Wrappers for managed devices */ struct device; --- a/include/linux/memory_hotplug.h~mm-memory_hotplug-memhp_merge_resource-to-specify-merging-of-system-ram-resources +++ a/include/linux/memory_hotplug.h @@ -62,6 +62,13 @@ typedef int __bitwise mhp_t; /* No special request */ #define MHP_NONE ((__force mhp_t)0) +/* + * Allow merging of the added System RAM resource with adjacent, + * mergeable resources. After a successful call to add_memory_resource() + * with this flag set, the resource pointer must no longer be used as it + * might be stale, or the resource might have changed. + */ +#define MEMHP_MERGE_RESOURCE ((__force mhp_t)BIT(0)) /* * Extended parameters for memory hotplug: --- a/kernel/resource.c~mm-memory_hotplug-memhp_merge_resource-to-specify-merging-of-system-ram-resources +++ a/kernel/resource.c @@ -1363,6 +1363,66 @@ retry: } #endif /* CONFIG_MEMORY_HOTREMOVE */ +#ifdef CONFIG_MEMORY_HOTPLUG +static bool system_ram_resources_mergeable(struct resource *r1, + struct resource *r2) +{ + /* We assume either r1 or r2 is IORESOURCE_SYSRAM_MERGEABLE. */ + return r1->flags == r2->flags && r1->end + 1 == r2->start && + r1->name == r2->name && r1->desc == r2->desc && + !r1->child && !r2->child; +} + +/* + * merge_system_ram_resource - mark the System RAM resource mergeable and try to + * merge it with adjacent, mergeable resources + * @res: resource descriptor + * + * This interface is intended for memory hotplug, whereby lots of contiguous + * system ram resources are added (e.g., via add_memory*()) by a driver, and + * the actual resource boundaries are not of interest (e.g., it might be + * relevant for DIMMs). Only resources that are marked mergeable, that have the + * same parent, and that don't have any children are considered. All mergeable + * resources must be immutable during the request. + * + * Note: + * - The caller has to make sure that no pointers to resources that are + * marked mergeable are used anymore after this call - the resource might + * be freed and the pointer might be stale! + * - release_mem_region_adjustable() will split on demand on memory hotunplug + */ +void merge_system_ram_resource(struct resource *res) +{ + const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + struct resource *cur; + + if (WARN_ON_ONCE((res->flags & flags) != flags)) + return; + + write_lock(&resource_lock); + res->flags |= IORESOURCE_SYSRAM_MERGEABLE; + + /* Try to merge with next item in the list. */ + cur = res->sibling; + if (cur && system_ram_resources_mergeable(res, cur)) { + res->end = cur->end; + res->sibling = cur->sibling; + free_resource(cur); + } + + /* Try to merge with previous item in the list. */ + cur = res->parent->child; + while (cur && cur->sibling != res) + cur = cur->sibling; + if (cur && system_ram_resources_mergeable(cur, res)) { + cur->end = res->end; + cur->sibling = res->sibling; + free_resource(res); + } + write_unlock(&resource_lock); +} +#endif /* CONFIG_MEMORY_HOTPLUG */ + /* * Managed region resource */ --- a/mm/memory_hotplug.c~mm-memory_hotplug-memhp_merge_resource-to-specify-merging-of-system-ram-resources +++ a/mm/memory_hotplug.c @@ -1103,6 +1103,13 @@ int __ref add_memory_resource(int nid, s /* device_online() will take the lock when calling online_pages() */ mem_hotplug_done(); + /* + * In case we're allowed to merge the resource, flag it and trigger + * merging now that adding succeeded. + */ + if (mhp_flags & MEMHP_MERGE_RESOURCE) + merge_system_ram_resource(res); + /* online pages if requested */ if (memhp_default_online_type != MMOP_OFFLINE) walk_memory_blocks(start, size, NULL, online_memory_block); _