Some devices (esp. paravirtualized) might want to control - when to online/offline a memory block - how to online memory (MOVABLE/NORMAL) - in which granularity to online/offline memory So let's add a new flag "driver_managed" and disallow to change the state by user space. Device onlining/offlining will still work, however the memory will not be actually onlined/offlined. That has to be handled by the device driver that owns the memory. Signed-off-by: David Hildenbrand <david@xxxxxxxxxx> --- drivers/base/memory.c | 22 ++++++++++++++-------- drivers/xen/balloon.c | 2 +- include/linux/memory.h | 1 + include/linux/memory_hotplug.h | 4 +++- mm/memory_hotplug.c | 34 ++++++++++++++++++++++++++++++++-- 5 files changed, 51 insertions(+), 12 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index bffe8616bd55..3b8616551561 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -231,27 +231,28 @@ static bool pages_correctly_probed(unsigned long start_pfn) * Must already be protected by mem_hotplug_begin(). */ static int -memory_block_action(unsigned long phys_index, unsigned long action, int online_type) +memory_block_action(struct memory_block *mem, unsigned long action) { - unsigned long start_pfn; + unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; - int ret; + int ret = 0; - start_pfn = section_nr_to_pfn(phys_index); + if (mem->driver_managed) + return 0; switch (action) { case MEM_ONLINE: if (!pages_correctly_probed(start_pfn)) return -EBUSY; - ret = online_pages(start_pfn, nr_pages, online_type); + ret = online_pages(start_pfn, nr_pages, mem->online_type); break; case MEM_OFFLINE: ret = offline_pages(start_pfn, nr_pages); break; default: WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: " - "%ld\n", __func__, phys_index, action, action); + "%ld\n", __func__, mem->start_section_nr, action, action); ret = -EINVAL; } @@ -269,8 +270,7 @@ static int memory_block_change_state(struct memory_block *mem, if (to_state == MEM_OFFLINE) mem->state = MEM_GOING_OFFLINE; - ret = memory_block_action(mem->start_section_nr, to_state, - mem->online_type); + ret = memory_block_action(mem, to_state); mem->state = ret ? from_state_req : to_state; @@ -350,6 +350,11 @@ store_mem_state(struct device *dev, */ mem_hotplug_begin(); + if (mem->driver_managed) { + ret = -EINVAL; + goto out; + } + switch (online_type) { case MMOP_ONLINE_KERNEL: case MMOP_ONLINE_MOVABLE: @@ -364,6 +369,7 @@ store_mem_state(struct device *dev, ret = -EINVAL; /* should never happen */ } +out: mem_hotplug_done(); err: unlock_device_hotplug(); diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 065f0b607373..89981d573c06 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -401,7 +401,7 @@ static enum bp_state reserve_additional_memory(void) * callers drop the mutex before trying again. */ mutex_unlock(&balloon_mutex); - rc = add_memory_resource(nid, resource, memhp_auto_online); + rc = add_memory_resource(nid, resource, memhp_auto_online, false); mutex_lock(&balloon_mutex); if (rc) { diff --git a/include/linux/memory.h b/include/linux/memory.h index 9f8cd856ca1e..018c5e5ecde1 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -29,6 +29,7 @@ struct memory_block { unsigned long state; /* serialized by the dev->lock */ int section_count; /* serialized by mem_sysfs_mutex */ int online_type; /* for passing data to online routine */ + bool driver_managed; /* driver handles online/offline */ int phys_device; /* to which fru does this belong? */ void *hw; /* optional pointer to fw/hw data */ int (*phys_callback)(struct memory_block *); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index e0e49b5b1ee1..46c6ceb1110d 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -320,7 +320,9 @@ static inline void remove_memory(int nid, u64 start, u64 size) {} extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); -extern int add_memory_resource(int nid, struct resource *resource, bool online); +extern int add_memory_driver_managed(int nid, u64 start, u64 size); +extern int add_memory_resource(int nid, struct resource *resource, bool online, + bool driver_managed); extern int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, bool want_memblock); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1d6054edc241..ac14ea772792 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1108,8 +1108,15 @@ static int online_memory_block(struct memory_block *mem, void *arg) return device_online(&mem->dev); } +static int mark_memory_block_driver_managed(struct memory_block *mem, void *arg) +{ + mem->driver_managed = true; + return 0; +} + /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ -int __ref add_memory_resource(int nid, struct resource *res, bool online) +int __ref add_memory_resource(int nid, struct resource *res, bool online, + bool driver_managed) { u64 start, size; pg_data_t *pgdat = NULL; @@ -1117,6 +1124,9 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online) bool new_node; int ret; + if (online && driver_managed) + return -EINVAL; + start = res->start; size = resource_size(res); @@ -1188,6 +1198,9 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online) if (online) walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL, online_memory_block); + else if (driver_managed) + walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), + NULL, mark_memory_block_driver_managed); goto out; @@ -1212,13 +1225,30 @@ int __ref add_memory(int nid, u64 start, u64 size) if (IS_ERR(res)) return PTR_ERR(res); - ret = add_memory_resource(nid, res, memhp_auto_online); + ret = add_memory_resource(nid, res, memhp_auto_online, false); if (ret < 0) release_memory_resource(res); return ret; } EXPORT_SYMBOL_GPL(add_memory); +int __ref add_memory_driver_managed(int nid, u64 start, u64 size) +{ + struct resource *res; + int ret; + + res = register_memory_resource(start, size); + if (IS_ERR(res)) + return PTR_ERR(res); + + ret = add_memory_resource(nid, res, false, true); + if (ret < 0) + release_memory_resource(res); + return ret; +} +EXPORT_SYMBOL_GPL(add_memory_driver_managed); + + #ifdef CONFIG_MEMORY_HOTREMOVE /* * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy -- 2.14.3