When a process is bound to a node that is being hot-removed, any memory allocation attempts from that node should fail gracefully without triggering the OOM-killer. However, the current behavior can cause the oom-killer to be invoked, leading to the termination of processes on other nodes, even when there is sufficient memory available in the system. Prevent the oom-killer from being triggered by processes bound to a node undergoing hot-remove operations. Instead, the allocation attempts from the offlining node will simply fail, allowing the process to handle the failure appropriately without causing disruption to the system. Signed-off-by: Li Zhijian <lizhijian@xxxxxxxxxxx> --- include/linux/memory_hotplug.h | 6 ++++++ mm/memory_hotplug.c | 21 +++++++++++++++++++++ mm/page_alloc.c | 6 ++++++ 3 files changed, 33 insertions(+) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 7a9ff464608d..0ca804215e11 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -332,6 +332,7 @@ extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages, extern int remove_memory(u64 start, u64 size); extern void __remove_memory(u64 start, u64 size); extern int offline_and_remove_memory(u64 start, u64 size); +bool is_offlining_node(nodemask_t nodes); #else static inline void try_offline_node(int nid) {} @@ -348,6 +349,11 @@ static inline int remove_memory(u64 start, u64 size) } static inline void __remove_memory(u64 start, u64 size) {} + +static inline bool is_offlining_node(nodemask_t nodes) +{ + return false; +} #endif /* CONFIG_MEMORY_HOTREMOVE */ #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 431b1f6753c0..da3982751ba9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1938,6 +1938,22 @@ static int count_system_ram_pages_cb(unsigned long start_pfn, return 0; } +static nodemask_t offlining_node = NODE_MASK_NONE; + +bool is_offlining_node(nodemask_t nodes) +{ 
+ return nodes_equal(offlining_node, nodes); +} + +static void offline_pages_start(int node) +{ + node_set(node, offlining_node); +} + +static void offline_pages_end(void) +{ + offlining_node = NODE_MASK_NONE; +} /* * Must be called with mem_hotplug_lock in write mode. */ @@ -1991,6 +2007,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, goto failed_removal; } + offline_pages_start(node); /* * Disable pcplists so that page isolation cannot race with freeing * in a way that pages from isolated pageblock are left on pcplists. @@ -2107,6 +2124,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, memory_notify(MEM_OFFLINE, &arg); remove_pfn_range_from_zone(zone, start_pfn, nr_pages); + offline_pages_end(); + return 0; failed_removal_isolated: @@ -2121,6 +2140,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, (unsigned long long) start_pfn << PAGE_SHIFT, ((unsigned long long) end_pfn << PAGE_SHIFT) - 1, reason); + + offline_pages_end(); return ret; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1780df31d5f5..acdab6b114a5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3563,6 +3563,12 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, if (page) goto out; + /* Hot-remove is ongoing: an allocation bound to the node being + * removed is generally expected to fail, so skip the OOM killer. + */ + if (is_offlining_node(*ac->nodemask)) + goto out; + /* Coredumps can quickly deplete all memory reserves */ if (current->flags & PF_DUMPCORE) goto out; -- 2.29.2