+ mm-fix-endless-reclaim-on-machines-with-unaccepted-memory.patch added to mm-hotfixes-unstable branch

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled
     Subject: mm: fix endless reclaim on machines with unaccepted memory.
has been added to the -mm mm-hotfixes-unstable branch.  Its filename is
     mm-fix-endless-reclaim-on-machines-with-unaccepted-memory.patch

This patch will shortly appear at
     https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-fix-endless-reclaim-on-machines-with-unaccepted-memory.patch

This patch will later appear in the mm-hotfixes-unstable branch at
    git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included in linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days.

------------------------------------------------------
From: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx>
Subject: mm: fix endless reclaim on machines with unaccepted memory.
Date: Tue, 16 Jul 2024 16:00:13 +0300

Unaccepted memory is considered unusable free memory, which is not counted
as free on the zone watermark check.  This causes get_page_from_freelist()
to accept more memory to hit the high watermark, but it creates problems
in the reclaim path.

The reclaim path encounters a failed zone watermark check and attempts to
reclaim memory.  This is usually successful, but if there is little or no
reclaimable memory, it can result in endless reclaim with little to no
progress.  This can occur early in the boot process, just after the start
of the init process, when the only reclaimable memory is the page cache of
the init executable and its libraries.

To address this issue, teach shrink_node() and shrink_zones() to accept
memory before attempting to reclaim.

Link: https://lkml.kernel.org/r/20240716130013.1997325-1-kirill.shutemov@xxxxxxxxxxxxxxx
Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Reported-by: Jianxiong Gao <jxgao@xxxxxxxxxx>
Cc: Borislav Petkov (AMD) <bp@xxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Mike Rapoport (IBM) <rppt@xxxxxxxxxx>
Cc: Tom Lendacky <thomas.lendacky@xxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Cc: stable@xxxxxxxxxxxxxxx	[6.5+]
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 mm/internal.h   |    9 +++++++++
 mm/page_alloc.c |    8 +-------
 mm/vmscan.c     |   36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 7 deletions(-)

--- a/mm/internal.h~mm-fix-endless-reclaim-on-machines-with-unaccepted-memory
+++ a/mm/internal.h
@@ -1515,4 +1515,13 @@ static inline void shrinker_debugfs_remo
 void workingset_update_node(struct xa_node *node);
 extern struct list_lru shadow_nodes;
 
+#ifdef CONFIG_UNACCEPTED_MEMORY
+bool try_to_accept_memory(struct zone *zone, unsigned int order);
+#else
+static inline bool try_to_accept_memory(struct zone *zone, unsigned int order)
+{
+	return false;
+}
+#endif /* CONFIG_UNACCEPTED_MEMORY */
+
 #endif	/* __MM_INTERNAL_H */
--- a/mm/page_alloc.c~mm-fix-endless-reclaim-on-machines-with-unaccepted-memory
+++ a/mm/page_alloc.c
@@ -287,7 +287,6 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 static bool page_contains_unaccepted(struct page *page, unsigned int order);
 static void accept_page(struct page *page, unsigned int order);
-static bool try_to_accept_memory(struct zone *zone, unsigned int order);
 static inline bool has_unaccepted_memory(void);
 static bool __free_unaccepted(struct page *page);
 
@@ -6940,7 +6939,7 @@ static bool try_to_accept_memory_one(str
 	return true;
 }
 
-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
+bool try_to_accept_memory(struct zone *zone, unsigned int order)
 {
 	long to_accept;
 	int ret = false;
@@ -6999,11 +6998,6 @@ static void accept_page(struct page *pag
 {
 }
 
-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
-{
-	return false;
-}
-
 static inline bool has_unaccepted_memory(void)
 {
 	return false;
--- a/mm/vmscan.c~mm-fix-endless-reclaim-on-machines-with-unaccepted-memory
+++ a/mm/vmscan.c
@@ -5900,12 +5900,44 @@ static void shrink_node_memcgs(pg_data_t
 	} while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
 }
 
+#ifdef CONFIG_UNACCEPTED_MEMORY
+static bool node_try_to_accept_memory(pg_data_t *pgdat, struct scan_control *sc)
+{
+	bool progress = false;
+	struct zone *zone;
+	int z;
+
+	for (z = 0; z <= sc->reclaim_idx; z++) {
+		zone = pgdat->node_zones + z;
+		if (!managed_zone(zone))
+			continue;
+
+		if (try_to_accept_memory(zone, sc->order))
+			progress = true;
+	}
+
+	return progress;
+}
+#else
+static inline bool node_try_to_accept_memory(pg_data_t *pgdat,
+					     struct scan_control *sc)
+{
+	return false;
+}
+#endif /* CONFIG_UNACCEPTED_MEMORY */
+
 static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 {
 	unsigned long nr_reclaimed, nr_scanned, nr_node_reclaimed;
 	struct lruvec *target_lruvec;
 	bool reclaimable = false;
 
+	/* Try to accept memory before going for reclaim */
+	if (node_try_to_accept_memory(pgdat, sc)) {
+		if (!should_continue_reclaim(pgdat, 0, sc))
+			return;
+	}
+
 	if (lru_gen_enabled() && root_reclaim(sc)) {
 		lru_gen_shrink_node(pgdat, sc);
 		return;
@@ -6118,6 +6150,10 @@ static void shrink_zones(struct zonelist
 						 GFP_KERNEL | __GFP_HARDWALL))
 				continue;
 
+			/* Try to accept memory before going for reclaim */
+			if (try_to_accept_memory(zone, sc->order))
+				continue;
+
 			/*
 			 * If we already have plenty of memory free for
 			 * compaction in this zone, don't free any more.
_

Patches currently in -mm which might be from kirill.shutemov@xxxxxxxxxxxxxxx are

mm-fix-endless-reclaim-on-machines-with-unaccepted-memory.patch





[Index of Archives]     [Kernel Archive]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux