Re: [PATCH] mm: Stop kswapd early when nothing's waiting for it to free pages

Mel Gorman <mgorman@xxxxxxx> · Wed, 19 Feb 2020 21:45:13 +0000

On Wed, Feb 19, 2020 at 12:42:20PM -0800, Sultan Alsawaf wrote:
> > Again, do you have more details about the workload and what was the
> > cause of responsiveness issues? Because I would expect that the
> > situation would be quite opposite because it is usually the direct
> > reclaim that is a source of stalls visible from userspace. Or is this
> > about a single CPU situation where kswapd saturates the single CPU and
> > all other tasks are just not getting enough CPU cycles?
> 
> The workload was having lots of applications open at once. At a certain point
> when memory ran low, my system became sluggish and kswapd CPU usage skyrocketed.
> I added printks into kswapd with this patch, and my premature exit in kswapd
> kicked in quite often.
> 

This could be watermark boosting running wild again. Can you test with
sysctl vm.watermark_boost_factor=0 or with the following patch (preferably
both, so that the results can be compared and contrasted)?

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 572fb17c6273..71dd47172cef 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3462,6 +3462,25 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
 	return false;
 }
 
+static void acct_boosted_reclaim(pg_data_t *pgdat, int classzone_idx,
+				unsigned long *zone_boosts)
+{
+	struct zone *zone;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i <= classzone_idx; i++) {
+		if (!zone_boosts[i])
+			continue;
+
+		/* Increments are under the zone lock */
+		zone = pgdat->node_zones + i;
+		spin_lock_irqsave(&zone->lock, flags);
+		zone->watermark_boost -= min(zone->watermark_boost, zone_boosts[i]);
+		spin_unlock_irqrestore(&zone->lock, flags);
+	}
+}
+
 /* Clear pgdat state for congested, dirty or under writeback. */
 static void clear_pgdat_congested(pg_data_t *pgdat)
 {
@@ -3654,9 +3673,17 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 		if (!nr_boost_reclaim && balanced)
 			goto out;
 
-		/* Limit the priority of boosting to avoid reclaim writeback */
-		if (nr_boost_reclaim && sc.priority == DEF_PRIORITY - 2)
-			raise_priority = false;
+		/*
+		 * Abort boosting if reclaiming at higher priority is not
+		 * working to avoid excessive reclaim due to lower zones
+		 * being boosted.
+		 */
+		if (nr_boost_reclaim && sc.priority == DEF_PRIORITY - 2) {
+			acct_boosted_reclaim(pgdat, classzone_idx, zone_boosts);
+			boosted = false;
+			nr_boost_reclaim = 0;
+			goto restart;
+		}
 
 		/*
 		 * Do not writeback or swap pages for boosted reclaim. The
@@ -3738,18 +3765,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 out:
 	/* If reclaim was boosted, account for the reclaim done in this pass */
 	if (boosted) {
-		unsigned long flags;
-
-		for (i = 0; i <= classzone_idx; i++) {
-			if (!zone_boosts[i])
-				continue;
-
-			/* Increments are under the zone lock */
-			zone = pgdat->node_zones + i;
-			spin_lock_irqsave(&zone->lock, flags);
-			zone->watermark_boost -= min(zone->watermark_boost, zone_boosts[i]);
-			spin_unlock_irqrestore(&zone->lock, flags);
-		}
+		acct_boosted_reclaim(pgdat, classzone_idx, zone_boosts);
 
 		/*
 		 * As there is now likely space, wakeup kcompact to defragment