[PATCH v3 05/13] mm, compaction: report compaction as contended only due to lock contention

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Async compaction aborts when it detects zone lock contention or need_resched()
is true. David Rientjes has reported that in practice, most direct async
compactions for THP allocation abort due to need_resched(). This means that a
second direct compaction is never attempted, which might be OK for a page
fault, but khugepaged is intended to attempt a sync compaction in such case and
in these cases it won't.

This patch replaces "bool contended" in compact_control with an enum that
distinguieshes between aborting due to need_resched() and aborting due to lock
contention. This allows propagating the abort through all compaction functions
as before, but declaring the direct compaction as contended only when lock
contention has been detected.

A second problem is that try_to_compact_pages() did not act upon the reported
contention (both need_resched() or lock contention) and could proceed with
another zone from the zonelist. When need_resched() is true, that means
initializing another zone compaction, only to check again need_resched() in
isolate_migratepages() and aborting. For zone lock contention, the unintended
consequence is that the contended status reported back to the allocator
is decided from the last zone where compaction was attempted, which is rather
arbitrary.

This patch fixes the problem in the following way:
- need_resched() being true after async compaction returned from a zone means
  that further zones should not be tried. We do a cond_resched() so that we
  do not hog the CPU, and abort. "contended" is reported as false, since we
  did not fail due to lock contention.
- aborting zone compaction due to lock contention means we can still try
  another zone, since it has different locks. We report back "contended" as
  true only if *all* zones where compaction was attempted, it aborted due to
  lock contention.

As a result of these fixes, khugepaged will proceed with second sync compaction
as intended, when the preceding async compaction aborted due to need_resched().
Page fault compactions aborting due to need_resched() will spare some cycles
previously wasted by initializing another zone compaction only to abort again.
Lock contention will be reported only when compaction in all zones aborted due
to lock contention, and therefore it's not a good idea to try again after
reclaim.

Reported-by: David Rientjes <rientjes@xxxxxxxxxx>
Signed-off-by: Vlastimil Babka <vbabka@xxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
Cc: Michal Nazarewicz <mina86@xxxxxxxxxx>
Cc: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
---
 mm/compaction.c | 48 +++++++++++++++++++++++++++++++++++++++---------
 mm/internal.h   | 15 +++++++++++----
 2 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index ebe30c9..e8cfac9 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -180,9 +180,14 @@ static void update_pageblock_skip(struct compact_control *cc,
 }
 #endif /* CONFIG_COMPACTION */
 
-static inline bool should_release_lock(spinlock_t *lock)
+enum compact_contended should_release_lock(spinlock_t *lock)
 {
-	return need_resched() || spin_is_contended(lock);
+	if (spin_is_contended(lock))
+		return COMPACT_CONTENDED_LOCK;
+	else if (need_resched())
+		return COMPACT_CONTENDED_SCHED;
+	else
+		return COMPACT_CONTENDED_NONE;
 }
 
 /*
@@ -197,7 +202,9 @@ static inline bool should_release_lock(spinlock_t *lock)
 static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
 				      bool locked, struct compact_control *cc)
 {
-	if (should_release_lock(lock)) {
+	enum compact_contended contended = should_release_lock(lock);
+
+	if (contended) {
 		if (locked) {
 			spin_unlock_irqrestore(lock, *flags);
 			locked = false;
@@ -205,7 +212,7 @@ static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
 
 		/* async aborts if taking too long or contended */
 		if (cc->mode == MIGRATE_ASYNC) {
-			cc->contended = true;
+			cc->contended = contended;
 			return false;
 		}
 
@@ -231,7 +238,7 @@ static inline bool compact_should_abort(struct compact_control *cc)
 	/* async compaction aborts if contended */
 	if (need_resched()) {
 		if (cc->mode == MIGRATE_ASYNC) {
-			cc->contended = true;
+			cc->contended = COMPACT_CONTENDED_SCHED;
 			return true;
 		}
 
@@ -1101,7 +1108,8 @@ static unsigned long compact_zone_order(struct zone *zone, int order,
 	VM_BUG_ON(!list_empty(&cc.freepages));
 	VM_BUG_ON(!list_empty(&cc.migratepages));
 
-	*contended = cc.contended;
+	/* We only signal lock contention back to the allocator */
+	*contended = cc.contended == COMPACT_CONTENDED_LOCK;
 	return ret;
 }
 
@@ -1132,6 +1140,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	struct zone *zone;
 	int rc = COMPACT_SKIPPED;
 	int alloc_flags = 0;
+	bool all_zones_contended = true;
 
 	/* Check if the GFP flags allow compaction */
 	if (!order || !may_enter_fs || !may_perform_io)
@@ -1146,6 +1155,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
 								nodemask) {
 		int status;
+		bool zone_contended;
 
 		if (compaction_deferred(zone, order))
 			continue;
@@ -1153,8 +1163,9 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 		*deferred = false;
 
 		status = compact_zone_order(zone, order, gfp_mask, mode,
-						contended);
+							&zone_contended);
 		rc = max(status, rc);
+		all_zones_contended &= zone_contended;
 
 		/* If a normal allocation would succeed, stop compacting */
 		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0,
@@ -1168,12 +1179,31 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			 * succeeding after all, it will be reset.
 			 */
 			defer_compaction(zone, order);
+			/*
+			 * If we stopped compacting due to need_resched(), do
+			 * not try further zones and yield the CPU.
+			 */
+			if (need_resched()) {
+				/*
+				 * We might not have tried all the zones, so
+				 * be conservative and assume they are not
+				 * all lock contended.
+				 */
+				all_zones_contended = false;
+				cond_resched();
+				break;
+			}
 		}
 	}
 
-	/* If at least one zone wasn't deferred, we count a compaction stall */
-	if (!*deferred)
+	/*
+	 * If at least one zone wasn't deferred, we count a compaction stall
+	 * and we report if all zones that were tried were contended.
+	 */
+	if (!*deferred) {
 		count_compact_event(COMPACTSTALL);
+		*contended = all_zones_contended;
+	}
 
 	return rc;
 }
diff --git a/mm/internal.h b/mm/internal.h
index a1b651b..2c187d2 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -117,6 +117,13 @@ extern int user_min_free_kbytes;
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
+/* Used to signal whether compaction detected need_sched() or lock contention */
+enum compact_contended {
+	COMPACT_CONTENDED_NONE = 0, /* no contention detected */
+	COMPACT_CONTENDED_SCHED,    /* need_sched() was true */
+	COMPACT_CONTENDED_LOCK,     /* zone lock or lru_lock was contended */
+};
+
 /*
  * in mm/compaction.c
  */
@@ -144,10 +151,10 @@ struct compact_control {
 	int order;			/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
-	bool contended;			/* True if a lock was contended, or
-					 * need_resched() true during async
-					 * compaction
-					 */
+	enum compact_contended contended; /* Signal need_sched() or lock
+					   * contention detected during
+					   * compaction
+					   */
 };
 
 unsigned long
-- 
1.8.4.5

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]