Re: [PATCH 9/9] mm: zone_reclaim: compaction: add compaction to zone_reclaim_mode

On Fri, Aug 02, 2013 at 06:06:36PM +0200, Andrea Arcangeli wrote:
> +		need_compaction = false;

This should be changed to "*need_compaction = false". It's really just
a cleanup, because the change is a no-op at runtime: need_compaction is
already initialized to false by the only caller, so the missing
dereference couldn't do any harm. Still, it's better to fix it to avoid
confusion. Alternatively the line could be dropped entirely, but I
thought it was cleaner to have the function always leave
*need_compaction with a defined value.
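
To make the distinction concrete, here is a minimal standalone sketch in
plain userspace C (try_compact() and the other names are made up for
illustration and are not part of the kernel patch below) of the
out-parameter pattern involved:

/*
 * Illustrative userspace sketch only; not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

/* Reports through the out-parameter whether compaction was attempted. */
static int try_compact(int order, bool *need_compaction)
{
	if (!order) {
		/*
		 * Early-exit path: assign through the pointer.  Writing
		 * "need_compaction = false;" here would only clear the
		 * local pointer variable (the pointer/integer mismatch
		 * the kbuild robot warns about) and would leave the
		 * caller's flag untouched.
		 */
		*need_compaction = false;
		return 0;	/* skipped */
	}
	*need_compaction = true;
	return 1;		/* attempted */
}

int main(void)
{
	/* The caller initializes the flag, as zone_reclaim() does. */
	bool need_compaction = false;

	int ret = try_compact(0, &need_compaction);
	printf("ret=%d need_compaction=%d\n", ret, need_compaction);
	return 0;
}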

Found by Fengguang's kbuild robot.

A new replacement patch 9/9 is appended below:

===
From: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Subject: [PATCH] mm: zone_reclaim: compaction: add compaction to
 zone_reclaim_mode

This adds compaction to zone_reclaim so that enabling THP doesn't
decrease NUMA locality when /proc/sys/vm/zone_reclaim_mode > 0.

It is important to boot with numa_zonelist_order=n (n means nodes) to
get more accurate NUMA locality if there are multiple zones per node.

Signed-off-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
---
 include/linux/swap.h |   8 +++-
 mm/page_alloc.c      |   4 +-
 mm/vmscan.c          | 111 ++++++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 102 insertions(+), 21 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index d95cde5..d076a54 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -289,10 +289,14 @@ extern unsigned long vm_total_pages;
 extern int zone_reclaim_mode;
 extern int sysctl_min_unmapped_ratio;
 extern int sysctl_min_slab_ratio;
-extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
+extern int zone_reclaim(struct zone *, struct zone *, gfp_t, unsigned int,
+			unsigned long, int, int);
 #else
 #define zone_reclaim_mode 0
-static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
+static inline int zone_reclaim(struct zone *preferred_zone, struct zone *zone,
+			       gfp_t mask, unsigned int order,
+			       unsigned long mark, int classzone_idx,
+			       int alloc_flags)
 {
 	return 0;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 879a3fd..c0bdde6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1982,7 +1982,9 @@ zonelist_scan:
 				!zlc_zone_worth_trying(zonelist, z, allowednodes))
 				continue;
 
-			ret = zone_reclaim(zone, gfp_mask, order);
+			ret = zone_reclaim(preferred_zone, zone, gfp_mask,
+					   order,
+					   mark, classzone_idx, alloc_flags);
 			switch (ret) {
 			case ZONE_RECLAIM_NOSCAN:
 				/* did not scan */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f2ada36..fedb246 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3488,6 +3488,24 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	unsigned long nr_slab_pages0, nr_slab_pages1;
 
 	cond_resched();
+
+	/*
+	 * Zone reclaim reclaims unmapped file backed pages and
+	 * slab pages if we are over the defined limits.
+	 *
+	 * A small portion of unmapped file backed pages is needed for
+	 * file I/O otherwise pages read by file I/O will be immediately
+	 * thrown out if the zone is overallocated. So we do not reclaim
+	 * if less than a specified percentage of the zone is used by
+	 * unmapped file backed pages.
+	 */
+	if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
+	    zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
+		return ZONE_RECLAIM_FULL;
+
+	if (zone->all_unreclaimable)
+		return ZONE_RECLAIM_FULL;
+
 	/*
 	 * We need to be able to allocate from the reserves for RECLAIM_SWAP
 	 * and we also need to be able to write out pages for RECLAIM_WRITE
@@ -3549,27 +3567,35 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	return sc.nr_reclaimed >= nr_pages;
 }
 
-int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
+static int zone_reclaim_compact(struct zone *preferred_zone,
+				struct zone *zone, gfp_t gfp_mask,
+				unsigned int order,
+				bool sync_compaction,
+				bool *need_compaction)
 {
-	int node_id;
-	int ret;
+	bool contended;
 
-	/*
-	 * Zone reclaim reclaims unmapped file backed pages and
-	 * slab pages if we are over the defined limits.
-	 *
-	 * A small portion of unmapped file backed pages is needed for
-	 * file I/O otherwise pages read by file I/O will be immediately
-	 * thrown out if the zone is overallocated. So we do not reclaim
-	 * if less than a specified percentage of the zone is used by
-	 * unmapped file backed pages.
-	 */
-	if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
-	    zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
-		return ZONE_RECLAIM_FULL;
+	if (compaction_deferred(preferred_zone, order) ||
+	    !order ||
+	    (gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO)) {
+		*need_compaction = false;
+		return COMPACT_SKIPPED;
+	}
 
-	if (zone->all_unreclaimable)
-		return ZONE_RECLAIM_FULL;
+	*need_compaction = true;
+	return compact_zone_order(zone, order,
+				  gfp_mask,
+				  sync_compaction,
+				  &contended);
+}
+
+int zone_reclaim(struct zone *preferred_zone, struct zone *zone,
+		 gfp_t gfp_mask, unsigned int order,
+		 unsigned long mark, int classzone_idx, int alloc_flags)
+{
+	int node_id;
+	int ret, c_ret;
+	bool sync_compaction = false, need_compaction = false;
 
 	/*
 	 * Do not scan if the allocation should not be delayed.
@@ -3587,7 +3613,56 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	if (node_state(node_id, N_CPU) && node_id != numa_node_id())
 		return ZONE_RECLAIM_NOSCAN;
 
+repeat_compaction:
+	/*
+	 * If this allocation may be satisfied by memory compaction,
+	 * run compaction before reclaim.
+	 */
+	c_ret = zone_reclaim_compact(preferred_zone,
+				     zone, gfp_mask, order,
+				     sync_compaction,
+				     &need_compaction);
+	if (need_compaction &&
+	    c_ret != COMPACT_SKIPPED &&
+	    zone_watermark_ok(zone, order, mark,
+			      classzone_idx,
+			      alloc_flags)) {
+#ifdef CONFIG_COMPACTION
+		zone->compact_considered = 0;
+		zone->compact_defer_shift = 0;
+#endif
+		return ZONE_RECLAIM_SUCCESS;
+	}
+
+	/*
+	 * reclaim if compaction failed because not enough memory was
+	 * available or if compaction didn't run (order 0) or didn't
+	 * succeed.
+	 */
 	ret = __zone_reclaim(zone, gfp_mask, order);
+	if (ret == ZONE_RECLAIM_SUCCESS) {
+		if (zone_watermark_ok(zone, order, mark,
+				      classzone_idx,
+				      alloc_flags))
+			return ZONE_RECLAIM_SUCCESS;
+
+		/*
+		 * If compaction ran but was skipped and reclaim was
+		 * successful, keep going.
+		 */
+		if (need_compaction && c_ret == COMPACT_SKIPPED) {
+			/*
+			 * If it's ok to wait for I/O we might as well run
+			 * sync compaction
+			 */
+			sync_compaction = !!(zone_reclaim_mode &
+					     (RECLAIM_WRITE|RECLAIM_SWAP));
+			cond_resched();
+			goto repeat_compaction;
+		}
+	}
+	if (need_compaction)
+		defer_compaction(preferred_zone, order);
 
 	if (!ret)
 		count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
