netperf-tcp
                                 5.17.0-rc3             5.17.0-rc3             5.17.0-rc3
                                    vanilla    mm-reverthighpcp-v1     mm-highpcplimit-v1
Min       64          935.38 (   0.00%)      939.40 (   0.43%)      940.11 (   0.51%)
Min       128        1831.69 (   0.00%)     1856.15 (   1.34%)     1849.30 (   0.96%)
Min       256        3560.61 (   0.00%)     3659.25 (   2.77%)     3654.12 (   2.63%)
Min       1024      13165.24 (   0.00%)    13444.74 (   2.12%)    13281.71 (   0.88%)
Min       2048      22706.44 (   0.00%)    23219.67 (   2.26%)    23027.31 (   1.41%)
Min       3312      30960.26 (   0.00%)    31985.01 (   3.31%)    31484.40 (   1.69%)
Min       4096      35149.03 (   0.00%)    35997.44 (   2.41%)    35891.92 (   2.11%)
Min       8192      48064.73 (   0.00%)    49574.05 (   3.14%)    48928.89 (   1.80%)
Min       16384     58017.25 (   0.00%)    60352.93 (   4.03%)    60691.14 (   4.61%)
Hmean     64          938.95 (   0.00%)      941.50 *   0.27%*      940.47 (   0.16%)
Hmean     128        1843.10 (   0.00%)     1857.58 *   0.79%*     1855.83 *   0.69%*
Hmean     256        3573.07 (   0.00%)     3667.45 *   2.64%*     3662.08 *   2.49%*
Hmean     1024      13206.52 (   0.00%)    13487.80 *   2.13%*    13351.11 *   1.09%*
Hmean     2048      22870.23 (   0.00%)    23337.96 *   2.05%*    23149.68 *   1.22%*
Hmean     3312      31001.99 (   0.00%)    32206.50 *   3.89%*    31849.40 *   2.73%*
Hmean     4096      35364.59 (   0.00%)    36490.96 *   3.19%*    36112.91 *   2.12%*
Hmean     8192      48497.71 (   0.00%)    49954.05 *   3.00%*    49384.50 *   1.83%*
Hmean     16384     58410.86 (   0.00%)    60839.80 *   4.16%*    61362.12 *   5.05%*

Note that this was a machine that did not benefit from caching high-order
pages, and performance is almost restored with the series applied. It is
not fully restored as cache misses are still higher. This is a trade-off
between optimising for a workload that does all allocs on one CPU and
frees on another, and more general workloads that need high-order pages
for SLUB and benefit from avoiding zone->lock for every SLUB refill/drain.

Signed-off-by: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
---
 mm/page_alloc.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6881175b27df..cfb3cbad152c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3314,10 +3314,15 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn,
 	return true;
 }
 
-static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch)
+static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch,
+		       bool free_high)
 {
 	int min_nr_free, max_nr_free;
 
+	/* Free everything if batch freeing high-order pages. */
+	if (unlikely(free_high))
+		return pcp->count;
+
 	/* Check for PCP disabled or boot pageset */
 	if (unlikely(high < batch))
 		return 1;
@@ -3338,11 +3343,12 @@ static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch)
 	return batch;
 }
 
-static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone)
+static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone,
+		       bool free_high)
 {
 	int high = READ_ONCE(pcp->high);
 
-	if (unlikely(!high))
+	if (unlikely(!high || free_high))
 		return 0;
 
 	if (!test_bit(ZONE_RECLAIM_ACTIVE, &zone->flags))
@@ -3362,17 +3368,27 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn,
 	struct per_cpu_pages *pcp;
 	int high;
 	int pindex;
+	bool free_high;
 
 	__count_vm_event(PGFREE);
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	pindex = order_to_pindex(migratetype, order);
 	list_add(&page->lru, &pcp->lists[pindex]);
 	pcp->count += 1 << order;
-	high = nr_pcp_high(pcp, zone);
+
+	/*
+	 * As high-order pages other than THP's stored on PCP can contribute
+	 * to fragmentation, limit the number stored when PCP is heavily
+	 * freeing without allocation. The remainder after bulk freeing
+	 * stops will be drained from vmstat refresh context.
+	 */
+	free_high = (pcp->free_factor && order && order <= PAGE_ALLOC_COSTLY_ORDER);
+
+	high = nr_pcp_high(pcp, zone, free_high);
 	if (pcp->count >= high) {
 		int batch = READ_ONCE(pcp->batch);
 
-		free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch), pcp, pindex);
+		free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch, free_high), pcp, pindex);
 	}
 }
 
-- 
2.31.1
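As a rough illustration of the behaviour the patch introduces, the sketch
below models the free_high decision in plain userspace C. It is not kernel
code: struct pcp_model, model_free_high(), model_nr_to_free() and the values
in main() are made-up stand-ins for the real per_cpu_pages state; only the
free_factor/order test and the PAGE_ALLOC_COSTLY_ORDER cutoff mirror the
patch above.

/*
 * Standalone sketch (not kernel code) of the free_high decision added to
 * free_unref_page_commit().  All names and numbers here are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_ALLOC_COSTLY_ORDER	3	/* same cutoff value as the kernel */

/* Minimal stand-in for the relevant per_cpu_pages fields. */
struct pcp_model {
	int count;		/* pages currently held on the PCP lists */
	int high;		/* normal limit before draining to the zone */
	int batch;		/* normal drain granularity */
	int free_factor;	/* non-zero while frees dominate allocations */
};

/* Mirrors the patch's test: a high-order (but not costly-order) page is
 * being freed while the PCP is in a heavy-freeing phase. */
static bool model_free_high(const struct pcp_model *pcp, unsigned int order)
{
	return pcp->free_factor && order && order <= PAGE_ALLOC_COSTLY_ORDER;
}

/* Mirrors the nr_pcp_free() change: drain everything when free_high is
 * set, otherwise fall back to a batch-sized drain (simplified). */
static int model_nr_to_free(const struct pcp_model *pcp, bool free_high)
{
	return free_high ? pcp->count : pcp->batch;
}

int main(void)
{
	struct pcp_model pcp = { .count = 192, .high = 256, .batch = 63,
				 .free_factor = 1 };

	for (unsigned int order = 0; order <= 4; order++) {
		bool free_high = model_free_high(&pcp, order);

		/* free_high also forces the effective high limit to 0,
		 * as nr_pcp_high() now does. */
		int high = free_high ? 0 : pcp.high;

		if (pcp.count >= high)
			printf("order %u: drain %d pages (high=%d)\n",
			       order, model_nr_to_free(&pcp, free_high), high);
		else
			printf("order %u: keep caching (count=%d < high=%d)\n",
			       order, pcp.count, pcp.high);
	}
	return 0;
}

In this model, order-0 frees and orders above PAGE_ALLOC_COSTLY_ORDER keep
the normal pcp->high behaviour, while orders 1-3 drain the whole PCP list
as soon as free_factor is non-zero, which is the limiting behaviour the
patch aims for.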