netperf-tcp
                                 5.17.0-rc3             5.17.0-rc3             5.17.0-rc3
                                    vanilla    mm-reverthighpcp-v1     mm-highpcplimit-v1
Min       64          935.38 (   0.00%)      939.40 (   0.43%)      940.11 (   0.51%)
Min       128        1831.69 (   0.00%)     1856.15 (   1.34%)     1849.30 (   0.96%)
Min       256        3560.61 (   0.00%)     3659.25 (   2.77%)     3654.12 (   2.63%)
Min       1024      13165.24 (   0.00%)    13444.74 (   2.12%)    13281.71 (   0.88%)
Min       2048      22706.44 (   0.00%)    23219.67 (   2.26%)    23027.31 (   1.41%)
Min       3312      30960.26 (   0.00%)    31985.01 (   3.31%)    31484.40 (   1.69%)
Min       4096      35149.03 (   0.00%)    35997.44 (   2.41%)    35891.92 (   2.11%)
Min       8192      48064.73 (   0.00%)    49574.05 (   3.14%)    48928.89 (   1.80%)
Min       16384     58017.25 (   0.00%)    60352.93 (   4.03%)    60691.14 (   4.61%)
Hmean     64          938.95 (   0.00%)      941.50 *   0.27%*      940.47 (   0.16%)
Hmean     128        1843.10 (   0.00%)     1857.58 *   0.79%*     1855.83 *   0.69%*
Hmean     256        3573.07 (   0.00%)     3667.45 *   2.64%*     3662.08 *   2.49%*
Hmean     1024      13206.52 (   0.00%)    13487.80 *   2.13%*    13351.11 *   1.09%*
Hmean     2048      22870.23 (   0.00%)    23337.96 *   2.05%*    23149.68 *   1.22%*
Hmean     3312      31001.99 (   0.00%)    32206.50 *   3.89%*    31849.40 *   2.73%*
Hmean     4096      35364.59 (   0.00%)    36490.96 *   3.19%*    36112.91 *   2.12%*
Hmean     8192      48497.71 (   0.00%)    49954.05 *   3.00%*    49384.50 *   1.83%*
Hmean     16384     58410.86 (   0.00%)    60839.80 *   4.16%*    61362.12 *   5.05%*

Note that this was a machine that did not benefit from caching high-order
pages, and performance is almost restored with the series applied. It is
not fully restored as cache misses are still higher. This is a trade-off
between optimising for a workload that does all allocs on one CPU and
frees on another, and more general workloads that need high-order pages
for SLUB and benefit from avoiding zone->lock for every SLUB refill/drain.

Signed-off-by: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
---
 mm/page_alloc.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6881175b27df..cfb3cbad152c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3314,10 +3314,15 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn,
 	return true;
 }
 
-static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch)
+static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch,
+		       bool free_high)
 {
 	int min_nr_free, max_nr_free;
 
+	/* Free everything if batch freeing high-order pages. */
+	if (unlikely(free_high))
+		return pcp->count;
+
 	/* Check for PCP disabled or boot pageset */
 	if (unlikely(high < batch))
 		return 1;
@@ -3338,11 +3343,12 @@ static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch)
 	return batch;
 }
 
-static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone)
+static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone,
+		       bool free_high)
 {
 	int high = READ_ONCE(pcp->high);
 
-	if (unlikely(!high))
+	if (unlikely(!high || free_high))
 		return 0;
 
 	if (!test_bit(ZONE_RECLAIM_ACTIVE, &zone->flags))
@@ -3362,17 +3368,27 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn,
 	struct per_cpu_pages *pcp;
 	int high;
 	int pindex;
+	bool free_high;
 
 	__count_vm_event(PGFREE);
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	pindex = order_to_pindex(migratetype, order);
 	list_add(&page->lru, &pcp->lists[pindex]);
 	pcp->count += 1 << order;
-	high = nr_pcp_high(pcp, zone);
+
+	/*
+	 * As high-order pages other than THP's stored on PCP can contribute
+	 * to fragmentation, limit the number stored when PCP is heavily
+	 * freeing without allocation. The remainder after bulk freeing
+	 * stops will be drained from vmstat refresh context.
+	 */
+	free_high = (pcp->free_factor && order && order <= PAGE_ALLOC_COSTLY_ORDER);
+
+	high = nr_pcp_high(pcp, zone, free_high);
 	if (pcp->count >= high) {
 		int batch = READ_ONCE(pcp->batch);
 
-		free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch), pcp, pindex);
+		free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch, free_high), pcp, pindex);
 	}
 }
 
-- 
2.31.1
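As a rough illustration of the behaviour the patch introduces, the sketch
below models the free_high decision in plain userspace C. It is not kernel
code: struct pcp_model, model_free_high(), model_nr_to_free() and the values
in main() are made-up stand-ins for the real per_cpu_pages state; only the
free_factor/order test and the PAGE_ALLOC_COSTLY_ORDER cutoff mirror the
patch above.

/*
 * Standalone sketch (not kernel code) of the free_high decision added to
 * free_unref_page_commit().  All names and numbers here are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_ALLOC_COSTLY_ORDER	3	/* same cutoff value as the kernel */

/* Minimal stand-in for the relevant per_cpu_pages fields. */
struct pcp_model {
	int count;		/* pages currently held on the PCP lists */
	int high;		/* normal limit before draining to the zone */
	int batch;		/* normal drain granularity */
	int free_factor;	/* non-zero while frees dominate allocations */
};

/* Mirrors the patch's test: a high-order (but not costly-order) page is
 * being freed while the PCP is in a heavy-freeing phase. */
static bool model_free_high(const struct pcp_model *pcp, unsigned int order)
{
	return pcp->free_factor && order && order <= PAGE_ALLOC_COSTLY_ORDER;
}

/* Mirrors the nr_pcp_free() change: drain everything when free_high is
 * set, otherwise fall back to a batch-sized drain (simplified). */
static int model_nr_to_free(const struct pcp_model *pcp, bool free_high)
{
	return free_high ? pcp->count : pcp->batch;
}

int main(void)
{
	struct pcp_model pcp = { .count = 192, .high = 256, .batch = 63,
				 .free_factor = 1 };

	for (unsigned int order = 0; order <= 4; order++) {
		bool free_high = model_free_high(&pcp, order);

		/* free_high also forces the effective high limit to 0,
		 * as nr_pcp_high() now does. */
		int high = free_high ? 0 : pcp.high;

		if (pcp.count >= high)
			printf("order %u: drain %d pages (high=%d)\n",
			       order, model_nr_to_free(&pcp, free_high), high);
		else
			printf("order %u: keep caching (count=%d < high=%d)\n",
			       order, pcp.count, pcp.high);
	}
	return 0;
}

In this model, order-0 frees and orders above PAGE_ALLOC_COSTLY_ORDER keep
the normal pcp->high behaviour, while orders 1-3 drain the whole PCP list
as soon as free_factor is non-zero, which is the limiting behaviour the
patch aims for.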