From: Claudiu Beznea <claudiu.beznea.uj@xxxxxxxxxxxxxx> Commit 52166607ecc9 ("mm: restrict the pcp batch scale factor to avoid too long latency") introduced default PCP (Per-CPU Pageset) batch size as a configuration flag. The configuration flag is CONFIG_PCP_BATCH_SCALE_MAX. The ARM64 defconfig has CONFIG_PCP_BATCH_SCALE_MAX=5. This defconfig is used by a high range of SoCs. The Renesas RZ/G3S SoC is a single CPU SoC, with L1$ (I-cache 32Kbytes, D-cache 32 Kbytes), L3$ (256 Kbytes), but no L2$. It is currently used in a configuration with 1 GiB RAM size. In this configuration, starting with commit 52166607ecc9 ("mm: restrict the pcp batch scale factor to avoid too long latency") the "bonnie++ -d /mnt -u root" benchmark takes ~14 minutes while previously it took ~10 minutes. The /mnt directory is mounted on SD card. Same behavior is reproduced on similar Renesas single core devices (e.g., Renesas RZ/G2UL). Add a new kernel parameter to allow systems like the Renesas RZ/G3S to continue to have the same performance numbers with the default mainline ARM64 config. With pcp_batch_scale_max=5 (the default value) the bonnie++ benchmark takes ~14 minutes while with pcp_batch_scale_max=0 it takes ~10 minutes. Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@xxxxxxxxxxxxxx> --- .../admin-guide/kernel-parameters.txt | 6 +++++ mm/page_alloc.c | 26 ++++++++++++++++----- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e7bfe1bde49e..ce745ea78470 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4716,6 +4716,12 @@ for debug and development, but should not be needed on a platform with proper driver support. + pcp_batch_scale_max=n + Format: <integer> + Range: 0-6 + Default: CONFIG_PCP_BATCH_SCALE_MAX + Sets the scale factor used by the PCP batch scale algorithm. 
+ pdcchassis= [PARISC,HW] Disable/Enable PDC Chassis Status codes at boot time. Format: { 0 | 1 } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bc55d39eb372..ef1d37cefb43 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -163,6 +163,20 @@ static DEFINE_MUTEX(pcp_batch_high_lock); #define pcp_spin_unlock(ptr) \ pcpu_spin_unlock(lock, ptr) +static unsigned int pcp_batch_scale_max = CONFIG_PCP_BATCH_SCALE_MAX; +#define MAX_PCP_BATCH 6 + +static int __init setup_pcp_batch_scale_max(char *str) +{ + get_option(&str, (int *)&pcp_batch_scale_max); + + if (pcp_batch_scale_max > MAX_PCP_BATCH) + pcp_batch_scale_max = MAX_PCP_BATCH; + + return 1; +} +__setup("pcp_batch_scale_max=", setup_pcp_batch_scale_max); + #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID DEFINE_PER_CPU(int, numa_node); EXPORT_PER_CPU_SYMBOL(numa_node); @@ -2362,7 +2376,7 @@ int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp) * control latency. This caps pcp->high decrement too. */ if (pcp->high > high_min) { - pcp->high = max3(pcp->count - (batch << CONFIG_PCP_BATCH_SCALE_MAX), + pcp->high = max3(pcp->count - (batch << pcp_batch_scale_max), pcp->high - (pcp->high >> 3), high_min); if (pcp->high > high_min) todo++; @@ -2412,7 +2426,7 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) count = pcp->count; if (count) { int to_drain = min(count, - pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX); + pcp->batch << pcp_batch_scale_max); free_pcppages_bulk(zone, to_drain, pcp, 0); count -= to_drain; @@ -2540,7 +2554,7 @@ static int nr_pcp_free(struct per_cpu_pages *pcp, int batch, int high, bool free /* Free as much as possible if batch freeing high-order pages. 
*/ if (unlikely(free_high)) - return min(pcp->count, batch << CONFIG_PCP_BATCH_SCALE_MAX); + return min(pcp->count, batch << pcp_batch_scale_max); /* Check for PCP disabled or boot pageset */ if (unlikely(high < batch)) @@ -2572,7 +2586,7 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone, return 0; if (unlikely(free_high)) { - pcp->high = max(high - (batch << CONFIG_PCP_BATCH_SCALE_MAX), + pcp->high = max(high - (batch << pcp_batch_scale_max), high_min); return 0; } @@ -2642,7 +2656,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp, } else if (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) { pcp->flags &= ~PCPF_PREV_FREE_HIGH_ORDER; } - if (pcp->free_count < (batch << CONFIG_PCP_BATCH_SCALE_MAX)) + if (pcp->free_count < (batch << pcp_batch_scale_max)) pcp->free_count += (1 << order); high = nr_pcp_high(pcp, zone, batch, free_high); if (pcp->count >= high) { @@ -2984,7 +2998,7 @@ static int nr_pcp_alloc(struct per_cpu_pages *pcp, struct zone *zone, int order) * subsequent allocation of order-0 pages without any freeing. */ if (batch <= max_nr_alloc && - pcp->alloc_factor < CONFIG_PCP_BATCH_SCALE_MAX) + pcp->alloc_factor < pcp_batch_scale_max) pcp->alloc_factor++; batch = min(batch, max_nr_alloc); } -- 2.39.2