The patch titled Subject: mm/page_alloc: batch the accounting updates in the bulk allocator has been added to the -mm tree. Its filename is mm-page_alloc-batch-the-accounting-updates-in-the-bulk-allocator.patch This patch should soon appear at https://ozlabs.org/~akpm/mmots/broken-out/mm-page_alloc-batch-the-accounting-updates-in-the-bulk-allocator.patch and later at https://ozlabs.org/~akpm/mmotm/broken-out/mm-page_alloc-batch-the-accounting-updates-in-the-bulk-allocator.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> Subject: mm/page_alloc: batch the accounting updates in the bulk allocator Now that the zone_statistics are simple counters that do not require special protection, the bulk allocator accounting updates can be batch updated without adding too much complexity with protected RMW updates or using xchg. Link: https://lkml.kernel.org/r/20210512095458.30632-6-mgorman@xxxxxxxxxxxxxxxxxxx Signed-off-by: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> Acked-by: Vlastimil Babka <vbabka@xxxxxxx> Acked-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Cc: Chuck Lever <chuck.lever@xxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Jesper Dangaard Brouer <brouer@xxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/vmstat.h | 8 ++++++++ mm/page_alloc.c | 30 +++++++++++++----------------- 2 files changed, 21 insertions(+), 17 deletions(-) --- a/include/linux/vmstat.h~mm-page_alloc-batch-the-accounting-updates-in-the-bulk-allocator +++ a/include/linux/vmstat.h @@ -247,6 +247,14 @@ __count_numa_event(struct zone *zone, en raw_cpu_inc(pzstats->vm_numa_event[item]); } +static inline void +__count_numa_events(struct zone *zone, enum numa_stat_item item, long delta) +{ + struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; + + raw_cpu_add(pzstats->vm_numa_event[item], delta); +} + extern unsigned long sum_zone_node_page_state(int node, enum zone_stat_item item); extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item); --- a/mm/page_alloc.c~mm-page_alloc-batch-the-accounting-updates-in-the-bulk-allocator +++ a/mm/page_alloc.c @@ -3456,7 +3456,8 @@ void __putback_isolated_page(struct page * * Must be called with interrupts disabled. */ -static inline void zone_statistics(struct zone *preferred_zone, struct zone *z) +static inline void zone_statistics(struct zone *preferred_zone, struct zone *z, + long nr_account) { #ifdef CONFIG_NUMA enum numa_stat_item local_stat = NUMA_LOCAL; @@ -3469,12 +3470,12 @@ static inline void zone_statistics(struc local_stat = NUMA_OTHER; if (zone_to_nid(z) == zone_to_nid(preferred_zone)) - __count_numa_event(z, NUMA_HIT); + __count_numa_events(z, NUMA_HIT, nr_account); else { - __count_numa_event(z, NUMA_MISS); - __count_numa_event(preferred_zone, NUMA_FOREIGN); + __count_numa_events(z, NUMA_MISS, nr_account); + __count_numa_events(preferred_zone, NUMA_FOREIGN, nr_account); } - __count_numa_event(z, local_stat); + __count_numa_events(z, local_stat, nr_account); #endif } @@ -3520,7 +3521,7 @@ static struct page *rmqueue_pcplist(stru page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); if (page) { __count_zid_vm_events(PGALLOC, page_zonenum(page), 1); - zone_statistics(preferred_zone, zone); + zone_statistics(preferred_zone, zone, 1); } local_unlock_irqrestore(&pagesets.lock, flags); return page; @@ -3581,7 +3582,7 @@ struct page *rmqueue(struct zone *prefer get_pcppage_migratetype(page)); __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); - zone_statistics(preferred_zone, zone); + zone_statistics(preferred_zone, zone, 1); local_irq_restore(flags); out: @@ -5063,7 +5064,7 @@ unsigned long __alloc_pages_bulk(gfp_t g struct alloc_context ac; gfp_t alloc_gfp; unsigned int alloc_flags = ALLOC_WMARK_LOW; - int nr_populated = 0; + int nr_populated = 0, nr_account = 0; if (unlikely(nr_pages <= 0)) return 0; @@ -5136,15 +5137,7 @@ unsigned long __alloc_pages_bulk(gfp_t g goto failed_irq; break; } - - /* - * Ideally this would be batched but the best way to do - * that cheaply is to first convert zone_statistics to - * be inaccurate per-cpu counter like vm_events to avoid - * a RMW cycle then do the accounting with IRQs enabled. - */ - __count_zid_vm_events(PGALLOC, zone_idx(zone), 1); - zone_statistics(ac.preferred_zoneref->zone, zone); + nr_account++; prep_new_page(page, 0, gfp, 0); if (page_list) @@ -5154,6 +5147,9 @@ unsigned long __alloc_pages_bulk(gfp_t g nr_populated++; } + __count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account); + zone_statistics(ac.preferred_zoneref->zone, zone, nr_account); + local_unlock_irqrestore(&pagesets.lock, flags); return nr_populated; _ Patches currently in -mm which might be from mgorman@xxxxxxxxxxxxxxxxxxx are mm-page_alloc-split-per-cpu-page-lists-and-zone-stats.patch mm-page_alloc-convert-per-cpu-list-protection-to-local_lock.patch mm-vmstat-convert-numa-statistics-to-basic-numa-counters.patch mm-vmstat-inline-numa-event-counter-updates.patch mm-page_alloc-batch-the-accounting-updates-in-the-bulk-allocator.patch mm-page_alloc-reduce-duration-that-irqs-are-disabled-for-vm-counters.patch mm-page_alloc-explicitly-acquire-the-zone-lock-in-__free_pages_ok.patch mm-page_alloc-avoid-conflating-irqs-disabled-with-zone-lock.patch mm-page_alloc-update-pgfree-outside-the-zone-lock-in-__free_pages_ok.patch