The patch titled Subject: mm/page_alloc: convert per-cpu list protection to local_lock has been added to the -mm tree. Its filename is mm-page_alloc-convert-per-cpu-list-protection-to-local_lock.patch This patch should soon appear at https://ozlabs.org/~akpm/mmots/broken-out/mm-page_alloc-convert-per-cpu-list-protection-to-local_lock.patch and later at https://ozlabs.org/~akpm/mmotm/broken-out/mm-page_alloc-convert-per-cpu-list-protection-to-local_lock.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> Subject: mm/page_alloc: convert per-cpu list protection to local_lock There is a lack of clarity of what exactly local_irq_save/local_irq_restore protects in page_alloc.c . It conflates the protection of per-cpu page allocation structures with per-cpu vmstat deltas. This patch protects the PCP structure using local_lock which for most configurations is identical to IRQ enabling/disabling. The scope of the lock is still wider than it should be but this is decreased later. It is possible for the local_lock to be embedded safely within struct per_cpu_pages but it adds complexity to free_unref_page_list. [lkp@xxxxxxxxx: Make pagesets static] Link: https://lkml.kernel.org/r/20210512095458.30632-3-mgorman@xxxxxxxxxxxxxxxxxxx Signed-off-by: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> Acked-by: Vlastimil Babka <vbabka@xxxxxxx> Acked-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Cc: Chuck Lever <chuck.lever@xxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Jesper Dangaard Brouer <brouer@xxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/mmzone.h | 2 + mm/page_alloc.c | 50 +++++++++++++++++++++++++++------------ 2 files changed, 37 insertions(+), 15 deletions(-) --- a/include/linux/mmzone.h~mm-page_alloc-convert-per-cpu-list-protection-to-local_lock +++ a/include/linux/mmzone.h @@ -20,6 +20,7 @@ #include <linux/atomic.h> #include <linux/mm_types.h> #include <linux/page-flags.h> +#include <linux/local_lock.h> #include <asm/page.h> /* Free memory management - zoned buddy allocator. */ @@ -337,6 +338,7 @@ enum zone_watermarks { #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) +/* Fields and list protected by pagesets local_lock in page_alloc.c */ struct per_cpu_pages { int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ --- a/mm/page_alloc.c~mm-page_alloc-convert-per-cpu-list-protection-to-local_lock +++ a/mm/page_alloc.c @@ -122,6 +122,13 @@ typedef int __bitwise fpi_t; static DEFINE_MUTEX(pcp_batch_high_lock); #define MIN_PERCPU_PAGELIST_FRACTION (8) +struct pagesets { + local_lock_t lock; +}; +static DEFINE_PER_CPU(struct pagesets, pagesets) = { + .lock = INIT_LOCAL_LOCK(lock), +}; + #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID DEFINE_PER_CPU(int, numa_node); EXPORT_PER_CPU_SYMBOL(numa_node); @@ -1453,6 +1460,10 @@ static void free_pcppages_bulk(struct zo } while (--count && --batch_free && !list_empty(list)); } + /* + * local_lock_irq held so equivalent to spin_lock_irqsave for + * both PREEMPT_RT and non-PREEMPT_RT configurations. + */ spin_lock(&zone->lock); isolated_pageblocks = has_isolate_pageblock(zone); @@ -1573,6 +1584,11 @@ static void __free_pages_ok(struct page return; migratetype = get_pfnblock_migratetype(page, pfn); + + /* + * TODO FIX: Disable IRQs before acquiring IRQ-safe zone->lock + * and protect vmstat updates. + */ local_irq_save(flags); __count_vm_events(PGFREE, 1 << order); free_one_page(page_zone(page), page, pfn, order, migratetype, @@ -2955,6 +2971,10 @@ static int rmqueue_bulk(struct zone *zon { int i, allocated = 0; + /* + * local_lock_irq held so equivalent to spin_lock_irqsave for + * both PREEMPT_RT and non-PREEMPT_RT configurations. + */ spin_lock(&zone->lock); for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype, @@ -3007,12 +3027,12 @@ void drain_zone_pages(struct zone *zone, unsigned long flags; int to_drain, batch; - local_irq_save(flags); + local_lock_irqsave(&pagesets.lock, flags); batch = READ_ONCE(pcp->batch); to_drain = min(pcp->count, batch); if (to_drain > 0) free_pcppages_bulk(zone, to_drain, pcp); - local_irq_restore(flags); + local_unlock_irqrestore(&pagesets.lock, flags); } #endif @@ -3028,13 +3048,13 @@ static void drain_pages_zone(unsigned in unsigned long flags; struct per_cpu_pages *pcp; - local_irq_save(flags); + local_lock_irqsave(&pagesets.lock, flags); pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); if (pcp->count) free_pcppages_bulk(zone, pcp->count, pcp); - local_irq_restore(flags); + local_unlock_irqrestore(&pagesets.lock, flags); } /* @@ -3297,9 +3317,9 @@ void free_unref_page(struct page *page) if (!free_unref_page_prepare(page, pfn)) return; - local_irq_save(flags); + local_lock_irqsave(&pagesets.lock, flags); free_unref_page_commit(page, pfn); - local_irq_restore(flags); + local_unlock_irqrestore(&pagesets.lock, flags); } /* @@ -3319,7 +3339,7 @@ void free_unref_page_list(struct list_he set_page_private(page, pfn); } - local_irq_save(flags); + local_lock_irqsave(&pagesets.lock, flags); list_for_each_entry_safe(page, next, list, lru) { unsigned long pfn = page_private(page); @@ -3332,12 +3352,12 @@ void free_unref_page_list(struct list_he * a large list of pages to free. */ if (++batch_count == SWAP_CLUSTER_MAX) { - local_irq_restore(flags); + local_unlock_irqrestore(&pagesets.lock, flags); batch_count = 0; - local_irq_save(flags); + local_lock_irqsave(&pagesets.lock, flags); } } - local_irq_restore(flags); + local_unlock_irqrestore(&pagesets.lock, flags); } /* @@ -3494,7 +3514,7 @@ static struct page *rmqueue_pcplist(stru struct page *page; unsigned long flags; - local_irq_save(flags); + local_lock_irqsave(&pagesets.lock, flags); pcp = this_cpu_ptr(zone->per_cpu_pageset); list = &pcp->lists[migratetype]; page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); @@ -3502,7 +3522,7 @@ static struct page *rmqueue_pcplist(stru __count_zid_vm_events(PGALLOC, page_zonenum(page), 1); zone_statistics(preferred_zone, zone); } - local_irq_restore(flags); + local_unlock_irqrestore(&pagesets.lock, flags); return page; } @@ -5096,7 +5116,7 @@ unsigned long __alloc_pages_bulk(gfp_t g goto failed; /* Attempt the batch allocation */ - local_irq_save(flags); + local_lock_irqsave(&pagesets.lock, flags); pcp = this_cpu_ptr(zone->per_cpu_pageset); pcp_list = &pcp->lists[ac.migratetype]; @@ -5134,12 +5154,12 @@ unsigned long __alloc_pages_bulk(gfp_t g nr_populated++; } - local_irq_restore(flags); + local_unlock_irqrestore(&pagesets.lock, flags); return nr_populated; failed_irq: - local_irq_restore(flags); + local_unlock_irqrestore(&pagesets.lock, flags); failed: page = __alloc_pages(gfp, 0, preferred_nid, nodemask); _ Patches currently in -mm which might be from mgorman@xxxxxxxxxxxxxxxxxxx are mm-page_alloc-split-per-cpu-page-lists-and-zone-stats.patch mm-page_alloc-convert-per-cpu-list-protection-to-local_lock.patch mm-vmstat-convert-numa-statistics-to-basic-numa-counters.patch mm-vmstat-inline-numa-event-counter-updates.patch mm-page_alloc-batch-the-accounting-updates-in-the-bulk-allocator.patch mm-page_alloc-reduce-duration-that-irqs-are-disabled-for-vm-counters.patch mm-page_alloc-explicitly-acquire-the-zone-lock-in-__free_pages_ok.patch mm-page_alloc-avoid-conflating-irqs-disabled-with-zone-lock.patch mm-page_alloc-update-pgfree-outside-the-zone-lock-in-__free_pages_ok.patch