It seems that [1] was acked, and a v2 was written [2] which improved
upon it, but got bogged down in discussion of other topics, so the
improvements were not included. Then [1] got merged as commit
27cacaad16c5 ("mm,memory_hotplug: drop unneeded locking") and we ended
up with locks that are taken for read but never for write.

So, let's remove the read locking.

Compared to Oscar's original v2 [2], I have added a READ_ONCE() in
page_outside_zone_boundaries(); this is a substitute for the compiler
barrier that was implied by read_seqretry(). I believe this is
necessary to guard against UB, although the value being read here is
only used for a printk so the stakes seem very low (and this is all
debug code anyway). I believe a compiler barrier is also needed in
zone_spans_pfn(), but I'll address that in a separate patch.

That read_seqretry() also implied a CPU-level memory barrier, which I
don't think needs replacing: page_outside_zone_boundaries() is used in
the alloc and free paths, but you can't allocate or free pages from
the span that is in the middle of being added/removed by hotplug. In
other words, page_outside_zone_boundaries() doesn't require a strictly
up-to-date view of spanned_pages, but I think it does require a value
that was once/will eventually be correct, hence READ_ONCE().
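To illustrate what READ_ONCE() buys us here (a simplified sketch, not
the code in this patch; the real function is quoted in full in the
page_alloc.c hunk below):

	unsigned long sp;

	/*
	 * Plain load: racing with a hotplug writer is a data race
	 * (UB), and the compiler may tear the access or quietly
	 * re-read spanned_pages at each later use of "sp".
	 */
	sp = zone->spanned_pages;

	/*
	 * READ_ONCE(): exactly one non-torn load, so "sp" holds some
	 * value that spanned_pages genuinely contained, even if a
	 * stale one.
	 */
	sp = READ_ONCE(zone->spanned_pages);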
[1] https://lore.kernel.org/all/20210531093958.15021-1-osalvador@xxxxxxx/T/#u
[2] https://lore.kernel.org/linux-mm/20210602091457.17772-3-osalvador@xxxxxxx/#t

Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Anshuman Khandual <anshuman.khandual@xxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Cc: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>
Co-developed-by: Oscar Salvador <osalvador@xxxxxxx>
Signed-off-by: Oscar Salvador <osalvador@xxxxxxx>
Signed-off-by: Brendan Jackman <jackmanb@xxxxxxxxxx>
---
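A note for reviewers: the write side that these helpers were meant to
pair with was already gone after 27cacaad16c5, i.e. roughly nothing
like the following remains anywhere in the tree (sketch only;
new_start_pfn/new_spanned_pages are made-up names), so
zone_span_seqretry() can never observe a sequence change:

	zone_span_writelock(zone);	/* write_seqlock(&zone->span_seqlock) */
	zone->zone_start_pfn = new_start_pfn;
	zone->spanned_pages = new_spanned_pages;
	zone_span_writeunlock(zone);	/* write_sequnlock(&zone->span_seqlock) */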
 include/linux/memory_hotplug.h | 35 -----------------------------------
 include/linux/mmzone.h         | 23 +++++------------------
 mm/mm_init.c                   |  1 -
 mm/page_alloc.c                | 10 +++-------
 4 files changed, 8 insertions(+), 61 deletions(-)

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 7a9ff464608d..f9577e67e5ee 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -141,31 +141,7 @@ bool mhp_supports_memmap_on_memory(void);
 /*
  * Zone resizing functions
- *
- * Note: any attempt to resize a zone should has pgdat_resize_lock()
- * zone_span_writelock() both held. This ensure the size of a zone
- * can't be changed while pgdat_resize_lock() held.
  */
-static inline unsigned zone_span_seqbegin(struct zone *zone)
-{
-	return read_seqbegin(&zone->span_seqlock);
-}
-static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
-{
-	return read_seqretry(&zone->span_seqlock, iv);
-}
-static inline void zone_span_writelock(struct zone *zone)
-{
-	write_seqlock(&zone->span_seqlock);
-}
-static inline void zone_span_writeunlock(struct zone *zone)
-{
-	write_sequnlock(&zone->span_seqlock);
-}
-static inline void zone_seqlock_init(struct zone *zone)
-{
-	seqlock_init(&zone->span_seqlock);
-}
 
 extern void adjust_present_page_count(struct page *page,
 				      struct memory_group *group,
 				      long nr_pages);
@@ -251,17 +227,6 @@ static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat)
 	___page;				\
 })
 
-static inline unsigned zone_span_seqbegin(struct zone *zone)
-{
-	return 0;
-}
-static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
-{
-	return 0;
-}
-static inline void zone_span_writelock(struct zone *zone) {}
-static inline void zone_span_writeunlock(struct zone *zone) {}
-static inline void zone_seqlock_init(struct zone *zone) {}
 
 static inline int try_online_node(int nid)
 {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8f9c9590a42c..194ef7fed9d6 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -14,7 +14,6 @@
 #include <linux/threads.h>
 #include <linux/numa.h>
 #include <linux/init.h>
-#include <linux/seqlock.h>
 #include <linux/nodemask.h>
 #include <linux/pageblock-flags.h>
 #include <linux/page-flags-layout.h>
@@ -896,18 +895,11 @@ struct zone {
 	 *
 	 * Locking rules:
 	 *
-	 * zone_start_pfn and spanned_pages are protected by span_seqlock.
-	 * It is a seqlock because it has to be read outside of zone->lock,
-	 * and it is done in the main allocator path.  But, it is written
-	 * quite infrequently.
-	 *
-	 * The span_seq lock is declared along with zone->lock because it is
-	 * frequently read in proximity to zone->lock.  It's good to
-	 * give them a chance of being in the same cacheline.
-	 *
-	 * Write access to present_pages at runtime should be protected by
-	 * mem_hotplug_begin/done(). Any reader who can't tolerant drift of
-	 * present_pages should use get_online_mems() to get a stable value.
+	 * Besides system initialization functions, memory-hotplug is the only
+	 * user that can change a zone's {spanned,present} pages at runtime,
+	 * and it does so while holding mem_hotplug_lock. Any readers who
+	 * can't tolerate drifting values should use {get,put}_online_mems to
+	 * get a stable value.
 	 */
 	atomic_long_t		managed_pages;
 	unsigned long		spanned_pages;
@@ -930,11 +922,6 @@ struct zone {
 	unsigned long		nr_isolate_pageblock;
 #endif
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-	/* see spanned/present_pages for more description */
-	seqlock_t		span_seqlock;
-#endif
-
 	int initialized;
 
 	/* Write-intensive fields used from the page allocator */
diff --git a/mm/mm_init.c b/mm/mm_init.c
index f72b852bd5b8..c725618aeb58 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1383,7 +1383,6 @@ static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx,
 	zone->name = zone_names[idx];
 	zone->zone_pgdat = NODE_DATA(nid);
 	spin_lock_init(&zone->lock);
-	zone_seqlock_init(zone);
 	zone_pcp_init(zone);
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2e22ce5675ca..5116a2b9ea6e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -426,16 +426,12 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
 	int ret;
-	unsigned seq;
 	unsigned long pfn = page_to_pfn(page);
 	unsigned long sp, start_pfn;
 
-	do {
-		seq = zone_span_seqbegin(zone);
-		start_pfn = zone->zone_start_pfn;
-		sp = zone->spanned_pages;
-		ret = !zone_spans_pfn(zone, pfn);
-	} while (zone_span_seqretry(zone, seq));
+	start_pfn = zone->zone_start_pfn;
+	sp = READ_ONCE(zone->spanned_pages);
+	ret = !zone_spans_pfn(zone, pfn);
 
 	if (ret)
 		pr_err("page 0x%lx outside node %d zone %s [ 0x%lx - 0x%lx ]\n",
-- 
2.45.0.rc1.225.g2a3ae87e7f-goog