Currently, getting and setting the pageblock flags is done without any synchronization. Therefore there is a race condition, and the migratetype that is read can be an unintended value. Sometimes we move pageblocks from one migratetype to another and, at the same time, a page in that pageblock may be freed. In this case, we can read a totally unintended value, since the pageblock flags are not read or written atomically; instead, they are accessed one bit at a time. Since set pageblock is used far less frequently than get pageblock, I think that a seqlock is the proper method to synchronize them. This type of lock has minimal overhead when there are many readers and few writers, so it fits this situation. Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index bd791e4..feaa607 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -79,6 +79,7 @@ static inline int get_pageblock_migratetype(struct page *page) { return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end); } +void set_pageblock_migratetype(struct page *page, int migratetype); struct free_area { struct list_head free_list[MIGRATE_TYPES]; @@ -367,6 +368,7 @@ struct zone { #endif struct free_area free_area[MAX_ORDER]; + seqlock_t pageblock_seqlock; #ifndef CONFIG_SPARSEMEM /* * Flags for a pageblock_nr_pages block. See pageblock-flags.h. 
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 3fff8e7..58e2a89 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -23,7 +23,6 @@ static inline bool is_migrate_isolate(int migratetype) bool has_unmovable_pages(struct zone *zone, struct page *page, int count, bool skip_hwpoisoned_pages); -void set_pageblock_migratetype(struct page *page, int migratetype); int move_freepages_block(struct zone *zone, struct page *page, int migratetype); int move_freepages(struct zone *zone, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5248fe0..b36aa5a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4788,6 +4788,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, spin_lock_init(&zone->lock); spin_lock_init(&zone->lru_lock); zone_seqlock_init(zone); + seqlock_init(&zone->pageblock_seqlock); zone->zone_pgdat = pgdat; zone_pcp_init(zone); @@ -5927,15 +5928,19 @@ unsigned long get_pageblock_flags_group(struct page *page, unsigned long pfn, bitidx; unsigned long flags = 0; unsigned long value = 1; + unsigned int seq; zone = page_zone(page); pfn = page_to_pfn(page); bitmap = get_pageblock_bitmap(zone, pfn); bitidx = pfn_to_bitidx(zone, pfn); - for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) - if (test_bit(bitidx + start_bitidx, bitmap)) - flags |= value; + do { + seq = read_seqbegin(&zone->pageblock_seqlock); + for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) + if (test_bit(bitidx + start_bitidx, bitmap)) + flags |= value; + } while (read_seqretry(&zone->pageblock_seqlock, seq)); return flags; } @@ -5954,6 +5959,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, unsigned long *bitmap; unsigned long pfn, bitidx; unsigned long value = 1; + unsigned long irq_flags; zone = page_zone(page); pfn = page_to_pfn(page); @@ -5961,11 +5967,13 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, bitidx = 
pfn_to_bitidx(zone, pfn); VM_BUG_ON(!zone_spans_pfn(zone, pfn)); + write_seqlock_irqsave(&zone->pageblock_seqlock, irq_flags); for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) if (flags & value) __set_bit(bitidx + start_bitidx, bitmap); else __clear_bit(bitidx + start_bitidx, bitmap); + write_sequnlock_irqrestore(&zone->pageblock_seqlock, irq_flags); } /* -- 1.7.9.5 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>