The patch titled
     Subject: mm: page_alloc: consolidate free page accounting
has been added to the -mm mm-unstable branch.  Its filename is
     mm-page_alloc-consolidate-free-page-accounting.patch

This patch will shortly appear at
     https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-page_alloc-consolidate-free-page-accounting.patch

This patch will later appear in the mm-unstable branch at
    git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days

------------------------------------------------------
From: Johannes Weiner <hannes@xxxxxxxxxxx>
Subject: mm: page_alloc: consolidate free page accounting
Date: Mon, 11 Sep 2023 15:41:47 -0400

Free page accounting currently happens a bit too high up the call stack,
where it has to deal with guard pages, compaction capturing, block
stealing and even page isolation.  This is subtle and fragile, and makes
it difficult to hack on the code.

Now that type violations on the freelists have been fixed, push the
accounting down to where pages enter and leave the freelist.

Link: https://lkml.kernel.org/r/20230911195023.247694-7-hannes@xxxxxxxxxxx
Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
Cc: Kefeng Wang <wangkefeng.wang@xxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
Cc: Miaohe Lin <linmiaohe@xxxxxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Cc: Zi Yan <ziy@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 include/linux/mm.h             |   18 +---
 include/linux/page-isolation.h |    3 
 include/linux/vmstat.h         |    8 -
 mm/debug_page_alloc.c          |   12 --
 mm/internal.h                  |    5 -
 mm/page_alloc.c                |  135 +++++++++++++++++--------------
 mm/page_isolation.c            |    7 -
 7 files changed, 90 insertions(+), 98 deletions(-)
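For quick orientation, the helper at the center of the patch, condensed
from the mm/page_alloc.c hunks below (a sketch for readability, not the
literal patch context; the comment is added here for illustration):

static inline void account_freepages(struct page *page, struct zone *zone,
				     int nr_pages, int migratetype)
{
	/* Isolated pageblocks are excluded from the free-page counters */
	if (is_migrate_isolate(migratetype))
		return;

	__mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);

	if (is_migrate_cma(migratetype))
		__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
}

add_to_free_list(), move_to_free_list() and del_page_from_free_list()
invoke this helper as pages enter and leave the freelists, which is what
lets the open-coded __mod_zone_freepage_state() updates in the guard
page, compaction capture, block stealing and isolation paths go away.
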
--- a/include/linux/mm.h~mm-page_alloc-consolidate-free-page-accounting
+++ a/include/linux/mm.h
@@ -3672,24 +3672,22 @@ static inline bool page_is_guard(struct
 	return PageGuard(page);
 }
 
-bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
-		      int migratetype);
+bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order);
 static inline bool set_page_guard(struct zone *zone, struct page *page,
-				  unsigned int order, int migratetype)
+				  unsigned int order)
 {
 	if (!debug_guardpage_enabled())
 		return false;
-	return __set_page_guard(zone, page, order, migratetype);
+	return __set_page_guard(zone, page, order);
 }
 
-void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order,
-			int migratetype);
+void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order);
 static inline void clear_page_guard(struct zone *zone, struct page *page,
-				    unsigned int order, int migratetype)
+				    unsigned int order)
 {
 	if (!debug_guardpage_enabled())
 		return;
-	__clear_page_guard(zone, page, order, migratetype);
+	__clear_page_guard(zone, page, order);
 }
 
 #else	/* CONFIG_DEBUG_PAGEALLOC */
@@ -3699,9 +3697,9 @@ static inline unsigned int debug_guardpa
 static inline bool debug_guardpage_enabled(void) { return false; }
 static inline bool page_is_guard(struct page *page) { return false; }
 static inline bool set_page_guard(struct zone *zone, struct page *page,
-			unsigned int order, int migratetype) { return false; }
+			unsigned int order) { return false; }
 static inline void clear_page_guard(struct zone *zone, struct page *page,
-				unsigned int order, int migratetype) {}
+				unsigned int order) {}
 #endif	/* CONFIG_DEBUG_PAGEALLOC */
 
 #ifdef __HAVE_ARCH_GATE_AREA
--- a/include/linux/page-isolation.h~mm-page_alloc-consolidate-free-page-accounting
+++ a/include/linux/page-isolation.h
@@ -34,7 +34,8 @@ static inline bool is_migrate_isolate(in
 #define REPORT_FAILURE	0x2
 
 void set_pageblock_migratetype(struct page *page, int migratetype);
-int move_freepages_block(struct zone *zone, struct page *page, int migratetype);
+int move_freepages_block(struct zone *zone, struct page *page,
+			 int old_mt, int new_mt);
 
 int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 			     int migratetype, int flags, gfp_t gfp_flags);
--- a/include/linux/vmstat.h~mm-page_alloc-consolidate-free-page-accounting
+++ a/include/linux/vmstat.h
@@ -487,14 +487,6 @@ static inline void node_stat_sub_folio(s
 	mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio));
 }
 
-static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
-					     int migratetype)
-{
-	__mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
-	if (is_migrate_cma(migratetype))
-		__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
-}
-
 extern const char * const vmstat_text[];
 
 static inline const char *zone_stat_name(enum zone_stat_item item)
--- a/mm/debug_page_alloc.c~mm-page_alloc-consolidate-free-page-accounting
+++ a/mm/debug_page_alloc.c
@@ -32,8 +32,7 @@ static int __init debug_guardpage_minord
 }
 early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
 
-bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
-		      int migratetype)
+bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order)
 {
 	if (order >= debug_guardpage_minorder())
 		return false;
@@ -41,19 +40,12 @@ bool __set_page_guard(struct zone *zone,
 	__SetPageGuard(page);
 	INIT_LIST_HEAD(&page->buddy_list);
 	set_page_private(page, order);
-	/* Guard pages are not available for any usage */
-	if (!is_migrate_isolate(migratetype))
-		__mod_zone_freepage_state(zone, -(1 << order), migratetype);
 
 	return true;
 }
 
-void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order,
-			int migratetype)
+void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order)
 {
 	__ClearPageGuard(page);
-
 	set_page_private(page, 0);
-	if (!is_migrate_isolate(migratetype))
-		__mod_zone_freepage_state(zone, (1 << order), migratetype);
 }
--- a/mm/internal.h~mm-page_alloc-consolidate-free-page-accounting
+++ a/mm/internal.h
@@ -884,11 +884,6 @@ static inline bool is_migrate_highatomic
 	return migratetype == MIGRATE_HIGHATOMIC;
 }
 
-static inline bool is_migrate_highatomic_page(struct page *page)
-{
-	return get_pageblock_migratetype(page) == MIGRATE_HIGHATOMIC;
-}
-
 void setup_zone_pageset(struct zone *zone);
 
 struct migration_target_control {
--- a/mm/page_alloc.c~mm-page_alloc-consolidate-free-page-accounting
+++ a/mm/page_alloc.c
@@ -640,24 +640,36 @@ compaction_capture(struct capture_contro
 }
 #endif /* CONFIG_COMPACTION */
 
-/* Used for pages not on another list */
-static inline void add_to_free_list(struct page *page, struct zone *zone,
-				    unsigned int order, int migratetype)
+static inline void account_freepages(struct page *page, struct zone *zone,
+				     int nr_pages, int migratetype)
 {
-	struct free_area *area = &zone->free_area[order];
+	if (is_migrate_isolate(migratetype))
+		return;
 
-	list_add(&page->buddy_list, &area->free_list[migratetype]);
-	area->nr_free++;
+	__mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
+
+	if (is_migrate_cma(migratetype))
+		__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
 }
 
 /* Used for pages not on another list */
-static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
-					 unsigned int order, int migratetype)
+static inline void add_to_free_list(struct page *page, struct zone *zone,
+				    unsigned int order, int migratetype,
+				    bool tail)
 {
 	struct free_area *area = &zone->free_area[order];
 
-	list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
+	VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
+		     "page type is %lu, passed migratetype is %d (nr=%d)\n",
+		     get_pageblock_migratetype(page), migratetype, 1 << order);
+
+	if (tail)
+		list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
+	else
+		list_add(&page->buddy_list, &area->free_list[migratetype]);
 	area->nr_free++;
+
+	account_freepages(page, zone, 1 << order, migratetype);
 }
 
 /*
@@ -666,16 +678,28 @@ static inline void add_to_free_list_tail
  * allocation again (e.g., optimization for memory onlining).
  */
 static inline void move_to_free_list(struct page *page, struct zone *zone,
-				     unsigned int order, int migratetype)
+				     unsigned int order, int old_mt, int new_mt)
 {
 	struct free_area *area = &zone->free_area[order];
 
-	list_move_tail(&page->buddy_list, &area->free_list[migratetype]);
+	/* Free page moving can fail, so it happens before the type update */
+	VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt,
+		     "page type is %lu, passed migratetype is %d (nr=%d)\n",
+		     get_pageblock_migratetype(page), old_mt, 1 << order);
+
+	list_move_tail(&page->buddy_list, &area->free_list[new_mt]);
+
+	account_freepages(page, zone, -(1 << order), old_mt);
+	account_freepages(page, zone, 1 << order, new_mt);
 }
 
 static inline void del_page_from_free_list(struct page *page, struct zone *zone,
-					   unsigned int order)
+					   unsigned int order, int migratetype)
 {
+	VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
+		     "page type is %lu, passed migratetype is %d (nr=%d)\n",
+		     get_pageblock_migratetype(page), migratetype, 1 << order);
+
 	/* clear reported state and update reported page count */
 	if (page_reported(page))
 		__ClearPageReported(page);
@@ -684,6 +708,8 @@ static inline void del_page_from_free_li
 	__ClearPageBuddy(page);
 	set_page_private(page, 0);
 	zone->free_area[order].nr_free--;
+
+	account_freepages(page, zone, -(1 << order), migratetype);
 }
 
 static inline struct page *get_page_from_free_area(struct free_area *area,
@@ -757,23 +783,21 @@ static inline void __free_one_page(struc
 	VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
 	VM_BUG_ON(migratetype == -1);
 
-	if (likely(!is_migrate_isolate(migratetype)))
-		__mod_zone_freepage_state(zone, 1 << order, migratetype);
-
 	VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page);
 	VM_BUG_ON_PAGE(bad_range(zone, page), page);
 
 	while (order < MAX_ORDER) {
-		if (compaction_capture(capc, page, order, migratetype)) {
-			__mod_zone_freepage_state(zone, -(1 << order),
-						  migratetype);
+		int buddy_mt;
+
+		if (compaction_capture(capc, page, order, migratetype))
 			return;
-		}
 
 		buddy = find_buddy_page_pfn(page, pfn, order, &buddy_pfn);
 		if (!buddy)
 			goto done_merging;
 
+		buddy_mt = get_pfnblock_migratetype(buddy, buddy_pfn);
+
 		if (unlikely(order >= pageblock_order)) {
 			/*
			 * We want to prevent merge between freepages on pageblock
@@ -801,9 +825,9 @@ static inline void __free_one_page(struc
 		 * merge with it and move up one order.
 		 */
 		if (page_is_guard(buddy))
-			clear_page_guard(zone, buddy, order, migratetype);
+			clear_page_guard(zone, buddy, order);
 		else
-			del_page_from_free_list(buddy, zone, order);
+			del_page_from_free_list(buddy, zone, order, buddy_mt);
 		combined_pfn = buddy_pfn & pfn;
 		page = page + (combined_pfn - pfn);
 		pfn = combined_pfn;
@@ -820,10 +844,7 @@ done_merging:
 	else
 		to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order);
 
-	if (to_tail)
-		add_to_free_list_tail(page, zone, order, migratetype);
-	else
-		add_to_free_list(page, zone, order, migratetype);
+	add_to_free_list(page, zone, order, migratetype, to_tail);
 
 	/* Notify page reporting subsystem of freed page */
 	if (!(fpi_flags & FPI_SKIP_REPORT_NOTIFY))
@@ -865,10 +886,8 @@ int split_free_page(struct page *free_pa
 	}
 
 	mt = get_pfnblock_migratetype(free_page, free_page_pfn);
-	if (likely(!is_migrate_isolate(mt)))
-		__mod_zone_freepage_state(zone, -(1UL << order), mt);
+	del_page_from_free_list(free_page, zone, order, mt);
 
-	del_page_from_free_list(free_page, zone, order);
 	for (pfn = free_page_pfn;
 	     pfn < free_page_pfn + (1UL << order);) {
 		int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
@@ -1382,10 +1401,10 @@ static inline void expand(struct zone *z
 		 * Corresponding page table entries will not be touched,
 		 * pages will stay not present in virtual address space
 		 */
-		if (set_page_guard(zone, &page[size], high, migratetype))
+		if (set_page_guard(zone, &page[size], high))
 			continue;
 
-		add_to_free_list(&page[size], zone, high, migratetype);
+		add_to_free_list(&page[size], zone, high, migratetype, false);
 		set_buddy_order(&page[size], high);
 	}
 }
@@ -1555,7 +1574,7 @@ struct page *__rmqueue_smallest(struct z
 		page = get_page_from_free_area(area, migratetype);
 		if (!page)
 			continue;
-		del_page_from_free_list(page, zone, current_order);
+		del_page_from_free_list(page, zone, current_order, migratetype);
 		expand(zone, page, order, current_order, migratetype);
 		trace_mm_page_alloc_zone_locked(page, order, migratetype,
 				pcp_allowed_order(order) &&
@@ -1596,7 +1615,7 @@ static inline struct page *__rmqueue_cma
  * boundary. If alignment is required, use move_freepages_block()
  */
 static int move_freepages(struct zone *zone, unsigned long start_pfn,
-			  unsigned long end_pfn, int migratetype)
+			  unsigned long end_pfn, int old_mt, int new_mt)
 {
 	struct page *page;
 	unsigned long pfn;
@@ -1615,7 +1634,7 @@ static int move_freepages(struct zone *z
 		VM_BUG_ON_PAGE(page_zone(page) != zone, page);
 
 		order = buddy_order(page);
-		move_to_free_list(page, zone, order, migratetype);
+		move_to_free_list(page, zone, order, old_mt, new_mt);
 		pfn += 1 << order;
 		pages_moved += 1 << order;
 	}
@@ -1670,7 +1689,7 @@ static bool prep_move_freepages_block(st
 }
 
 int move_freepages_block(struct zone *zone, struct page *page,
-			 int migratetype)
+			 int old_mt, int new_mt)
 {
 	unsigned long start_pfn, end_pfn;
 
@@ -1678,7 +1697,7 @@ int move_freepages_block(struct zone *zo
 			NULL, NULL))
 		return -1;
 
-	return move_freepages(zone, start_pfn, end_pfn, migratetype);
+	return move_freepages(zone, start_pfn, end_pfn, old_mt, new_mt);
 }
 
 static void change_pageblock_range(struct page *pageblock_page,
@@ -1789,7 +1808,7 @@ static void steal_suitable_fallback(stru
 
 	/* Take ownership for orders >= pageblock_order */
 	if (current_order >= pageblock_order) {
-		del_page_from_free_list(page, zone, current_order);
+		del_page_from_free_list(page, zone, current_order, block_type);
 		change_pageblock_range(page, current_order, start_type);
 		expand(zone, page, order, current_order, start_type);
 		return;
@@ -1839,13 +1858,13 @@ static void steal_suitable_fallback(stru
 	 */
 	if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
 			page_group_by_mobility_disabled) {
-		move_freepages(zone, start_pfn, end_pfn, start_type);
+		move_freepages(zone, start_pfn, end_pfn, block_type, start_type);
 		set_pageblock_migratetype(page, start_type);
 		block_type = start_type;
 	}
 
 single_page:
-	del_page_from_free_list(page, zone, current_order);
+	del_page_from_free_list(page, zone, current_order, block_type);
 	expand(zone, page, order, current_order, block_type);
 }
 
@@ -1910,7 +1929,8 @@ static void reserve_highatomic_pageblock
 	mt = get_pageblock_migratetype(page);
 	/* Only reserve normal pageblocks (i.e., they can merge with others) */
 	if (migratetype_is_mergeable(mt)) {
-		if (move_freepages_block(zone, page, MIGRATE_HIGHATOMIC) != -1) {
+		if (move_freepages_block(zone, page,
+					 mt, MIGRATE_HIGHATOMIC) != -1) {
 			set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
 			zone->nr_reserved_highatomic += pageblock_nr_pages;
 		}
@@ -1953,11 +1973,13 @@ static bool unreserve_highatomic_pageblo
 		spin_lock_irqsave(&zone->lock, flags);
 		for (order = 0; order <= MAX_ORDER; order++) {
 			struct free_area *area = &(zone->free_area[order]);
+			int mt;
 
 			page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
 			if (!page)
 				continue;
 
+			mt = get_pageblock_migratetype(page);
 			/*
 			 * In page freeing path, migratetype change is racy so
 			 * we can counter several free pages in a pageblock
@@ -1965,7 +1987,7 @@ static bool unreserve_highatomic_pageblo
 			 * from highatomic to ac->migratetype. So we should
 			 * adjust the count once.
 			 */
-			if (is_migrate_highatomic_page(page)) {
+			if (is_migrate_highatomic(mt)) {
 				/*
 				 * It should never happen but changes to
 				 * locking could inadvertently allow a per-cpu
@@ -1987,7 +2009,7 @@ static bool unreserve_highatomic_pageblo
 			 * of pageblocks that cannot be completely freed
 			 * may increase.
 			 */
-			ret = move_freepages_block(zone, page, ac->migratetype);
+			ret = move_freepages_block(zone, page, mt,
+						   ac->migratetype);
 			/*
 			 * Reserving this block already succeeded, so this should
 			 * not fail on zone boundaries.
@@ -2195,12 +2218,7 @@ static int rmqueue_bulk(struct zone *zon
 		 * pages are ordered properly.
 		 */
 		list_add_tail(&page->pcp_list, list);
-		if (is_migrate_cma(get_pageblock_migratetype(page)))
-			__mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
-					      -(1 << order));
 	}
-
-	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
 	spin_unlock_irqrestore(&zone->lock, flags);
 
 	return i;
@@ -2601,11 +2619,9 @@ int __isolate_free_page(struct page *pag
 		watermark = zone->_watermark[WMARK_MIN] + (1UL << order);
 		if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
 			return 0;
-
-		__mod_zone_freepage_state(zone, -(1UL << order), mt);
 	}
 
-	del_page_from_free_list(page, zone, order);
+	del_page_from_free_list(page, zone, order, mt);
 
 	/*
 	 * Set the pageblock if the isolated page is at least half of a
@@ -2620,7 +2636,7 @@ int __isolate_free_page(struct page *pag
 			 * with others)
 			 */
 			if (migratetype_is_mergeable(mt) &&
-			    move_freepages_block(zone, page,
+			    move_freepages_block(zone, page, mt,
 						 MIGRATE_MOVABLE) != -1)
 				set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 		}
@@ -2706,8 +2722,6 @@ struct page *rmqueue_buddy(struct zone *
 				return NULL;
 			}
 		}
-		__mod_zone_freepage_state(zone, -(1 << order),
-					  get_pageblock_migratetype(page));
 		spin_unlock_irqrestore(&zone->lock, flags);
 	} while (check_new_pages(page, order));
 
@@ -6470,8 +6484,9 @@ void __offline_isolated_pages(unsigned l
 		BUG_ON(page_count(page));
 		BUG_ON(!PageBuddy(page));
+		VM_WARN_ON(get_pageblock_migratetype(page) != MIGRATE_ISOLATE);
 		order = buddy_order(page);
-		del_page_from_free_list(page, zone, order);
+		del_page_from_free_list(page, zone, order, MIGRATE_ISOLATE);
 		pfn += (1 << order);
 	}
 	spin_unlock_irqrestore(&zone->lock, flags);
@@ -6522,11 +6537,12 @@ static void break_down_buddy_pages(struc
 			current_buddy = page + size;
 		}
 
-		if (set_page_guard(zone, current_buddy, high, migratetype))
+		if (set_page_guard(zone, current_buddy, high))
 			continue;
 
 		if (current_buddy != target) {
-			add_to_free_list(current_buddy, zone, high, migratetype);
+			add_to_free_list(current_buddy, zone, high,
+					 migratetype, false);
 			set_buddy_order(current_buddy, high);
 			page = next_page;
 		}
@@ -6554,12 +6570,11 @@ bool take_page_off_buddy(struct page *pa
 			int migratetype = get_pfnblock_migratetype(page_head,
 								   pfn_head);
 
-			del_page_from_free_list(page_head, zone, page_order);
+			del_page_from_free_list(page_head, zone, page_order,
+						migratetype);
 			break_down_buddy_pages(zone, page_head, page, 0,
 						page_order, migratetype);
 			SetPageHWPoisonTakenOff(page);
-			if (!is_migrate_isolate(migratetype))
-				__mod_zone_freepage_state(zone, -1, migratetype);
 			ret = true;
 			break;
 		}
@@ -6666,7 +6681,7 @@ static bool try_to_accept_memory_one(str
 	list_del(&page->lru);
 	last = list_empty(&zone->unaccepted_pages);
 
-	__mod_zone_freepage_state(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
+	account_freepages(page, zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
 	__mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
 	spin_unlock_irqrestore(&zone->lock, flags);
 
@@ -6718,7 +6733,7 @@ static bool __free_unaccepted(struct pag
 	spin_lock_irqsave(&zone->lock, flags);
 	first = list_empty(&zone->unaccepted_pages);
 	list_add_tail(&page->lru, &zone->unaccepted_pages);
-	__mod_zone_freepage_state(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
+	account_freepages(page, zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
 	__mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
 	spin_unlock_irqrestore(&zone->lock, flags);
 
--- a/mm/page_isolation.c~mm-page_alloc-consolidate-free-page-accounting
+++ a/mm/page_isolation.c
@@ -181,13 +181,12 @@ static int set_migratetype_isolate(struc
 		int nr_pages;
 		int mt = get_pageblock_migratetype(page);
 
-		nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
+		nr_pages = move_freepages_block(zone, page, mt, MIGRATE_ISOLATE);
 		/* Block spans zone boundaries? */
 		if (nr_pages == -1) {
 			spin_unlock_irqrestore(&zone->lock, flags);
 			return -EBUSY;
 		}
-		__mod_zone_freepage_state(zone, -nr_pages, mt);
 		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
 		zone->nr_isolate_pageblock++;
 		spin_unlock_irqrestore(&zone->lock, flags);
@@ -255,13 +254,13 @@ static void unset_migratetype_isolate(st
 	 * allocation.
 	 */
 	if (!isolated_page) {
-		int nr_pages = move_freepages_block(zone, page, migratetype);
+		int nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE,
+						    migratetype);
 		/*
 		 * Isolating this block already succeeded, so this
 		 * should not fail on zone boundaries.
 		 */
 		WARN_ON_ONCE(nr_pages == -1);
-		__mod_zone_freepage_state(zone, nr_pages, migratetype);
 	}
 	set_pageblock_migratetype(page, migratetype);
 	if (isolated_page)
_

Patches currently in -mm which might be from hannes@xxxxxxxxxxx are

mm-page_alloc-fix-cma-and-highatomic-landing-on-the-wrong-buddy-list.patch
mm-page_alloc-remove-pcppage-migratetype-caching.patch
mm-page_alloc-fix-up-block-types-when-merging-compatible-blocks.patch
mm-page_alloc-move-free-pages-when-converting-block-during-isolation.patch
mm-page_alloc-fix-move_freepages_block-range-error.patch
mm-page_alloc-fix-freelist-movement-during-block-conversion.patch
mm-page_alloc-consolidate-free-page-accounting.patch