When alloc_flags contains ALLOC_HIGHATOMIC and the allocation order is 1/2/3/10 in rmqueue(), pages that are allocated successfully from the pcplist cache still cause a free pageblock to be moved from the allocated migratetype freelist to the MIGRATE_HIGHATOMIC freelist, instead of the allocation being served from the MIGRATE_HIGHATOMIC freelist first. As a result, the number of pages on the MIGRATE_HIGHATOMIC freelist keeps increasing, while the pages on the other migratetype freelists are reduced, so allocations from those freelists become more likely to fail. Currently the sequence of an ALLOC_HIGHATOMIC allocation is: pcplist cache --> buddy (batch >> order) allocation migratetype freelist --> buddy MIGRATE_HIGHATOMIC freelist --> buddy allocation migratetype freelist. Because requesting pages from the pcplist cache is faster than requesting them from buddy, the pcplist cache is kept as the first step and the sequence of an ALLOC_HIGHATOMIC allocation is modified to: pcplist cache --> buddy MIGRATE_HIGHATOMIC freelist --> buddy allocation migratetype freelist. This patch resolves the allocation failures of non-ALLOC_HIGHATOMIC allocations that are caused by excessive page reservations in the MIGRATE_HIGHATOMIC freelist. Signed-off-by: Zhiguo Jiang <justinjiang@xxxxxxxx> --- mm/internal.h | 1 + mm/page_alloc.c | 22 ++++++++++++++-------- 2 files changed, 15 insertions(+), 8 deletions(-) mode change 100644 => 100755 mm/internal.h mode change 100644 => 100755 mm/page_alloc.c diff --git a/mm/internal.h b/mm/internal.h index 7499b5ea1cf6..a3b3fdda8031 --- a/mm/internal.h +++ b/mm/internal.h @@ -844,6 +844,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, #endif #define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ +#define ALLOC_PCPLIST 0x1000 /* page alloced from pcplist */ /* Flags that allow allocations below the min watermark. 
*/ #define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 442c1b3480aa..afcd0f875c92 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2722,6 +2722,8 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order, int batch = READ_ONCE(pcp->batch); int alloced; + if (alloc_flags & ALLOC_HIGHATOMIC) + goto out; /* * Scale batch relative to order if batch implies * free pages can be stored on the PCP. Batch can @@ -2736,6 +2738,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order, migratetype, alloc_flags); pcp->count += alloced << order; +out: if (unlikely(list_empty(list))) return NULL; } @@ -2798,7 +2801,7 @@ __no_sanitize_memory static inline struct page *rmqueue(struct zone *preferred_zone, struct zone *zone, unsigned int order, - gfp_t gfp_flags, unsigned int alloc_flags, + gfp_t gfp_flags, unsigned int *alloc_flags, int migratetype) { struct page *page; @@ -2814,21 +2817,23 @@ struct page *rmqueue(struct zone *preferred_zone, * MIGRATE_MOVABLE pcplist could have the pages on CMA area and * we need to skip it when CMA area isn't allowed. 
*/ - if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA || + if (!IS_ENABLED(CONFIG_CMA) || *alloc_flags & ALLOC_CMA || migratetype != MIGRATE_MOVABLE) { page = rmqueue_pcplist(preferred_zone, zone, order, - migratetype, alloc_flags); - if (likely(page)) + migratetype, *alloc_flags); + if (likely(page)) { + *alloc_flags |= ALLOC_PCPLIST; goto out; + } } } - page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags, + page = rmqueue_buddy(preferred_zone, zone, order, *alloc_flags, migratetype); out: /* Separate test+clear to avoid unnecessary atomics */ - if ((alloc_flags & ALLOC_KSWAPD) && + if ((*alloc_flags & ALLOC_KSWAPD) && unlikely(test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags))) { clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags); wakeup_kswapd(zone, 0, 0, zone_idx(zone)); @@ -3208,7 +3213,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, try_this_zone: page = rmqueue(ac->preferred_zoneref->zone, zone, order, - gfp_mask, alloc_flags, ac->migratetype); + gfp_mask, &alloc_flags, ac->migratetype); if (page) { prep_new_page(page, order, gfp_mask, alloc_flags); @@ -3216,7 +3221,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, * If this is a high-order atomic allocation then check * if the pageblock should be reserved for the future */ - if (unlikely(alloc_flags & ALLOC_HIGHATOMIC)) + if (unlikely(alloc_flags & ALLOC_HIGHATOMIC) + && unlikely(!(alloc_flags & ALLOC_PCPLIST))) reserve_highatomic_pageblock(page, zone); return page; -- 2.39.0