On Tue, Jul 23, 2024 at 01:55:13PM +0200, Michal Hocko wrote:
> On Tue 23-07-24 12:49:41, Kirill A. Shutemov wrote:
> > On Tue, Jul 23, 2024 at 09:30:27AM +0200, Vlastimil Babka wrote:
> [...]
> > > Although just removing the lazy accept mode would be a much more
> > > appealing solution than this :)
> > 
> > :P
> > 
> > Not really an option for big VMs. It might add many minutes to boot time.
> 
> Well, a huge part of that can be done in the background, so the boot
> doesn't really have to wait for all of it. If we really have to start
> playing whack-a-mole to plug all the potential ways to trigger reclaim
> imbalance, I think it is fair to re-evaluate how lazy the
> initialization really should be.

One other option I see is to treat unaccepted memory as free, so
watermark checks would not fail as long as we still have unaccepted
memory. No spinning in kswapd in this case.

Only get_page_from_freelist() and __alloc_pages_bulk() are aware of
unaccepted memory.

The quick patch below shows the idea.

I am not sure how it would affect __isolate_free_page() callers. IIUC,
they expect to see pages on free lists, but might not find them there
in this scenario because they are not accepted yet. I need to take a
closer look at this.

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c11b7cde81ef..5e0bdfbe2f1f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -667,6 +667,7 @@ enum zone_watermarks {
 #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
 #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
 #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
+#define promo_wmark_pages(z) (z->_watermark[WMARK_PROMO] + z->watermark_boost)
 #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 14d39f34d336..254bfe29eaf1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -304,7 +304,7 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 static bool page_contains_unaccepted(struct page *page, unsigned int order);
 static void accept_page(struct page *page, unsigned int order);
-static bool try_to_accept_memory(struct zone *zone, unsigned int order);
+static bool cond_accept_memory(struct zone *zone, unsigned int order);
 static inline bool has_unaccepted_memory(void);
 static bool __free_unaccepted(struct page *page);
 
@@ -2947,9 +2947,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
 	if (!(alloc_flags & ALLOC_CMA))
 		unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
 #endif
-#ifdef CONFIG_UNACCEPTED_MEMORY
-	unusable_free += zone_page_state(z, NR_UNACCEPTED);
-#endif
 
 	return unusable_free;
 }
@@ -3243,6 +3240,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 			}
 		}
 
+		cond_accept_memory(zone, order);
+
 		/*
 		 * Detect whether the number of free pages is below high
 		 * watermark.  If so, we will decrease pcp->high and free
@@ -3268,10 +3267,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 				       gfp_mask)) {
 			int ret;
 
-			if (has_unaccepted_memory()) {
-				if (try_to_accept_memory(zone, order))
-					goto try_this_zone;
-			}
+			if (cond_accept_memory(zone, order))
+				goto try_this_zone;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 			/*
@@ -3325,10 +3322,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 
 			return page;
 		} else {
-			if (has_unaccepted_memory()) {
-				if (try_to_accept_memory(zone, order))
-					goto try_this_zone;
-			}
+			if (cond_accept_memory(zone, order))
+				goto try_this_zone;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 			/* Try again if zone has deferred pages */
@@ -4456,12 +4451,25 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 			goto failed;
 		}
 
+		cond_accept_memory(zone, 0);
+retry_this_zone:
 		mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK) + nr_pages;
 		if (zone_watermark_fast(zone, 0, mark,
				zonelist_zone_idx(ac.preferred_zoneref),
				alloc_flags, gfp)) {
			break;
		}
+
+		if (cond_accept_memory(zone, 0))
+			goto retry_this_zone;
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+		/* Try again if zone has deferred pages */
+		if (deferred_pages_enabled()) {
+			if (_deferred_grow_zone(zone, 0))
+				goto retry_this_zone;
+		}
+#endif
 	}
 
 	/*
@@ -6833,9 +6841,6 @@ static bool try_to_accept_memory_one(struct zone *zone)
 	struct page *page;
 	bool last;
 
-	if (list_empty(&zone->unaccepted_pages))
-		return false;
-
 	spin_lock_irqsave(&zone->lock, flags);
 	page = list_first_entry_or_null(&zone->unaccepted_pages,
 					struct page, lru);
@@ -6861,23 +6866,29 @@ static bool try_to_accept_memory_one(struct zone *zone)
 	return true;
 }
 
-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order)
 {
 	long to_accept;
 	int ret = false;
 
-	/* How much to accept to get to high watermark? */
-	to_accept = high_wmark_pages(zone) -
-		    (zone_page_state(zone, NR_FREE_PAGES) -
-		    __zone_watermark_unusable_free(zone, order, 0));
+	if (!has_unaccepted_memory())
+		return false;
 
-	/* Accept at least one page */
-	do {
+	if (list_empty(&zone->unaccepted_pages))
+		return false;
+
+	/* How much to accept to get to promo watermark? */
+	to_accept = promo_wmark_pages(zone) -
+		    (zone_page_state(zone, NR_FREE_PAGES) -
+		    __zone_watermark_unusable_free(zone, order, 0) -
+		    zone_page_state(zone, NR_UNACCEPTED));
+
+	while (to_accept > 0) {
 		if (!try_to_accept_memory_one(zone))
 			break;
 		ret = true;
 		to_accept -= MAX_ORDER_NR_PAGES;
-	} while (to_accept > 0);
+	}
 
 	return ret;
 }
@@ -6920,7 +6931,7 @@ static void accept_page(struct page *page, unsigned int order)
 {
 }
 
-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order)
 {
 	return false;
 }
-- 
  Kiryl Shutsemau / Kirill A. Shutemov
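For what it's worth, the to_accept arithmetic in cond_accept_memory() can
be sanity-checked outside the kernel. Below is a throwaway userspace sketch
(not part of the patch) that only mimics the target calculation; all
per-zone counter values are made up, and MAX_ORDER_NR_PAGES is hard-coded
to 1024 purely for the example:

/* Illustration only, not kernel code: mimic cond_accept_memory()'s math. */
#include <stdio.h>

#define MAX_ORDER_NR_PAGES 1024		/* assumed value for the example */

int main(void)
{
	/* Hypothetical per-zone counters, in pages */
	long promo_wmark   = 20000;	/* promo_wmark_pages(zone) */
	long nr_free       = 40000;	/* NR_FREE_PAGES, unaccepted included */
	long unusable_free = 1000;	/* __zone_watermark_unusable_free() */
	long nr_unaccepted = 32768;	/* NR_UNACCEPTED */

	/*
	 * Accept just enough to bring the usable free pages (free minus
	 * unusable minus still-unaccepted) up to the promo watermark.
	 */
	long to_accept = promo_wmark -
			 (nr_free - unusable_free - nr_unaccepted);
	long accepted = 0;

	while (to_accept > 0 && nr_unaccepted > 0) {
		/* each try_to_accept_memory_one() call accepts one MAX_ORDER chunk */
		nr_unaccepted -= MAX_ORDER_NR_PAGES;
		accepted += MAX_ORDER_NR_PAGES;
		to_accept -= MAX_ORDER_NR_PAGES;
	}

	/*
	 * With these numbers, to_accept starts at 13768, so 14 chunks
	 * (14336 pages) get accepted before the loop stops.
	 */
	printf("accepted %ld pages\n", accepted);
	return 0;
}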