On 9/13/21 8:50 AM, Michal Hocko wrote:
> On Fri 10-09-21 17:11:05, Mike Kravetz wrote:
> [...]
>> @@ -5064,8 +5068,18 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
>>  	if (did_some_progress > 0 &&
>>  			should_compact_retry(ac, order, alloc_flags,
>>  				compact_result, &compact_priority,
>> -				&compaction_retries))
>> +				&compaction_retries)) {
>> +		/*
>> +		 * In one pathological case, pages can be stolen immediately
>> +		 * after being reclaimed.  It looks like we are making
>> +		 * progress, and compaction_retries is not incremented.
>> +		 * This could cause an indefinite number of retries.  Cap
>> +		 * the number of retries for costly orders.
>> +		 */
>> +		if (max_tries && tries > max_tries)
>> +			goto nopage;
>>  		goto retry;
>> +	}
> 
> I do not think this is a good approach. We do not want to play with
> retry numbers. If we want to go with a minimal change for now then the
> compaction feedback mechanism should track the number of reclaimed pages
> to satisfy watermarks and if that grows beyond reasonable (proportionally
> to the request size) then simply give up rather than keep trying again
> and again.

I am experimenting with code similar to the patch below.  The idea is to
key off of COMPACT_SKIPPED being returned, which implies that not enough
free pages are available for compaction.  One of the values in that
calculation is compact_gap(), which is scaled by allocation order.  The
simple change is to give up if the number of reclaimed pages is greater
than compact_gap(order): if that much reclaim has not made compaction
possible, the implication is that the reclaimed pages are being stolen
before compaction can use them.

Such a patch does help in my specific test.  However, the concern is that
giving up earlier may regress some workloads.  One could of course
construct a scenario where one more retry would have succeeded.
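For reference, compact_gap() in include/linux/compaction.h boils down to
twice the request size; the sketch below is my paraphrase (the comment
wording is mine, not the in-tree text):

static inline unsigned long compact_gap(unsigned int order)
{
	/*
	 * Leave headroom of twice the request size: room for the pages
	 * isolated for migration as well as the free target pages they
	 * will be migrated to.
	 */
	return 2UL << order;
}

So with 4K base pages, an order-9 request (a 2MB hugetlb page on x86_64)
gives a cutoff of 2UL << 9 = 1024 pages, i.e. the patch gives up after
roughly 4MB of reclaim without compaction becoming ready.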
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eeb3a9c..f8e3b0e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4413,6 +4413,7 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
 static inline bool
 should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
+		     unsigned long nr_reclaimed,
 		     enum compact_result compact_result,
 		     enum compact_priority *compact_priority,
 		     int *compaction_retries)
 {
@@ -4445,7 +4446,16 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
 	 * to work with, so we retry only if it looks like reclaim can help.
 	 */
 	if (compaction_needs_reclaim(compact_result)) {
-		ret = compaction_zonelist_suitable(ac, order, alloc_flags);
+		/*
+		 * If we reclaimed enough pages, but they are not available
+		 * now, this implies they were stolen.  Do not reclaim again
+		 * as this could go on indefinitely.
+		 */
+		if (nr_reclaimed > compact_gap(order))
+			ret = false;
+		else
+			ret = compaction_zonelist_suitable(ac, order,
+							   alloc_flags);
 		goto out;
 	}
 
@@ -4504,6 +4514,7 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
 static inline bool
 should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
+		     unsigned long nr_reclaimed,
 		     enum compact_result compact_result,
 		     enum compact_priority *compact_priority,
 		     int *compaction_retries)
 {
@@ -4890,6 +4901,7 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 	struct page *page = NULL;
 	unsigned int alloc_flags;
 	unsigned long did_some_progress;
+	unsigned long nr_reclaimed;
 	enum compact_priority compact_priority;
 	enum compact_result compact_result;
 	int compaction_retries;
@@ -5033,6 +5045,7 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 					&did_some_progress);
 	if (page)
 		goto got_pg;
+	nr_reclaimed = did_some_progress;
 
 	/* Try direct compaction and then allocating */
 	page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac,
@@ -5063,7 +5076,7 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 	 */
 	if (did_some_progress > 0 &&
 			should_compact_retry(ac, order, alloc_flags,
-				compact_result, &compact_priority,
+				nr_reclaimed, compact_result, &compact_priority,
 				&compaction_retries))
 		goto retry;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eeae2f6..d40eca5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2819,10 +2819,18 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 	}
 
 	/*
+	 * If we have reclaimed enough pages for compaction, and compaction
+	 * is not ready, this implies pages are being stolen as they are
+	 * being reclaimed.  Quit now instead of continually retrying.
+	 */
+	pages_for_compaction = compact_gap(sc->order);
+	if (!current_is_kswapd() && sc->nr_reclaimed > pages_for_compaction)
+		return false;
+
+	/*
 	 * If we have not reclaimed enough pages for compaction and the
 	 * inactive lists are large enough, continue reclaiming
 	 */
-	pages_for_compaction = compact_gap(sc->order);
 	inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
 	if (get_nr_swap_pages() > 0)
 		inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
-- 
Mike Kravetz