On 10/22/19 12:02 PM, Stefan Priebe - Profihost AG wrote:
>
> On 22.10.19 at 09:48, Vlastimil Babka wrote:
>> On 10/22/19 9:41 AM, Stefan Priebe - Profihost AG wrote:
>>>> Hi, could you try the patch below? I suspect you're hitting a corner
>>>> case where compaction_suitable() returns COMPACT_SKIPPED for the
>>>> ZONE_DMA, triggering reclaim even if other zones have plenty of free
>>>> memory. And should_continue_reclaim() then returns true until twice the
>>>> requested page size is reclaimed (compact_gap()). That means 4MB
>>>> reclaimed for each THP allocation attempt, which roughly matches the
>>>> trace data you provided previously.
>>>>
>>>> The amplification to 4MB should be removed in patches merged for 5.4, so
>>>> it would be only 32 pages reclaimed per THP allocation. The patch below
>>>> tries to remove this corner case completely, and it should be more
>>>> visible on your 5.2.x, so please apply it there.
>>>>
>>> is there any reason to not apply that one on top of 4.19?
>>>
>>> Greets,
>>> Stefan
>>>
>>
>> It should work, cherrypicks fine without conflict here.
>
> OK, but it does not work ;-)
>
>
> mm/compaction.c: In function '__compaction_suitable':
> mm/compaction.c:1451:19: error: implicit declaration of function
> 'zone_managed_pages'; did you mean 'node_spanned_pages'?
> [-Werror=implicit-function-declaration]
>     alloc_flags, zone_managed_pages(zone)))
>                  ^~~~~~~~~~~~~~~~~~
>                  node_spanned_pages

Ah, this?

----8<----
>From f1335e1c0d4b74205fc0cc40b5960223d6f1dec7 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@xxxxxxx>
Date: Thu, 12 Sep 2019 13:40:46 +0200
Subject: [PATCH] WIP

---
 include/linux/compaction.h     |  7 ++++++-
 include/trace/events/mmflags.h |  1 +
 mm/compaction.c                | 16 +++++++++++++--
 mm/vmscan.c                    | 36 ++++++++++++++++++++++++----------
 4 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 68250a57aace..2f3b331c5239 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -17,8 +17,13 @@ enum compact_priority {
 };
 
 /* Return values for compact_zone() and try_to_compact_pages() */
-/* When adding new states, please adjust include/trace/events/compaction.h */
+/* When adding new states, please adjust include/trace/events/mmflags.h */
 enum compact_result {
+	/*
+	 * The zone is too small to provide the requested allocation even if
+	 * fully freed (i.e. ZONE_DMA for THP allocation due to lowmem reserves)
+	 */
+	COMPACT_IMPOSSIBLE,
 	/* For more detailed tracepoint output - internal to compaction */
 	COMPACT_NOT_SUITABLE_ZONE,
 	/*
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index a81cffb76d89..d7aa9cece234 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -169,6 +169,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY,	"softdirty"	)		\
 
 #ifdef CONFIG_COMPACTION
 #define COMPACTION_STATUS					\
+	EM( COMPACT_IMPOSSIBLE,		"impossible")		\
 	EM( COMPACT_SKIPPED,		"skipped")		\
 	EM( COMPACT_DEFERRED,		"deferred")		\
 	EM( COMPACT_CONTINUE,		"continue")		\
diff --git a/mm/compaction.c b/mm/compaction.c
index 5079ddbec8f9..7d2299c7faa2 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1416,6 +1416,7 @@ static enum compact_result compact_finished(struct zone *zone,
 /*
  * compaction_suitable: Is this suitable to run compaction on this zone now?
  * Returns
+ *   COMPACT_IMPOSSIBLE If the allocation would fail even with all pages free
  *   COMPACT_SKIPPED  - If there are too few free pages for compaction
  *   COMPACT_SUCCESS  - If the allocation would succeed without compaction
  *   COMPACT_CONTINUE - If compaction should run now
@@ -1439,6 +1440,16 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
 								alloc_flags))
 		return COMPACT_SUCCESS;
 
+	/*
+	 * If the allocation would not succeed even with a fully free zone
+	 * due to e.g. lowmem reserves, indicate that compaction can't possibly
+	 * help and it would be pointless to reclaim.
+	 */
+	watermark += 1UL << order;
+	if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
+				 alloc_flags, zone->managed_pages))
+		return COMPACT_IMPOSSIBLE;
+
 	/*
 	 * Watermarks for order-0 must be met for compaction to be able to
 	 * isolate free pages for migration targets. This means that the
@@ -1526,7 +1537,7 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
 		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
 		compact_result = __compaction_suitable(zone, order, alloc_flags,
 				ac_classzone_idx(ac), available);
-		if (compact_result != COMPACT_SKIPPED)
+		if (compact_result > COMPACT_SKIPPED)
 			return true;
 	}
 
@@ -1555,7 +1566,8 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
 	ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
 							cc->classzone_idx);
 	/* Compaction is likely to fail */
-	if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
+	if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED
+	    || ret == COMPACT_IMPOSSIBLE)
 		return ret;
 
 	/* huh, compaction_suitable is returning something unexpected */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b37610c0eac6..7ad331a64fc5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2849,11 +2849,12 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 }
 
 /*
- * Returns true if compaction should go ahead for a costly-order request, or
- * the allocation would already succeed without compaction. Return false if we
- * should reclaim first.
+ * Returns 1 if compaction should go ahead for a costly-order request, or the
+ * allocation would already succeed without compaction. Return 0 if we should
+ * reclaim first. Return -1 when compaction can't help at all due to zone being
+ * too small, which means there's no point in reclaim nor compaction.
  */
-static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+static inline int compaction_ready(struct zone *zone, struct scan_control *sc)
 {
 	unsigned long watermark;
 	enum compact_result suitable;
@@ -2861,10 +2862,16 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 	suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx);
 	if (suitable == COMPACT_SUCCESS)
 		/* Allocation should succeed already. Don't reclaim. */
-		return true;
+		return 1;
 	if (suitable == COMPACT_SKIPPED)
 		/* Compaction cannot yet proceed. Do reclaim. */
-		return false;
+		return 0;
+	if (suitable == COMPACT_IMPOSSIBLE)
+		/*
+		 * Compaction can't possibly help. So don't reclaim, but keep
+		 * checking other zones.
+		 */
+		return -1;
 
 	/*
 	 * Compaction is already possible, but it takes time to run and there
@@ -2910,6 +2917,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 					sc->reclaim_idx, sc->nodemask) {
+		int compact_ready;
 		/*
 		 * Take care memory controller reclaiming has small influence
 		 * to global LRU.
@@ -2929,10 +2937,18 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 		 * page allocations.
 		 */
 		if (IS_ENABLED(CONFIG_COMPACTION) &&
-		    sc->order > PAGE_ALLOC_COSTLY_ORDER &&
-		    compaction_ready(zone, sc)) {
-			sc->compaction_ready = true;
-			continue;
+		    sc->order > PAGE_ALLOC_COSTLY_ORDER) {
+			compact_ready = compaction_ready(zone, sc);
+			if (compact_ready == 1) {
+				sc->compaction_ready = true;
+				continue;
+			} else if (compact_ready == -1) {
+				/*
+				 * In this zone, neither reclaim nor
+				 * compaction can help.
+				 */
+				continue;
+			}
 		}
 
 		/*
-- 
2.23.0
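
A side note for readers of the archive: the "4MB reclaimed for each THP
allocation attempt" figure quoted at the top follows from compact_gap(),
which asks for twice the requested allocation size. The snippet below is
only an illustrative userspace sketch of that arithmetic, not part of the
patch; it assumes x86_64 defaults (4K pages, order-9 THP) and mirrors the
kernel's 2UL << order definition of compact_gap().

/*
 * Standalone sketch (not part of the patch): how compact_gap() turns one
 * THP allocation attempt into a 4MB reclaim target.  PAGE_SIZE and
 * THP_ORDER are assumed x86_64 defaults, not taken from kernel headers.
 */
#include <stdio.h>

#define PAGE_SIZE	4096UL	/* assumed 4K base pages */
#define THP_ORDER	9	/* 2MB THP = 512 base pages */

/* Mirrors the kernel's compact_gap(): twice the requested size, in pages. */
static unsigned long compact_gap(unsigned int order)
{
	return 2UL << order;
}

int main(void)
{
	unsigned long pages = compact_gap(THP_ORDER);

	/* 2UL << 9 = 1024 pages = 4MB with 4K pages */
	printf("reclaim target per THP attempt: %lu pages (%lu MB)\n",
	       pages, pages * PAGE_SIZE >> 20);
	return 0;
}

It prints 1024 pages (4 MB); as mentioned in the quoted explanation, the
changes merged for 5.4 shrink that per-attempt target to 32 pages.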