works - thanks

On 22.10.19 at 12:21, Vlastimil Babka wrote:
> On 10/22/19 12:02 PM, Stefan Priebe - Profihost AG wrote:
>>
>> On 22.10.19 at 09:48, Vlastimil Babka wrote:
>>> On 10/22/19 9:41 AM, Stefan Priebe - Profihost AG wrote:
>>>>> Hi, could you try the patch below? I suspect you're hitting a corner
>>>>> case where compaction_suitable() returns COMPACT_SKIPPED for ZONE_DMA,
>>>>> triggering reclaim even if other zones have plenty of free memory. And
>>>>> should_continue_reclaim() then returns true until twice the requested
>>>>> page size is reclaimed (compact_gap()). That means 4MB reclaimed for
>>>>> each THP allocation attempt, which roughly matches the trace data you
>>>>> provided previously.
>>>>>
>>>>> The amplification to 4MB should be removed by the patches merged for
>>>>> 5.4, so only 32 pages would be reclaimed per THP allocation. The patch
>>>>> below tries to remove this corner case completely, and its effect
>>>>> should be more visible on your 5.2.x, so please apply it there.
>>>>>
>>>> Is there any reason not to apply that one on top of 4.19?
>>>>
>>>> Greets,
>>>> Stefan
>>>>
>>>
>>> It should work; it cherry-picks fine without conflict here.
>>
>> OK, but it does not compile ;-)
>>
>>
>> mm/compaction.c: In function '__compaction_suitable':
>> mm/compaction.c:1451:19: error: implicit declaration of function
>> 'zone_managed_pages'; did you mean 'node_spanned_pages'?
>> [-Werror=implicit-function-declaration]
>>      alloc_flags, zone_managed_pages(zone)))
>>                   ^~~~~~~~~~~~~~~~~~
>>                   node_spanned_pages
>
> Ah, this?
>
> ----8<----
> From f1335e1c0d4b74205fc0cc40b5960223d6f1dec7 Mon Sep 17 00:00:00 2001
> From: Vlastimil Babka <vbabka@xxxxxxx>
> Date: Thu, 12 Sep 2019 13:40:46 +0200
> Subject: [PATCH] WIP
>
> ---
>  include/linux/compaction.h     |  7 ++++++-
>  include/trace/events/mmflags.h |  1 +
>  mm/compaction.c                | 16 +++++++++++++--
>  mm/vmscan.c                    | 36 ++++++++++++++++++++++++----------
>  4 files changed, 47 insertions(+), 13 deletions(-)
>
> diff --git a/include/linux/compaction.h b/include/linux/compaction.h
> index 68250a57aace..2f3b331c5239 100644
> --- a/include/linux/compaction.h
> +++ b/include/linux/compaction.h
> @@ -17,8 +17,13 @@ enum compact_priority {
>  };
>
>  /* Return values for compact_zone() and try_to_compact_pages() */
> -/* When adding new states, please adjust include/trace/events/compaction.h */
> +/* When adding new states, please adjust include/trace/events/mmflags.h */
>  enum compact_result {
> +        /*
> +         * The zone is too small to provide the requested allocation even if
> +         * fully freed (i.e. ZONE_DMA for THP allocation due to lowmem reserves)
> +         */
> +        COMPACT_IMPOSSIBLE,
>          /* For more detailed tracepoint output - internal to compaction */
>          COMPACT_NOT_SUITABLE_ZONE,
>          /*
> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
> index a81cffb76d89..d7aa9cece234 100644
> --- a/include/trace/events/mmflags.h
> +++ b/include/trace/events/mmflags.h
> @@ -169,6 +169,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \
>
>  #ifdef CONFIG_COMPACTION
>  #define COMPACTION_STATUS                               \
> +        EM( COMPACT_IMPOSSIBLE, "impossible")           \
>          EM( COMPACT_SKIPPED,    "skipped")              \
>          EM( COMPACT_DEFERRED,   "deferred")             \
>          EM( COMPACT_CONTINUE,   "continue")             \
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 5079ddbec8f9..7d2299c7faa2 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -1416,6 +1416,7 @@ static enum compact_result compact_finished(struct zone *zone,
>  /*
>   * compaction_suitable: Is this suitable to run compaction on this zone now?
>   * Returns
> + *   COMPACT_IMPOSSIBLE If the allocation would fail even with all pages free
>   *   COMPACT_SKIPPED  - If there are too few free pages for compaction
>   *   COMPACT_SUCCESS  - If the allocation would succeed without compaction
>   *   COMPACT_CONTINUE - If compaction should run now
> @@ -1439,6 +1440,16 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
>                                                  alloc_flags))
>                  return COMPACT_SUCCESS;
>
> +        /*
> +         * If the allocation would not succeed even with a fully free zone
> +         * due to e.g. lowmem reserves, indicate that compaction can't possibly
> +         * help and it would be pointless to reclaim.
> +         */
> +        watermark += 1UL << order;
> +        if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
> +                                 alloc_flags, zone->managed_pages))
> +                return COMPACT_IMPOSSIBLE;
> +
>          /*
>           * Watermarks for order-0 must be met for compaction to be able to
>           * isolate free pages for migration targets. This means that the
> @@ -1526,7 +1537,7 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
>                  available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
>                  compact_result = __compaction_suitable(zone, order, alloc_flags,
>                                  ac_classzone_idx(ac), available);
> -                if (compact_result != COMPACT_SKIPPED)
> +                if (compact_result > COMPACT_SKIPPED)
>                          return true;
>          }
>
> @@ -1555,7 +1566,8 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_control *cc)
>          ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
>                                                  cc->classzone_idx);
>          /* Compaction is likely to fail */
> -        if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
> +        if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED
> +            || ret == COMPACT_IMPOSSIBLE)
>                  return ret;
>
>          /* huh, compaction_suitable is returning something unexpected */
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index b37610c0eac6..7ad331a64fc5 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2849,11 +2849,12 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
>  }
>
>  /*
> - * Returns true if compaction should go ahead for a costly-order request, or
> - * the allocation would already succeed without compaction. Return false if we
> - * should reclaim first.
> + * Returns 1 if compaction should go ahead for a costly-order request, or the
> + * allocation would already succeed without compaction. Return 0 if we should
> + * reclaim first. Return -1 when compaction can't help at all due to zone being
> + * too small, which means there's no point in reclaim nor compaction.
>   */
> -static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
> +static inline int compaction_ready(struct zone *zone, struct scan_control *sc)
>  {
>          unsigned long watermark;
>          enum compact_result suitable;
> @@ -2861,10 +2862,16 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
>          suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx);
>          if (suitable == COMPACT_SUCCESS)
>                  /* Allocation should succeed already. Don't reclaim. */
> -                return true;
> +                return 1;
>          if (suitable == COMPACT_SKIPPED)
>                  /* Compaction cannot yet proceed. Do reclaim. */
> -                return false;
> +                return 0;
> +        if (suitable == COMPACT_IMPOSSIBLE)
> +                /*
> +                 * Compaction can't possibly help. So don't reclaim, but keep
> +                 * checking other zones.
> +                 */
> +                return -1;
>
>          /*
>           * Compaction is already possible, but it takes time to run and there
> @@ -2910,6 +2917,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
>
>          for_each_zone_zonelist_nodemask(zone, z, zonelist,
>                                          sc->reclaim_idx, sc->nodemask) {
> +                int compact_ready;
>                  /*
>                   * Take care memory controller reclaiming has small influence
>                   * to global LRU.
> @@ -2929,10 +2937,18 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
>                           * page allocations.
>                           */
>                          if (IS_ENABLED(CONFIG_COMPACTION) &&
> -                            sc->order > PAGE_ALLOC_COSTLY_ORDER &&
> -                            compaction_ready(zone, sc)) {
> -                                sc->compaction_ready = true;
> -                                continue;
> +                            sc->order > PAGE_ALLOC_COSTLY_ORDER) {
> +                                compact_ready = compaction_ready(zone, sc);
> +                                if (compact_ready == 1) {
> +                                        sc->compaction_ready = true;
> +                                        continue;
> +                                } else if (compact_ready == -1) {
> +                                        /*
> +                                         * In this zone, neither reclaim nor
> +                                         * compaction can help.
> +                                         */
> +                                        continue;
> +                                }
>                          }
>
>                          /*
>
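
For reference, the "twice the requested page size" / 4MB figure discussed near the top of the thread comes from the kernel's compact_gap() helper, which returns 2UL << order. The standalone userspace sketch below is not kernel code; it simply works through that arithmetic, assuming 4 KiB base pages and the x86-64 THP order of 9 (neither value is spelled out in the thread itself):

/*
 * Sketch of the reclaim-target arithmetic described in the thread:
 * compact_gap() is twice the requested allocation size, in pages.
 */
#include <stdio.h>

#define PAGE_SIZE_BYTES 4096UL  /* assumption: 4 KiB base pages */
#define THP_ORDER 9             /* assumption: PMD-sized THP on x86-64 */

/* Mirrors the kernel's compact_gap(). */
static unsigned long compact_gap(unsigned int order)
{
        return 2UL << order;
}

int main(void)
{
        unsigned long pages = compact_gap(THP_ORDER);

        /* 2UL << 9 = 1024 pages = 4 MiB reclaimed per THP attempt. */
        printf("reclaim target: %lu pages (%lu KiB)\n",
               pages, pages * PAGE_SIZE_BYTES / 1024);
        return 0;
}

With those assumptions it prints 1024 pages (4096 KiB), matching the 4MB per THP allocation attempt derived above; the 5.4-era patches mentioned in the thread drop this amplification, so only about 32 pages are reclaimed per attempt.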
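
The ZONE_DMA corner case the patch targets can be pictured with a similar back-of-the-envelope check. The sketch below only mirrors the shape of the test the patch adds; it is not the kernel's __zone_watermark_ok(), and the zone size, watermark, and lowmem reserve values are illustrative guesses for a server with tens of GiB of RAM. The point is that a ~16 MiB ZONE_DMA holds only about 4000 pages, while the lowmem reserve protecting it from ordinary userspace allocations is typically far larger than the zone itself, so an order-9 request can never be satisfied there even if every page were free:

/*
 * Illustrative sketch (not the kernel's __zone_watermark_ok()) of the
 * question the patch adds to __compaction_suitable(): could the zone
 * satisfy the request even if all of its managed pages were free?
 */
#include <stdbool.h>
#include <stdio.h>

static bool possible_even_if_fully_free(unsigned long managed_pages,
                                        unsigned long low_wmark,
                                        unsigned long lowmem_reserve,
                                        unsigned int order)
{
        /* Rough analogue of the watermark check with all pages free. */
        return managed_pages >= low_wmark + lowmem_reserve + (1UL << order);
}

int main(void)
{
        unsigned long dma_managed = 3998;         /* ~16 MiB of 4 KiB pages */
        unsigned long dma_low_wmark = 8;          /* tiny zone, tiny watermark */
        unsigned long dma_lowmem_reserve = 32000; /* reserve vs. user allocations */

        printf("order-9 THP from ZONE_DMA: %s\n",
               possible_even_if_fully_free(dma_managed, dma_low_wmark,
                                           dma_lowmem_reserve, 9)
               ? "possible" : "impossible");
        return 0;
}

Before the patch such a zone was reported as COMPACT_SKIPPED, so should_continue_reclaim() kept reclaiming toward the compact_gap() target sketched above even though neither reclaim nor compaction could ever make the allocation succeed in that zone; the new COMPACT_IMPOSSIBLE value lets both compaction and direct reclaim skip it.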