Kswapd's goal is to balance at least one zone in the node for the
requested zoneidx, but no more than that. Kcompactd, on the other hand,
compacts all the zones in the node, even if one of them is already
compacted for the given request.

This can hog kcompactd for unnecessarily long on a requested zoneidx.
It also has kcompactd working on zones without the cooperation of
kswapd. There is a compaction_suitable() check, of course, but whether
it holds or not depends on luck, risking erratic behavior.

Make kcompactd follow the same criteria as kswapd when deciding to
work on a node, to keep them working in unison as much as possible.

Likewise, direct reclaim can bail as soon as one zone in the zonelist
is compaction_ready(), so check up front before hammering lower zones
while higher zones might already be suitable. This matches
compaction_zonelist_suitable() on the compaction side.

Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
---
 mm/compaction.c |  5 +++--
 mm/vmscan.c     | 35 +++++++++++++++++------------------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 52103545d58c..8080c04e644a 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2798,12 +2798,13 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat)
 		if (!populated_zone(zone))
 			continue;
 
-		/* Allocation can already succeed, check other zones */
+		/* Allocation can succeed in any zone, done */
 		if (zone_watermark_ok(zone, pgdat->kcompactd_max_order,
 				      min_wmark_pages(zone),
 				      highest_zoneidx, 0))
-			continue;
+			return true;
 
+		/* Allocation can't succeed, but enough order-0 to compact */
 		if (compaction_suitable(zone, pgdat->kcompactd_max_order,
 					highest_zoneidx) == COMPACT_CONTINUE)
 			return true;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 723705b9e4d9..14d6116384cc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6277,7 +6277,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 			      sc->reclaim_idx, 0))
 		return true;
 
-	/* Compaction cannot yet proceed. Do reclaim. */
+	/* Compaction cannot yet proceed, might need reclaim */
 	if (compaction_suitable(zone, sc->order,
 				sc->reclaim_idx) == COMPACT_SKIPPED)
 		return false;
@@ -6357,6 +6357,21 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 			sc->reclaim_idx = gfp_zone(sc->gfp_mask);
 	}
 
+	/* Bail if any of the zones are already compactable */
+	if (IS_ENABLED(CONFIG_COMPACTION) &&
+	    sc->order > PAGE_ALLOC_COSTLY_ORDER) {
+		for_each_zone_zonelist_nodemask(zone, z, zonelist,
+						sc->reclaim_idx, sc->nodemask) {
+			if (!cpuset_zone_allowed(zone,
+						 GFP_KERNEL | __GFP_HARDWALL))
+				continue;
+			if (compaction_ready(zone, sc)) {
+				sc->compaction_ready = true;
+				goto out;
+			}
+		}
+	}
+
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx,
 					sc->nodemask) {
 		/*
@@ -6368,22 +6383,6 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 					 GFP_KERNEL | __GFP_HARDWALL))
 			continue;
 
-		/*
-		 * If we already have plenty of memory free for
-		 * compaction in this zone, don't free any more.
-		 * Even though compaction is invoked for any
-		 * non-zero order, only frequent costly order
-		 * reclamation is disruptive enough to become a
-		 * noticeable problem, like transparent huge
-		 * page allocations.
-		 */
-		if (IS_ENABLED(CONFIG_COMPACTION) &&
-		    sc->order > PAGE_ALLOC_COSTLY_ORDER &&
-		    compaction_ready(zone, sc)) {
-			sc->compaction_ready = true;
-			continue;
-		}
-
 		/*
 		 * Shrink each node in the zonelist once. If the
 		 * zonelist is ordered by zone (not the default) then a
@@ -6420,7 +6419,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 
 	if (first_pgdat)
 		consider_reclaim_throttle(first_pgdat, sc);
-
+out:
 	/*
 	 * Restore to original mask to avoid the impact on the caller if we
 	 * promoted it to __GFP_HIGHMEM.
-- 
2.39.2
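
Not part of the patch: for readers who want the decision shape without the
kernel plumbing, below is a minimal userspace model of the new per-node
check. struct zone_model, watermark_ok(), enough_order0_to_compact() and the
numbers in main() are invented stand-ins for the real zone structure,
zone_watermark_ok() and compaction_suitable(); only the early-return
structure mirrors the patched kcompactd_node_suitable(). The same
first-qualifying-zone logic is what the new bail-out loop applies on the
direct reclaim side in shrink_zones().

/*
 * Model of the new kcompactd node-suitability criterion: the node is
 * worth working on as soon as ONE zone up to the requested zoneidx can
 * either already satisfy the allocation or has enough order-0 pages
 * for compaction to assemble it. Simplified stand-ins, not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

struct zone_model {
	const char *name;
	bool populated;
	unsigned long free_pages;	/* total free pages in the zone */
	unsigned long free_order0;	/* free base pages usable by compaction */
	unsigned long min_wmark;	/* stand-in for min_wmark_pages(zone) */
};

/* Stand-in for zone_watermark_ok(): can an order-N block be handed out now? */
static bool watermark_ok(const struct zone_model *z, unsigned int order)
{
	return z->free_pages > z->min_wmark + (1UL << order);
}

/* Stand-in for compaction_suitable(): enough order-0 pages to migrate into? */
static bool enough_order0_to_compact(const struct zone_model *z, unsigned int order)
{
	return z->free_order0 >= 2 * (1UL << order);
}

/* Mirrors the patched loop: return true on the first zone that qualifies. */
static bool node_suitable(const struct zone_model *zones, int nr_zones,
			  unsigned int order)
{
	for (int i = 0; i < nr_zones; i++) {
		const struct zone_model *z = &zones[i];

		if (!z->populated)
			continue;
		/* Allocation can succeed in this zone, done */
		if (watermark_ok(z, order))
			return true;
		/* Allocation can't succeed, but enough order-0 to compact */
		if (enough_order0_to_compact(z, order))
			return true;
	}
	return false;
}

int main(void)
{
	struct zone_model zones[] = {
		{ "DMA32",  true,  300,   40,  256 },	/* below watermark, little order-0 */
		{ "Normal", true, 9000, 4096, 1024 },	/* plenty of room: first hit, stop here */
	};

	printf("order-9 request: node %s\n",
	       node_suitable(zones, 2, 9) ? "suitable" : "not suitable");
	return 0;
}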