From: Johannes Weiner <hannes@xxxxxxxxxxx>
Subject: mm: vmscan: split shrink_node() into node part and memcgs part

This function is getting long and unwieldy, split out the memcg bits.

The updated shrink_node() handles the generic (node) reclaim aspects:
  - global vmpressure notifications
  - writeback and congestion throttling
  - reclaim/compaction management
  - kswapd giving up on unreclaimable nodes

It then calls a new shrink_node_memcgs() which handles cgroup specifics:
  - the cgroup tree traversal
  - memory.low considerations
  - per-cgroup slab shrinking callbacks
  - per-cgroup vmpressure notifications

[hannes@xxxxxxxxxxx: rename "root" to "target_memcg", per Roman]
  Link: http://lkml.kernel.org/r/20191025143640.GA386981@xxxxxxxxxxx
Link: http://lkml.kernel.org/r/20191022144803.302233-8-hannes@xxxxxxxxxxx
Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
Reviewed-by: Roman Gushchin <guro@xxxxxx>
Reviewed-by: Shakeel Butt <shakeelb@xxxxxxxxxx>
Acked-by: Michal Hocko <mhocko@xxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 mm/vmscan.c |   41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

--- a/mm/vmscan.c~mm-vmscan-split-shrink_node-into-node-part-and-memcgs-part
+++ a/mm/vmscan.c
@@ -2722,26 +2722,18 @@ static bool pgdat_memcg_congested(pg_dat
 		(memcg && memcg_congested(pgdat, memcg));
 }
 
-static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
+static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 {
-	struct reclaim_state *reclaim_state = current->reclaim_state;
-	struct mem_cgroup *root = sc->target_mem_cgroup;
-	unsigned long nr_reclaimed, nr_scanned;
-	bool reclaimable = false;
+	struct mem_cgroup *target_memcg = sc->target_mem_cgroup;
 	struct mem_cgroup *memcg;
-again:
-	memset(&sc->nr, 0, sizeof(sc->nr));
 
-	nr_reclaimed = sc->nr_reclaimed;
-	nr_scanned = sc->nr_scanned;
-
-	memcg = mem_cgroup_iter(root, NULL, NULL);
+	memcg = mem_cgroup_iter(target_memcg, NULL, NULL);
 	do {
 		struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
 		unsigned long reclaimed;
 		unsigned long scanned;
 
-		switch (mem_cgroup_protected(root, memcg)) {
+		switch (mem_cgroup_protected(target_memcg, memcg)) {
 		case MEMCG_PROT_MIN:
 			/*
 			 * Hard protection.
@@ -2785,7 +2777,23 @@ again:
 			   sc->nr_scanned - scanned,
 			   sc->nr_reclaimed - reclaimed);
 
-	} while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
+	} while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
+}
+
+static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
+{
+	struct reclaim_state *reclaim_state = current->reclaim_state;
+	struct mem_cgroup *target_memcg = sc->target_mem_cgroup;
+	unsigned long nr_reclaimed, nr_scanned;
+	bool reclaimable = false;
+
+again:
+	memset(&sc->nr, 0, sizeof(sc->nr));
+
+	nr_reclaimed = sc->nr_reclaimed;
+	nr_scanned = sc->nr_scanned;
+
+	shrink_node_memcgs(pgdat, sc);
 
 	if (reclaim_state) {
 		sc->nr_reclaimed += reclaim_state->reclaimed_slab;
@@ -2793,7 +2801,7 @@ again:
 	}
 
 	/* Record the subtree's reclaim efficiency */
-	vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
+	vmpressure(sc->gfp_mask, target_memcg, true,
 		   sc->nr_scanned - nr_scanned,
 		   sc->nr_reclaimed - nr_reclaimed);
 
@@ -2849,7 +2857,7 @@ again:
 	 */
 	if (cgroup_reclaim(sc) && writeback_throttling_sane(sc) &&
 	    sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
-		set_memcg_congestion(pgdat, root, true);
+		set_memcg_congestion(pgdat, target_memcg, true);
 
 	/*
 	 * Stall direct reclaim for IO completions if underlying BDIs
@@ -2858,7 +2866,8 @@ again:
 	 * the LRU too quickly.
 	 */
 	if (!sc->hibernation_mode && !current_is_kswapd() &&
-	   current_may_throttle() && pgdat_memcg_congested(pgdat, root))
+	    current_may_throttle() &&
+	    pgdat_memcg_congested(pgdat, target_memcg))
 		wait_iff_congested(BLK_RW_ASYNC, HZ/10);
 
 	if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
_