The patch titled Subject: mm-vmscan-dont-mess-with-pgdat-flags-in-memcg-reclaim-v2-fix has been removed from the -mm tree. Its filename was mm-vmscan-dont-mess-with-pgdat-flags-in-memcg-reclaim-v2-fix.patch This patch was dropped because it was folded into mm-vmscan-dont-mess-with-pgdat-flags-in-memcg-reclaim-v2.patch ------------------------------------------------------ From: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx> Subject: mm-vmscan-dont-mess-with-pgdat-flags-in-memcg-reclaim-v2-fix On 04/06/2018 05:13 AM, Shakeel Butt wrote: > Question: Should this 'flags' be per-node? Is it ok for a congested > memcg to call wait_iff_congested for all nodes? Indeed, congestion state should be per-node. If memcg on node A is congested, there is no point in stalling memcg reclaim from node B. Make congestion state per-cgroup-per-node and record it in 'struct mem_cgroup_per_node'. Link: http://lkml.kernel.org/r/20180406135215.10057-1-aryabinin@xxxxxxxxxxxxx Signed-off-by: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx> Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> Cc: Tejun Heo <tj@xxxxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Shakeel Butt <shakeelb@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- diff -puN include/linux/memcontrol.h~mm-vmscan-dont-mess-with-pgdat-flags-in-memcg-reclaim-v2-fix include/linux/memcontrol.h --- a/include/linux/memcontrol.h~mm-vmscan-dont-mess-with-pgdat-flags-in-memcg-reclaim-v2-fix +++ a/include/linux/memcontrol.h @@ -120,6 +120,9 @@ struct mem_cgroup_per_node { unsigned long usage_in_excess;/* Set to the value by which */ /* the soft limit is exceeded*/ bool on_tree; + bool congested; /* memcg has many dirty pages */ + /* backed by a congested BDI */ + struct mem_cgroup *memcg; /* Back pointer, we cannot */ /* use container_of */ }; @@ -189,8 +192,6 @@ struct mem_cgroup { /* vmpressure notifications */ struct vmpressure vmpressure; - unsigned long flags; - /* * Should the accounting 
and control be hierarchical, per subtree? */ diff -puN mm/vmscan.c~mm-vmscan-dont-mess-with-pgdat-flags-in-memcg-reclaim-v2-fix mm/vmscan.c --- a/mm/vmscan.c~mm-vmscan-dont-mess-with-pgdat-flags-in-memcg-reclaim-v2-fix +++ a/mm/vmscan.c @@ -201,16 +201,27 @@ static bool sane_reclaim(struct scan_con return false; } -static void set_memcg_bit(enum pgdat_flags flag, - struct mem_cgroup *memcg) +static void set_memcg_congestion(pg_data_t *pgdat, + struct mem_cgroup *memcg, + bool congested) { - set_bit(flag, &memcg->flags); + struct mem_cgroup_per_node *mz; + + if (!memcg) + return; + + mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id); + WRITE_ONCE(mz->congested, congested); } -static int test_memcg_bit(enum pgdat_flags flag, +static bool memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg) { - return test_bit(flag, &memcg->flags); + struct mem_cgroup_per_node *mz; + + mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id); + return READ_ONCE(mz->congested); + } #else static bool global_reclaim(struct scan_control *sc) @@ -223,15 +234,16 @@ static bool sane_reclaim(struct scan_con return true; } -static inline void set_memcg_bit(enum pgdat_flags flag, - struct mem_cgroup *memcg) +static inline void set_memcg_congestion(struct pglist_data *pgdat, + struct mem_cgroup *memcg, bool congested) { } -static inline int test_memcg_bit(enum pgdat_flags flag, - struct mem_cgroup *memcg) +static inline bool memcg_congested(struct pglist_data *pgdat, + struct mem_cgroup *memcg) { - return 0; + return false; + } #endif @@ -2500,7 +2512,7 @@ static inline bool should_continue_recla static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg) { return test_bit(PGDAT_CONGESTED, &pgdat->flags) || - (memcg && test_memcg_bit(PGDAT_CONGESTED, memcg)); + (memcg && memcg_congested(pgdat, memcg)); } static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) @@ -2635,7 +2647,7 @@ static bool shrink_node(pg_data_t *pgdat */ if (!global_reclaim(sc) && sane_reclaim(sc) && 
sc->nr.dirty && sc->nr.dirty == sc->nr.congested) - set_memcg_bit(PGDAT_CONGESTED, root); + set_memcg_congestion(pgdat, root, true); /* * Stall direct reclaim for IO completions if underlying BDIs @@ -2862,6 +2874,7 @@ retry: continue; last_pgdat = zone->zone_pgdat; snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat); + set_memcg_congestion(last_pgdat, sc->target_mem_cgroup, false); } delayacct_freepages_end(); @@ -3085,7 +3098,6 @@ unsigned long mem_cgroup_shrink_node(str * the priority and make it zero. */ shrink_node_memcg(pgdat, memcg, &sc, &lru_pages); - clear_bit(PGDAT_CONGESTED, &memcg->flags); trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); @@ -3131,7 +3143,6 @@ unsigned long try_to_free_mem_cgroup_pag noreclaim_flag = memalloc_noreclaim_save(); nr_reclaimed = do_try_to_free_pages(zonelist, &sc); memalloc_noreclaim_restore(noreclaim_flag); - clear_bit(PGDAT_CONGESTED, &memcg->flags); trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); _ Patches currently in -mm which might be from aryabinin@xxxxxxxxxxxxx are mm-vmscan-update-stale-comments.patch mm-vmscan-remove-redundant-current_may_throttle-check.patch mm-vmscan-dont-change-pgdat-state-on-base-of-a-single-lru-list-state-v2.patch mm-vmscan-dont-mess-with-pgdat-flags-in-memcg-reclaim-v2.patch mm-vmscan-dont-mess-with-pgdat-flags-in-memcg-reclaim-v3.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html