From: Kaiyang Zhao <kaiyang2@xxxxxxxxxx>

When reclaim targets the root memcg's top-tier node usage, apply the
local memory.low protection instead of the global protection.

Signed-off-by: Kaiyang Zhao <kaiyang2@xxxxxxxxxx>
---
 include/linux/memcontrol.h | 23 ++++++++++++++---------
 mm/memcontrol.c            |  4 ++--
 mm/vmscan.c                | 19 ++++++++++++++-----
 3 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 94aba4498fca..256912b91922 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -586,9 +586,9 @@ static inline bool mem_cgroup_disabled(void)
 static inline void mem_cgroup_protection(struct mem_cgroup *root,
                                          struct mem_cgroup *memcg,
                                          unsigned long *min,
-                                         unsigned long *low)
+                                         unsigned long *low, unsigned long *locallow)
 {
-        *min = *low = 0;
+        *min = *low = *locallow = 0;
 
         if (mem_cgroup_disabled())
                 return;
@@ -631,10 +631,11 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root,
 
         *min = READ_ONCE(memcg->memory.emin);
         *low = READ_ONCE(memcg->memory.elow);
+        *locallow = READ_ONCE(memcg->memory.elocallow);
 }
 
 void mem_cgroup_calculate_protection(struct mem_cgroup *root,
-                                     struct mem_cgroup *memcg);
+                                     struct mem_cgroup *memcg, int is_local);
 
 static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
                                           struct mem_cgroup *memcg)
@@ -651,13 +652,17 @@ static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
 unsigned long get_cgroup_local_usage(struct mem_cgroup *memcg, bool flush);
 
 static inline bool mem_cgroup_below_low(struct mem_cgroup *target,
-                                        struct mem_cgroup *memcg)
+                                        struct mem_cgroup *memcg, int is_local)
 {
         if (mem_cgroup_unprotected(target, memcg))
                 return false;
 
-        return READ_ONCE(memcg->memory.elow) >=
-                page_counter_read(&memcg->memory);
+        if (is_local)
+                return READ_ONCE(memcg->memory.elocallow) >=
+                        get_cgroup_local_usage(memcg, true);
+        else
+                return READ_ONCE(memcg->memory.elow) >=
+                        page_counter_read(&memcg->memory);
 }
 
 static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
@@ -1159,13 +1164,13 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
 static inline void mem_cgroup_protection(struct mem_cgroup *root,
                                          struct mem_cgroup *memcg,
                                          unsigned long *min,
-                                         unsigned long *low)
+                                         unsigned long *low, unsigned long *locallow)
 {
         *min = *low = 0;
 }
 
 static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
-                                                   struct mem_cgroup *memcg)
+                                                   struct mem_cgroup *memcg, int is_local)
 {
 }
 
@@ -1175,7 +1180,7 @@ static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
         return true;
 }
 static inline bool mem_cgroup_below_low(struct mem_cgroup *target,
-                                        struct mem_cgroup *memcg)
+                                        struct mem_cgroup *memcg, int is_local)
 {
         return false;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d7c5fff12105..61718ba998fe 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4495,7 +4495,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
  * of a top-down tree iteration, not for isolated queries.
  */
 void mem_cgroup_calculate_protection(struct mem_cgroup *root,
-                                     struct mem_cgroup *memcg)
+                                     struct mem_cgroup *memcg, int is_local)
 {
         bool recursive_protection =
                 cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT;
@@ -4507,7 +4507,7 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root,
                 root = root_mem_cgroup;
 
         page_counter_calculate_protection(&root->memory, &memcg->memory,
-                                          recursive_protection, false);
+                                          recursive_protection, is_local);
 }
 
 static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ce471d686a88..a2681d52fc5f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2377,6 +2377,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         enum scan_balance scan_balance;
         unsigned long ap, fp;
         enum lru_list lru;
+        int is_local = (pgdat->node_id == 0) && root_reclaim(sc);
 
         /* If we have no swap space, do not bother scanning anon folios. */
         if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) {
@@ -2457,12 +2458,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         for_each_evictable_lru(lru) {
                 bool file = is_file_lru(lru);
                 unsigned long lruvec_size;
-                unsigned long low, min;
+                unsigned long low, min, locallow;
                 unsigned long scan;
 
                 lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
                 mem_cgroup_protection(sc->target_mem_cgroup, memcg,
-                                      &min, &low);
+                                      &min, &low, &locallow);
+                if (is_local)
+                        low = locallow;
 
                 if (min || low) {
                         /*
@@ -2494,7 +2497,12 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
                          * again by how much of the total memory used is under
                          * hard protection.
                          */
-                        unsigned long cgroup_size = mem_cgroup_size(memcg);
+                        unsigned long cgroup_size;
+
+                        if (is_local)
+                                cgroup_size = get_cgroup_local_usage(memcg, true);
+                        else
+                                cgroup_size = mem_cgroup_size(memcg);
                         unsigned long protection;
 
                         /* memory.low scaling, make sure we retry before OOM */
@@ -5869,6 +5877,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
         };
         struct mem_cgroup_reclaim_cookie *partial = &reclaim;
         struct mem_cgroup *memcg;
+        int is_local = (pgdat->node_id == 0) && root_reclaim(sc);
 
         /*
          * In most cases, direct reclaimers can do partial walks
@@ -5896,7 +5905,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
                  */
                 cond_resched();
 
-                mem_cgroup_calculate_protection(target_memcg, memcg);
+                mem_cgroup_calculate_protection(target_memcg, memcg, is_local);
 
                 if (mem_cgroup_below_min(target_memcg, memcg)) {
                         /*
@@ -5904,7 +5913,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
                          * If there is no reclaimable memory, OOM.
                          */
                         continue;
-                } else if (mem_cgroup_below_low(target_memcg, memcg)) {
+                } else if (mem_cgroup_below_low(target_memcg, memcg, is_local)) {
                         /*
                          * Soft protection.
                          * Respect the protection only as long as
-- 
2.43.0
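
For illustration only, not part of the patch: a stand-alone user-space model of
the check mem_cgroup_below_low() performs after this change. The struct and
field names below are simplified stand-ins for the kernel's memcg fields (elow,
elocallow) and the get_cgroup_local_usage() helper introduced earlier in this
series; it only demonstrates that a cgroup over its global low protection can
still be treated as protected when the local (top-tier) check is used.

/* model_below_low.c - simplified model of the local vs. global low check */
#include <stdbool.h>
#include <stdio.h>

struct model_memcg {
	unsigned long elow;        /* effective global memory.low */
	unsigned long elocallow;   /* effective local (top-tier) memory.low */
	unsigned long usage;       /* total charged pages */
	unsigned long local_usage; /* pages charged on the top-tier node */
};

/* Mirrors the decision: compare local usage against local protection when
 * reclaim is driven by the root memcg against the top-tier node, otherwise
 * fall back to the global elow vs. total usage comparison. */
static bool model_below_low(const struct model_memcg *mc, bool is_local)
{
	if (is_local)
		return mc->elocallow >= mc->local_usage;
	return mc->elow >= mc->usage;
}

int main(void)
{
	/* Over the global protection, but still under the local one. */
	struct model_memcg mc = {
		.elow = 1000, .usage = 1500,
		.elocallow = 800, .local_usage = 600,
	};

	printf("global check: %s\n",
	       model_below_low(&mc, false) ? "protected" : "reclaim");
	printf("local  check: %s\n",
	       model_below_low(&mc, true) ? "protected" : "reclaim");
	return 0;
}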