There are several cases, such as resize and force_empty, in which reclaim does not need to be accounted to psi; accounting it there makes the reported pressure misleading. We also have a module that reclaims dying memcgs in the background to avoid accumulating too many dead memcgs, which can cause lots of trouble; without this patch, that makes the psi inaccuracy even worse. Signed-off-by: Xunlei Pang <xlpang@xxxxxxxxxxxxxxxxx> --- include/linux/swap.h | 3 ++- mm/memcontrol.c | 13 +++++++------ mm/vmscan.c | 9 ++++++--- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 4bfb5c4ac108..74b5443877d4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -354,7 +354,8 @@ extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, - bool may_swap); + bool may_swap, + bool force_reclaim); extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f1dfa651f55d..f4ec57876ada 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2237,7 +2237,8 @@ static void reclaim_high(struct mem_cgroup *memcg, if (page_counter_read(&memcg->memory) <= memcg->high) continue; memcg_memory_event(memcg, MEMCG_HIGH); - try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true); + try_to_free_mem_cgroup_pages(memcg, nr_pages, + gfp_mask, true, false); } while ((memcg = parent_mem_cgroup(memcg))); } @@ -2330,7 +2331,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, memcg_memory_event(mem_over_limit, MEMCG_MAX); nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages, - gfp_mask, may_swap); + gfp_mask, may_swap, false); if (mem_cgroup_margin(mem_over_limit) >= nr_pages) goto retry; @@ -2860,7 +2861,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg, } if (!try_to_free_mem_cgroup_pages(memcg, 1, - GFP_KERNEL, !memsw)) { + 
GFP_KERNEL, !memsw, true)) { ret = -EBUSY; break; } @@ -2993,7 +2994,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg) return -EINTR; progress = try_to_free_mem_cgroup_pages(memcg, 1, - GFP_KERNEL, true); + GFP_KERNEL, true, true); if (!progress) { nr_retries--; /* maybe some writeback is necessary */ @@ -5549,7 +5550,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, nr_pages = page_counter_read(&memcg->memory); if (nr_pages > high) try_to_free_mem_cgroup_pages(memcg, nr_pages - high, - GFP_KERNEL, true); + GFP_KERNEL, true, true); memcg_wb_domain_size_changed(memcg); return nbytes; @@ -5596,7 +5597,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, if (nr_reclaims) { if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max, - GFP_KERNEL, true)) + GFP_KERNEL, true, true)) nr_reclaims--; continue; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 7acd0afdfc2a..3831848fca5a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3212,7 +3212,8 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, - bool may_swap) + bool may_swap, + bool force_reclaim) { struct zonelist *zonelist; unsigned long nr_reclaimed; @@ -3243,13 +3244,15 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask); - psi_memstall_enter(&pflags); + if (!force_reclaim) + psi_memstall_enter(&pflags); noreclaim_flag = memalloc_noreclaim_save(); nr_reclaimed = do_try_to_free_pages(zonelist, &sc); memalloc_noreclaim_restore(noreclaim_flag); - psi_memstall_leave(&pflags); + if (!force_reclaim) + psi_memstall_leave(&pflags); trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); -- 2.14.4.44.g2045bb6