From: Yafang Shao <laoar.shao@xxxxxxxxx> Subject: mm/vmscan.c: add a new member reclaim_state in struct shrink_control Patch series "mm/vmscan: calculate reclaimed slab in all reclaim paths". This patchset is to fix the issues in doing shrink slab. There're six different reclaim paths by now, - kswapd reclaim path - node reclaim path - hibernate preallocate memory reclaim path - direct reclaim path - memcg reclaim path - memcg softlimit reclaim path The slab caches reclaimed in these paths are only calculated in the above three paths. The issues are detailed explained in patch #2. We should calculate the reclaimed slab caches in every reclaim path. In order to do it, the struct reclaim_state is placed into the struct shrink_control. In node reclaim path, there'is another issue about shrinking slab, which is adressed in "mm/vmscan: shrink slab in node reclaim" (https://lore.kernel.org/linux-mm/1559874946-22960-1-git-send-email-laoar.shao@xxxxxxxxx/). This patch (of 2): The struct reclaim_state is used to record how many slab caches are reclaimed in one reclaim path. The struct shrink_control is used to control one reclaim path. So we'd better put reclaim_state into shrink_control. [laoar.shao@xxxxxxxxx: remove reclaim_state assignment from __perform_reclaim()] Link: http://lkml.kernel.org/r/1561381582-13697-1-git-send-email-laoar.shao@xxxxxxxxx Link: http://lkml.kernel.org/r/1561112086-6169-2-git-send-email-laoar.shao@xxxxxxxxx Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx> Reviewed-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Reviewed-by: Kirill Tkhai <ktkhai@xxxxxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: Vladimir Davydov <vdavydov.dev@xxxxxxxxx> Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- mm/page_alloc.c | 4 ---- mm/vmscan.c | 20 ++++++++------------ 2 files changed, 8 insertions(+), 16 deletions(-) --- a/mm/page_alloc.c~mm-vmscan-add-a-new-member-reclaim_state-in-struct-shrink_control +++ a/mm/page_alloc.c @@ -4102,7 +4102,6 @@ static int __perform_reclaim(gfp_t gfp_mask, unsigned int order, const struct alloc_context *ac) { - struct reclaim_state reclaim_state; int progress; unsigned int noreclaim_flag; unsigned long pflags; @@ -4114,13 +4113,10 @@ __perform_reclaim(gfp_t gfp_mask, unsign psi_memstall_enter(&pflags); fs_reclaim_acquire(gfp_mask); noreclaim_flag = memalloc_noreclaim_save(); - reclaim_state.reclaimed_slab = 0; - current->reclaim_state = &reclaim_state; progress = try_to_free_pages(ac->zonelist, order, gfp_mask, ac->nodemask); - current->reclaim_state = NULL; memalloc_noreclaim_restore(noreclaim_flag); fs_reclaim_release(gfp_mask); psi_memstall_leave(&pflags); --- a/mm/vmscan.c~mm-vmscan-add-a-new-member-reclaim_state-in-struct-shrink_control +++ a/mm/vmscan.c @@ -131,6 +131,9 @@ struct scan_control { unsigned int file_taken; unsigned int taken; } nr; + + /* for recording the reclaimed slab by now */ + struct reclaim_state reclaim_state; }; #ifdef ARCH_HAS_PREFETCH @@ -3483,6 +3486,7 @@ static int balance_pgdat(pg_data_t *pgda .may_unmap = 1, }; + current->reclaim_state = &sc.reclaim_state; psi_memstall_enter(&pflags); __fs_reclaim_acquire(); @@ -3664,6 +3668,8 @@ out: snapshot_refaults(NULL, pgdat); __fs_reclaim_release(); psi_memstall_leave(&pflags); + current->reclaim_state = NULL; + /* * Return the order kswapd stopped reclaiming at as * prepare_kswapd_sleep() takes it into account. If another caller @@ -3787,15 +3793,10 @@ static int kswapd(void *p) unsigned int classzone_idx = MAX_NR_ZONES - 1; pg_data_t *pgdat = (pg_data_t*)p; struct task_struct *tsk = current; - - struct reclaim_state reclaim_state = { - .reclaimed_slab = 0, - }; const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); if (!cpumask_empty(cpumask)) set_cpus_allowed_ptr(tsk, cpumask); - current->reclaim_state = &reclaim_state; /* * Tell the memory management that we're a "memory allocator", @@ -3857,7 +3858,6 @@ kswapd_try_sleep: } tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); - current->reclaim_state = NULL; return 0; } @@ -3922,7 +3922,6 @@ void wakeup_kswapd(struct zone *zone, gf */ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) { - struct reclaim_state reclaim_state; struct scan_control sc = { .nr_to_reclaim = nr_to_reclaim, .gfp_mask = GFP_HIGHUSER_MOVABLE, @@ -3940,8 +3939,7 @@ unsigned long shrink_all_memory(unsigned fs_reclaim_acquire(sc.gfp_mask); noreclaim_flag = memalloc_noreclaim_save(); - reclaim_state.reclaimed_slab = 0; - p->reclaim_state = &reclaim_state; + p->reclaim_state = &sc.reclaim_state; nr_reclaimed = do_try_to_free_pages(zonelist, &sc); @@ -4110,7 +4108,6 @@ static int __node_reclaim(struct pglist_ /* Minimum pages needed in order to stay on node */ const unsigned long nr_pages = 1 << order; struct task_struct *p = current; - struct reclaim_state reclaim_state; unsigned int noreclaim_flag; struct scan_control sc = { .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), @@ -4135,8 +4132,7 @@ static int __node_reclaim(struct pglist_ */ noreclaim_flag = memalloc_noreclaim_save(); p->flags |= PF_SWAPWRITE; - reclaim_state.reclaimed_slab = 0; - p->reclaim_state = &reclaim_state; + p->reclaim_state = &sc.reclaim_state; if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) { /* _