From: Alex Shi <alex.shi@xxxxxxxxxxxxxxxxx> Subject: mm/lru: move lock into lru_note_cost We have to move lru_lock into lru_note_cost, since it cycle up on memcg tree, for future per lruvec lru_lock replace. It's a bit ugly and may cost a bit more locking, but benefit from multiple memcg locking could cover the lost. Link: https://lkml.kernel.org/r/1604566549-62481-11-git-send-email-alex.shi@xxxxxxxxxxxxxxxxx Signed-off-by: Alex Shi <alex.shi@xxxxxxxxxxxxxxxxx> Acked-by: Hugh Dickins <hughd@xxxxxxxxxx> Acked-by: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: Alexander Duyck <alexander.duyck@xxxxxxxxx> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> Cc: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx> Cc: "Chen, Rong A" <rong.a.chen@xxxxxxxxx> Cc: Daniel Jordan <daniel.m.jordan@xxxxxxxxxx> Cc: "Huang, Ying" <ying.huang@xxxxxxxxx> Cc: Jann Horn <jannh@xxxxxxxxxx> Cc: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx> Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> Cc: Kirill A. Shutemov <kirill@xxxxxxxxxxxxx> Cc: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx> Cc: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxx> Cc: Mika Penttilä <mika.penttila@xxxxxxxxxxxx> Cc: Minchan Kim <minchan@xxxxxxxxxx> Cc: Shakeel Butt <shakeelb@xxxxxxxxxx> Cc: Tejun Heo <tj@xxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Vladimir Davydov <vdavydov.dev@xxxxxxxxx> Cc: Vlastimil Babka <vbabka@xxxxxxx> Cc: Wei Yang <richard.weiyang@xxxxxxxxx> Cc: Yang Shi <yang.shi@xxxxxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- mm/swap.c | 3 +++ mm/vmscan.c | 4 +--- mm/workingset.c | 2 -- 3 files changed, 4 insertions(+), 5 deletions(-) --- a/mm/swap.c~mm-lru-move-lock-into-lru_note_cost +++ a/mm/swap.c @@ -268,7 +268,9 @@ void lru_note_cost(struct lruvec *lruvec { do { unsigned long lrusize; + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + spin_lock_irq(&pgdat->lru_lock); /* Record cost event */ if (file) lruvec->file_cost += nr_pages; @@ -292,6 +294,7 @@ void lru_note_cost(struct lruvec *lruvec lruvec->file_cost /= 2; lruvec->anon_cost /= 2; } + spin_unlock_irq(&pgdat->lru_lock); } while ((lruvec = parent_lruvec(lruvec))); } --- a/mm/vmscan.c~mm-lru-move-lock-into-lru_note_cost +++ a/mm/vmscan.c @@ -1971,19 +1971,17 @@ shrink_inactive_list(unsigned long nr_to nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, &stat, false); spin_lock_irq(&pgdat->lru_lock); - move_pages_to_lru(lruvec, &page_list); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); - lru_note_cost(lruvec, file, stat.nr_pageout); item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT; if (!cgroup_reclaim(sc)) __count_vm_events(item, nr_reclaimed); __count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed); __count_vm_events(PGSTEAL_ANON + file, nr_reclaimed); - spin_unlock_irq(&pgdat->lru_lock); + lru_note_cost(lruvec, file, stat.nr_pageout); mem_cgroup_uncharge_list(&page_list); free_unref_page_list(&page_list); --- a/mm/workingset.c~mm-lru-move-lock-into-lru_note_cost +++ a/mm/workingset.c @@ -381,9 +381,7 @@ void workingset_refault(struct page *pag if (workingset) { SetPageWorkingset(page); /* XXX: Move to lru_cache_add() when it supports new vs putback */ - spin_lock_irq(&page_pgdat(page)->lru_lock); lru_note_cost_page(page); - spin_unlock_irq(&page_pgdat(page)->lru_lock); inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file); } out: _