On Sat, Jul 25, 2020 at 6:00 AM Alex Shi <alex.shi@xxxxxxxxxxxxxxxxx> wrote:
>
> Use this new function to replace repeated same code, no func change.
>
> Signed-off-by: Alex Shi <alex.shi@xxxxxxxxxxxxxxxxx>
> Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx>
> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
> Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
> Cc: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>
> Cc: Hugh Dickins <hughd@xxxxxxxxxx>
> Cc: Tejun Heo <tj@xxxxxxxxxx>
> Cc: linux-kernel@xxxxxxxxxxxxxxx
> Cc: cgroups@xxxxxxxxxxxxxxx
> Cc: linux-mm@xxxxxxxxx
> ---
>  include/linux/memcontrol.h | 40 ++++++++++++++++++++++++++++++++++++++++
>  mm/mlock.c                 |  9 +--------
>  mm/swap.c                  | 33 +++++++--------------------------
>  mm/vmscan.c                |  8 +-------
>  4 files changed, 49 insertions(+), 41 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 258901021c6c..6e670f991b42 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -1313,6 +1313,46 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
>         spin_unlock_irqrestore(&lruvec->lru_lock, flags);
>  }
>
> +/* Don't lock again iff page's lruvec locked */
> +static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
> +               struct lruvec *locked_lruvec)
> +{
> +       struct pglist_data *pgdat = page_pgdat(page);
> +       bool locked;
> +
> +       rcu_read_lock();
> +       locked = mem_cgroup_page_lruvec(page, pgdat) == locked_lruvec;
> +       rcu_read_unlock();
> +
> +       if (locked)
> +               return locked_lruvec;
> +
> +       if (locked_lruvec)
> +               unlock_page_lruvec_irq(locked_lruvec);
> +
> +       return lock_page_lruvec_irq(page);
> +}
> +
> +/* Don't lock again iff page's lruvec locked */
> +static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
> +               struct lruvec *locked_lruvec, unsigned long *flags)
> +{
> +       struct pglist_data *pgdat = page_pgdat(page);
> +       bool locked;
> +
> +       rcu_read_lock();
> +       locked = mem_cgroup_page_lruvec(page, pgdat) == locked_lruvec;
> +       rcu_read_unlock();
> +
> +       if (locked)
> +               return locked_lruvec;
> +
> +       if (locked_lruvec)
> +               unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
> +
> +       return lock_page_lruvec_irqsave(page, flags);
> +}
> +

So looking these over, they seem pretty inefficient for what they do.
Basically, in the worst case (locked_lruvec == NULL) you end up calling
mem_cgroup_page_lruvec() and doing the rcu_read_lock/unlock a couple of
times for a single page. It might make more sense to structure this
like:

        if (locked_lruvec) {
                if (lruvec_holds_page_lru_lock(page, locked_lruvec))
                        return locked_lruvec;

                unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
        }

        return lock_page_lruvec_irqsave(page, flags);

The other piece that has me scratching my head is that I wonder if we
couldn't do this without needing the rcu_read_lock at all. For example,
what if we were to compare the page's mem_cgroup pointer to the memcg
back pointer stored in the mem_cgroup_per_node? It seems like ordering
things this way would significantly reduce the pointer-chasing overhead
of checking whether the page is in the locked lruvec or not.
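Something along these lines is what I had in mind for that helper
(untested, just a sketch to illustrate the idea; it assumes the lruvec
is embedded in struct mem_cgroup_per_node with a memcg back pointer,
and that page->mem_cgroup can't change out from under us here, so take
the exact names with a grain of salt):

        /*
         * Sketch: test whether the lruvec we already hold locked covers
         * this page by comparing page->mem_cgroup against the memcg
         * back pointer in mem_cgroup_per_node, instead of chasing
         * mem_cgroup_page_lruvec() under rcu_read_lock().
         */
        static inline bool lruvec_holds_page_lru_lock(struct page *page,
                                                      struct lruvec *lruvec)
        {
                struct pglist_data *pgdat = page_pgdat(page);
                const struct mem_cgroup *memcg;
                struct mem_cgroup_per_node *mz;

                if (mem_cgroup_disabled())
                        return lruvec == &pgdat->__lruvec;

                mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
                /* uncharged pages sit on the root memcg's lruvec */
                memcg = page->mem_cgroup ? : root_mem_cgroup;

                return lruvec_pgdat(lruvec) == pgdat && mz->memcg == memcg;
        }

With something like that the relock helpers wouldn't need to touch
rcu_read_lock at all on the "already locked" path; the only RCU usage
left would be inside lock_page_lruvec_irq/irqsave when we actually have
to take a new lock.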
>  #ifdef CONFIG_CGROUP_WRITEBACK
>
>  struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
> diff --git a/mm/mlock.c b/mm/mlock.c
> index 5d40d259a931..bc2fb3bfbe7a 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -303,17 +303,10 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
>         /* Phase 1: page isolation */
>         for (i = 0; i < nr; i++) {
>                 struct page *page = pvec->pages[i];
> -               struct lruvec *new_lruvec;
>
>                 /* block memcg change in mem_cgroup_move_account */
>                 lock_page_memcg(page);
> -               new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
> -               if (new_lruvec != lruvec) {
> -                       if (lruvec)
> -                               unlock_page_lruvec_irq(lruvec);
> -                       lruvec = lock_page_lruvec_irq(page);
> -               }
> -
> +               lruvec = relock_page_lruvec_irq(page, lruvec);
>                 if (TestClearPageMlocked(page)) {
>                         /*
>                          * We already have pin from follow_page_mask()
> diff --git a/mm/swap.c b/mm/swap.c
> index 09edac441eb6..6d9c7288f7de 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -209,19 +209,12 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
>
>         for (i = 0; i < pagevec_count(pvec); i++) {
>                 struct page *page = pvec->pages[i];
> -               struct lruvec *new_lruvec;
>
>                 /* block memcg migration during page moving between lru */
>                 if (!TestClearPageLRU(page))
>                         continue;
>
> -               new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
> -               if (lruvec != new_lruvec) {
> -                       if (lruvec)
> -                               unlock_page_lruvec_irqrestore(lruvec, flags);
> -                       lruvec = lock_page_lruvec_irqsave(page, &flags);
> -               }
> -
> +               lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
>                 (*move_fn)(page, lruvec);
>
>                 SetPageLRU(page);
> @@ -864,17 +857,12 @@ void release_pages(struct page **pages, int nr)
>                 }
>
>                 if (PageLRU(page)) {
> -                       struct lruvec *new_lruvec;
> -
> -                       new_lruvec = mem_cgroup_page_lruvec(page,
> -                                                       page_pgdat(page));
> -                       if (new_lruvec != lruvec) {
> -                               if (lruvec)
> -                                       unlock_page_lruvec_irqrestore(lruvec,
> -                                                                       flags);
> +                       struct lruvec *prev_lruvec = lruvec;
> +
> +                       lruvec = relock_page_lruvec_irqsave(page, lruvec,
> +                                                                       &flags);
> +                       if (prev_lruvec != lruvec)
>                                 lock_batch = 0;
> -                               lruvec = lock_page_lruvec_irqsave(page, &flags);
> -                       }
>
>                         __ClearPageLRU(page);
>                         del_page_from_lru_list(page, lruvec, page_off_lru(page));
> @@ -980,15 +968,8 @@ void __pagevec_lru_add(struct pagevec *pvec)
>
>         for (i = 0; i < pagevec_count(pvec); i++) {
>                 struct page *page = pvec->pages[i];
> -               struct lruvec *new_lruvec;
> -
> -               new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
> -               if (lruvec != new_lruvec) {
> -                       if (lruvec)
> -                               unlock_page_lruvec_irqrestore(lruvec, flags);
> -                       lruvec = lock_page_lruvec_irqsave(page, &flags);
> -               }
>
> +               lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
>                 __pagevec_lru_add_fn(page, lruvec);
>         }
>         if (lruvec)
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 168c1659e430..bdb53a678e7e 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -4292,15 +4292,9 @@ void check_move_unevictable_pages(struct pagevec *pvec)
>
>         for (i = 0; i < pvec->nr; i++) {
>                 struct page *page = pvec->pages[i];
> -               struct lruvec *new_lruvec;
>
>                 pgscanned++;
> -               new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
> -               if (lruvec != new_lruvec) {
> -                       if (lruvec)
> -                               unlock_page_lruvec_irq(lruvec);
> -                       lruvec = lock_page_lruvec_irq(page);
> -               }
> +               lruvec = relock_page_lruvec_irq(page, lruvec);
>
>                 if (!PageLRU(page) || !PageUnevictable(page))
>                         continue;
> --
> 1.8.3.1
>