On Mon, 24 Aug 2020, Alex Shi wrote: > From: Alexander Duyck <alexander.h.duyck@xxxxxxxxxxxxxxx> > > Use this new function to replace repeated same code, no func change. > > When testing for relock we can avoid the need for RCU locking if we simply > compare the page pgdat and memcg pointers versus those that the lruvec is > holding. By doing this we can avoid the extra pointer walks and accesses of > the memory cgroup. > > In addition we can avoid the checks entirely if lruvec is currently NULL. > > Signed-off-by: Alexander Duyck <alexander.h.duyck@xxxxxxxxxxxxxxx> > Signed-off-by: Alex Shi <alex.shi@xxxxxxxxxxxxxxxxx> Again, I'll wait to see __munlock_pagevec() fixed before Acking this one, but that's the only issue. And I've suggested that you use lruvec_holds_page_lru_lock() in mm/vmscan.c move_pages_to_lru(), to replace the uglier and less efficient VM_BUG_ON_PAGE there. Oh, there is one other issue: 0day robot did report (2020-06-19) that sparse doesn't understand relock_page_lruvec*(): I've never got around to working out how to write what it needs, conditional __release plus __acquire in some form, I imagine. I've never got into sparse annotations before, I'll give it a try, but if anyone beats me that will be welcome: and there are higher priorities - I do not think you should wait for the sparse warning to be fixed before reposting. > Cc: Johannes Weiner <hannes@xxxxxxxxxxx> > Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> > Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> > Cc: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx> > Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> > Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> > Cc: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx> > Cc: Hugh Dickins <hughd@xxxxxxxxxx> > Cc: Tejun Heo <tj@xxxxxxxxxx> > Cc: linux-kernel@xxxxxxxxxxxxxxx > Cc: cgroups@xxxxxxxxxxxxxxx > Cc: linux-mm@xxxxxxxxx > --- > include/linux/memcontrol.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++ > mm/mlock.c | 9 +------- > mm/swap.c | 33 +++++++---------------------- > mm/vmscan.c | 8 +------ > 4 files changed, 61 insertions(+), 41 deletions(-) > > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h > index 7b170e9028b5..ee6ef2d8ad52 100644 > --- a/include/linux/memcontrol.h > +++ b/include/linux/memcontrol.h > @@ -488,6 +488,22 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, > > struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *); > > +static inline bool lruvec_holds_page_lru_lock(struct page *page, > + struct lruvec *lruvec) > +{ > + pg_data_t *pgdat = page_pgdat(page); > + const struct mem_cgroup *memcg; > + struct mem_cgroup_per_node *mz; > + > + if (mem_cgroup_disabled()) > + return lruvec == &pgdat->__lruvec; > + > + mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); > + memcg = page->mem_cgroup ? : root_mem_cgroup; > + > + return lruvec->pgdat == pgdat && mz->memcg == memcg; > +} > + > struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); > > struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); > @@ -1023,6 +1039,14 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, > return &pgdat->__lruvec; > } > > +static inline bool lruvec_holds_page_lru_lock(struct page *page, > + struct lruvec *lruvec) > +{ > + pg_data_t *pgdat = page_pgdat(page); > + > + return lruvec == &pgdat->__lruvec; > +} > + > static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) > { > return NULL; > @@ -1469,6 +1493,34 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec, > spin_unlock_irqrestore(&lruvec->lru_lock, flags); > } > > +/* Don't lock again iff page's lruvec locked */ > +static inline struct lruvec *relock_page_lruvec_irq(struct page *page, > + struct lruvec *locked_lruvec) > +{ > + if (locked_lruvec) { > + if (lruvec_holds_page_lru_lock(page, locked_lruvec)) > + return locked_lruvec; > + > + unlock_page_lruvec_irq(locked_lruvec); > + } > + > + return lock_page_lruvec_irq(page); > +} > + > +/* Don't lock again iff page's lruvec locked */ > +static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page, > + struct lruvec *locked_lruvec, unsigned long *flags) > +{ > + if (locked_lruvec) { > + if (lruvec_holds_page_lru_lock(page, locked_lruvec)) > + return locked_lruvec; > + > + unlock_page_lruvec_irqrestore(locked_lruvec, *flags); > + } > + > + return lock_page_lruvec_irqsave(page, flags); > +} > + > #ifdef CONFIG_CGROUP_WRITEBACK > > struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); > diff --git a/mm/mlock.c b/mm/mlock.c > index 177d2588e863..0448409184e3 100644 > --- a/mm/mlock.c > +++ b/mm/mlock.c > @@ -302,17 +302,10 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) > /* Phase 1: page isolation */ > for (i = 0; i < nr; i++) { > struct page *page = pvec->pages[i]; > - struct lruvec *new_lruvec; > > /* block memcg change in mem_cgroup_move_account */ > lock_page_memcg(page); > - new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page)); > - if (new_lruvec != lruvec) { > - if (lruvec) > - unlock_page_lruvec_irq(lruvec); > - lruvec = lock_page_lruvec_irq(page); > - } > - > + lruvec = relock_page_lruvec_irq(page, lruvec); > if (TestClearPageMlocked(page)) { > /* > * We already have pin from follow_page_mask() > diff --git a/mm/swap.c b/mm/swap.c > index b67959b701c0..2ac78e8fab71 100644 > --- a/mm/swap.c > +++ b/mm/swap.c > @@ -209,19 +209,12 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, > > for (i = 0; i < pagevec_count(pvec); i++) { > struct page *page = pvec->pages[i]; > - struct lruvec *new_lruvec; > > /* block memcg migration during page moving between lru */ > if (!TestClearPageLRU(page)) > continue; > > - new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page)); > - if (lruvec != new_lruvec) { > - if (lruvec) > - unlock_page_lruvec_irqrestore(lruvec, flags); > - lruvec = lock_page_lruvec_irqsave(page, &flags); > - } > - > + lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags); > (*move_fn)(page, lruvec); > > SetPageLRU(page); > @@ -865,17 +858,12 @@ void release_pages(struct page **pages, int nr) > } > > if (PageLRU(page)) { > - struct lruvec *new_lruvec; > - > - new_lruvec = mem_cgroup_page_lruvec(page, > - page_pgdat(page)); > - if (new_lruvec != lruvec) { > - if (lruvec) > - unlock_page_lruvec_irqrestore(lruvec, > - flags); > + struct lruvec *prev_lruvec = lruvec; > + > + lruvec = relock_page_lruvec_irqsave(page, lruvec, > + &flags); > + if (prev_lruvec != lruvec) > lock_batch = 0; > - lruvec = lock_page_lruvec_irqsave(page, &flags); > - } > > VM_BUG_ON_PAGE(!PageLRU(page), page); > __ClearPageLRU(page); > @@ -982,15 +970,8 @@ void __pagevec_lru_add(struct pagevec *pvec) > > for (i = 0; i < pagevec_count(pvec); i++) { > struct page *page = pvec->pages[i]; > - struct lruvec *new_lruvec; > - > - new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page)); > - if (lruvec != new_lruvec) { > - if (lruvec) > - unlock_page_lruvec_irqrestore(lruvec, flags); > - lruvec = lock_page_lruvec_irqsave(page, &flags); > - } > > + lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags); > __pagevec_lru_add_fn(page, lruvec); > } > if (lruvec) > diff --git a/mm/vmscan.c b/mm/vmscan.c > index 789444ae4c88..2c94790d4cb1 100644 > --- a/mm/vmscan.c > +++ b/mm/vmscan.c > @@ -4280,15 +4280,9 @@ void check_move_unevictable_pages(struct pagevec *pvec) > > for (i = 0; i < pvec->nr; i++) { > struct page *page = pvec->pages[i]; > - struct lruvec *new_lruvec; > > pgscanned++; > - new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page)); > - if (lruvec != new_lruvec) { > - if (lruvec) > - unlock_page_lruvec_irq(lruvec); > - lruvec = lock_page_lruvec_irq(page); > - } > + lruvec = relock_page_lruvec_irq(page, lruvec); > > if (!PageLRU(page) || !PageUnevictable(page)) > continue; > -- > 1.8.3.1