It looks much better than mine, and it could replace 'mm/lru: introduce
the relock_page_lruvec function' with you as the signed author. :)

BTW, is it the rcu_read_lock that causes the will-it-scale/page_fault3
regression you mentioned in another letter?

Thanks
Alex

On 2020/8/1 5:14 AM, alexander.h.duyck@xxxxxxxxx wrote:
> From: Alexander Duyck <alexander.h.duyck@xxxxxxxxxxxxxxx>
> 
> When testing for relock we can avoid the need for RCU locking if we simply
> compare the page pgdat and memcg pointers versus those that the lruvec is
> holding. By doing this we can avoid the extra pointer walks and accesses of
> the memory cgroup.
> 
> In addition we can avoid the checks entirely if lruvec is currently NULL.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@xxxxxxxxxxxxxxx>
> ---
>  include/linux/memcontrol.h |   52 +++++++++++++++++++++++++++-----------------
>  1 file changed, 32 insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 6e670f991b42..7a02f00bf3de 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -405,6 +405,22 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
>  
>  struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
>  
> +static inline bool lruvec_holds_page_lru_lock(struct page *page,
> +					      struct lruvec *lruvec)
> +{
> +	pg_data_t *pgdat = page_pgdat(page);
> +	const struct mem_cgroup *memcg;
> +	struct mem_cgroup_per_node *mz;
> +
> +	if (mem_cgroup_disabled())
> +		return lruvec == &pgdat->__lruvec;
> +
> +	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> +	memcg = page->mem_cgroup ? : root_mem_cgroup;
> +
> +	return lruvec->pgdat == pgdat && mz->memcg == memcg;
> +}
> +
>  struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
>  
>  struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
> @@ -880,6 +896,14 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
>  	return &pgdat->__lruvec;
>  }
>  
> +static inline bool lruvec_holds_page_lru_lock(struct page *page,
> +					      struct lruvec *lruvec)
> +{
> +	pg_data_t *pgdat = page_pgdat(page);
> +
> +	return lruvec == &pgdat->__lruvec;
> +}
> +
>  static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
>  {
>  	return NULL;
> @@ -1317,18 +1341,12 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
>  static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
>  		struct lruvec *locked_lruvec)
>  {
> -	struct pglist_data *pgdat = page_pgdat(page);
> -	bool locked;
> +	if (locked_lruvec) {
> +		if (lruvec_holds_page_lru_lock(page, locked_lruvec))
> +			return locked_lruvec;
>  
> -	rcu_read_lock();
> -	locked = mem_cgroup_page_lruvec(page, pgdat) == locked_lruvec;
> -	rcu_read_unlock();
> -
> -	if (locked)
> -		return locked_lruvec;
> -
> -	if (locked_lruvec)
>  		unlock_page_lruvec_irq(locked_lruvec);
> +	}
>  
>  	return lock_page_lruvec_irq(page);
>  }
> @@ -1337,18 +1355,12 @@ static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
>  static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
>  		struct lruvec *locked_lruvec, unsigned long *flags)
>  {
> -	struct pglist_data *pgdat = page_pgdat(page);
> -	bool locked;
> -
> -	rcu_read_lock();
> -	locked = mem_cgroup_page_lruvec(page, pgdat) == locked_lruvec;
> -	rcu_read_unlock();
> -
> -	if (locked)
> -		return locked_lruvec;
> +	if (locked_lruvec) {
> +		if (lruvec_holds_page_lru_lock(page, locked_lruvec))
> +			return locked_lruvec;
>  
> -	if (locked_lruvec)
>  		unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
> +	}
>  
> 	return lock_page_lruvec_irqsave(page, flags);
>  }
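
Just to make sure I understand the win, here is a rough caller-side
sketch of the pattern this fast path helps. drain_lru_batch() and the
batch shape are made up for illustration; only the relock/unlock
helpers come from the series:

/*
 * Illustrative sketch only -- drain_lru_batch() is a made-up caller;
 * only relock_page_lruvec_irqsave()/unlock_page_lruvec_irqrestore()
 * are from the series.
 */
static void drain_lru_batch(struct page **pages, int nr)
{
	struct lruvec *lruvec = NULL;
	unsigned long flags;
	int i;

	for (i = 0; i < nr; i++) {
		struct page *page = pages[i];

		/*
		 * With this patch, when the held lruvec already covers
		 * the page, lruvec_holds_page_lru_lock() decides that
		 * with plain pointer compares -- no rcu_read_lock() --
		 * and the held lock is reused; otherwise the old lruvec
		 * is unlocked and the page's own lruvec is locked.
		 */
		lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);

		/* ... operate on page under the lru lock ... */
	}

	if (lruvec)
		unlock_page_lruvec_irqrestore(lruvec, flags);
}

So for a batch whose pages mostly share one lruvec, nearly every
iteration takes the fast path and the lock is acquired only a handful
of times, if I read it correctly.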