From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>

When we try to enhance the page status update code to support other flags,
one of the problems is updating status from IRQ context. Now,
mem_cgroup_update_file_stat() takes lock_page_cgroup() to avoid races with
_account move_. IOW, there are no races with charge/uncharge in nature.

Considering an update from IRQ context, it seems better to disable IRQs at
lock_page_cgroup() to avoid deadlock. But lock_page_cgroup() is used too
widely, and adding an IRQ disable there would hurt performance.

To avoid that big hammer, this patch adds a new lock for update_stat().
This lock provides mutual exclusion between stat updates and account moving.

This adds a new lock to move_account, so it makes move_account slower.
But considering the trade-off, I think it's acceptable.

Scores for moving 8GB of anon pages on an 8-CPU Xeon (3.1GHz):

[before patch] (mmotm + optimization patch (#1 in this series))
[root@bluextal kamezawa]# time echo 2257 > /cgroup/B/tasks

real    0m0.694s
user    0m0.000s
sys     0m0.683s

[after patch]
[root@bluextal kamezawa]# time echo 2238 > /cgroup/B/tasks

real    0m0.741s
user    0m0.000s
sys     0m0.730s

This moves 8GBytes == 2048k pages. There are no bad effects on code paths
other than "move".

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
---
 include/linux/page_cgroup.h |   29 +++++++++++++++++++++++++++++
 mm/memcontrol.c             |   11 +++++++++--
 2 files changed, 38 insertions(+), 2 deletions(-)

Index: mmotm-1013/include/linux/page_cgroup.h
===================================================================
--- mmotm-1013.orig/include/linux/page_cgroup.h
+++ mmotm-1013/include/linux/page_cgroup.h
@@ -36,6 +36,7 @@ struct page_cgroup *lookup_page_cgroup(s
 enum {
 	/* flags for mem_cgroup */
 	PCG_LOCK, /* page cgroup is locked */
+	PCG_LOCK_STATS, /* page cgroup's stat accounting flags are locked */
 	PCG_CACHE, /* charged as cache */
 	PCG_USED, /* this object is in use. */
 	PCG_ACCT_LRU, /* page has been accounted for */
@@ -104,6 +105,34 @@ static inline void unlock_page_cgroup(st
 	bit_spin_unlock(PCG_LOCK, &pc->flags);
 }
 
+/*
+ * Because page's status can be updated in IRQ context(PG_writeback)
+ * we disable IRQ at updating page's stat.
+ */
+static inline void lock_page_cgroup_stat(struct page_cgroup *pc,
+					unsigned long *flags)
+{
+	local_irq_save(*flags);
+	bit_spin_lock(PCG_LOCK_STATS, &pc->flags);
+}
+
+static inline void __lock_page_cgroup_stat(struct page_cgroup *pc)
+{
+	bit_spin_lock(PCG_LOCK_STATS, &pc->flags);
+}
+
+static inline void unlock_page_cgroup_stat(struct page_cgroup *pc,
+					unsigned long *flags)
+{
+	bit_spin_unlock(PCG_LOCK_STATS, &pc->flags);
+	local_irq_restore(*flags);
+}
+
+static inline void __unlock_page_cgroup_stat(struct page_cgroup *pc)
+{
+	bit_spin_unlock(PCG_LOCK_STATS, &pc->flags);
+}
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct page_cgroup;
 
Index: mmotm-1013/mm/memcontrol.c
===================================================================
--- mmotm-1013.orig/mm/memcontrol.c
+++ mmotm-1013/mm/memcontrol.c
@@ -1596,6 +1596,7 @@ static void mem_cgroup_update_file_stat(
 	struct mem_cgroup *mem;
 	struct page_cgroup *pc = lookup_page_cgroup(page);
 	bool need_unlock = false;
+	unsigned long flags = 0;
 
 	if (unlikely(!pc))
 		return;
@@ -1607,7 +1608,7 @@ static void mem_cgroup_update_file_stat(
 	/* pc->mem_cgroup is unstable ? */
 	if (unlikely(mem_cgroup_stealed(mem))) {
 		/* take a lock against to access pc->mem_cgroup */
-		lock_page_cgroup(pc);
+		lock_page_cgroup_stat(pc, &flags);
 		need_unlock = true;
 		mem = pc->mem_cgroup;
 		if (!mem || !PageCgroupUsed(pc))
@@ -1629,7 +1630,7 @@ static void mem_cgroup_update_file_stat(
 out:
 	if (unlikely(need_unlock))
-		unlock_page_cgroup(pc);
+		unlock_page_cgroup_stat(pc, &flags);
 	rcu_read_unlock();
 	return;
 }
@@ -2187,12 +2188,18 @@ static int mem_cgroup_move_account(struc
 	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
 {
 	int ret = -EINVAL;
+
+	/* Avoiding dead-lock with page stat updates via irq context */
+	local_irq_disable();
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
+		__lock_page_cgroup_stat(pc);
 		__mem_cgroup_move_account(pc, from, to, uncharge);
+		__unlock_page_cgroup_stat(pc);
 		ret = 0;
 	}
 	unlock_page_cgroup(pc);
+	local_irq_enable();
 	/*
 	 * check events
 	 */
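
As an aside, here is a minimal sketch of how a stat updater that may run in
IRQ context is expected to use the new helpers. The caller name
mem_cgroup_update_writeback_stat() and its body are hypothetical,
illustration only, and not part of this patch:

/*
 * Hypothetical IRQ-safe stat updater (illustration only, not in this patch).
 * lock_page_cgroup_stat() saves the IRQ state and takes PCG_LOCK_STATS, so
 * this cannot deadlock against mem_cgroup_move_account(), which disables
 * IRQs before taking PCG_LOCK and then PCG_LOCK_STATS.
 */
static void mem_cgroup_update_writeback_stat(struct page *page, int val)
{
	struct page_cgroup *pc = lookup_page_cgroup(page);
	unsigned long flags;

	if (unlikely(!pc))
		return;

	lock_page_cgroup_stat(pc, &flags);
	if (PageCgroupUsed(pc) && pc->mem_cgroup) {
		/* update the per-memcg writeback counter by 'val' here */
	}
	unlock_page_cgroup_stat(pc, &flags);
}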