If pages are being migrated from a memcg, then updates to that memcg's
page statistics are protected by grabbing a bit spin lock using
lock_page_cgroup().  In an upcoming commit, memcg dirty page accounting
will update memcg page statistics (specifically: the number of writeback
pages) from softirq context.  Avoid a deadlock from a nested spin lock
attempt by disabling interrupts on the local processor when grabbing the
page_cgroup bit_spin_lock in lock_page_cgroup().  This avoids the
following deadlock:

        CPU 0                   CPU 1 (statistic updater)
                                inc_file_mapped
                                rcu_read_lock
  start move
  synchronize_rcu
                                lock_page_cgroup
                                  softirq
                                  test_clear_page_writeback
                                  mem_cgroup_dec_page_stat(NR_WRITEBACK)
                                  rcu_read_lock
                                  lock_page_cgroup   /* deadlock */
                                  unlock_page_cgroup
                                  rcu_read_unlock
                                unlock_page_cgroup
                                rcu_read_unlock

By disabling interrupts in lock_page_cgroup(), nested acquisition
attempts are avoided: the softirq is delayed until inc_file_mapped
re-enables interrupts in unlock_page_cgroup().

The normal fast path of memcg page stat updates typically does not need
to call lock_page_cgroup(), so this change does not affect the
performance of common-case page accounting.

Signed-off-by: Andrea Righi <arighi@xxxxxxxxxxx>
Signed-off-by: Greg Thelen <gthelen@xxxxxxxxxx>
---
For reviewers: a minimal standalone sketch of the resulting lock/unlock
pattern is appended after the patch.

 include/linux/page_cgroup.h |    8 +++++-
 mm/memcontrol.c             |   51 +++++++++++++++++++++++++-----------------
 2 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index b59c298..872f6b1 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -117,14 +117,18 @@ static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
 	return page_zonenum(pc->page);
 }
 
-static inline void lock_page_cgroup(struct page_cgroup *pc)
+static inline void lock_page_cgroup(struct page_cgroup *pc,
+				    unsigned long *flags)
 {
+	local_irq_save(*flags);
 	bit_spin_lock(PCG_LOCK, &pc->flags);
 }
 
-static inline void unlock_page_cgroup(struct page_cgroup *pc)
+static inline void unlock_page_cgroup(struct page_cgroup *pc,
+				      unsigned long flags)
 {
 	bit_spin_unlock(PCG_LOCK, &pc->flags);
+	local_irq_restore(flags);
 }
 
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f4259f4..267d774 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1599,6 +1599,7 @@ void mem_cgroup_update_page_stat(struct page *page,
 	struct mem_cgroup *mem;
 	struct page_cgroup *pc = lookup_page_cgroup(page);
 	bool need_unlock = false;
+	unsigned long flags;
 
 	if (unlikely(!pc))
 		return;
@@ -1610,7 +1611,7 @@ void mem_cgroup_update_page_stat(struct page *page,
 	/* pc->mem_cgroup is unstable ? */
 	if (unlikely(mem_cgroup_stealed(mem))) {
 		/* take a lock against to access pc->mem_cgroup */
-		lock_page_cgroup(pc);
+		lock_page_cgroup(pc, &flags);
 		need_unlock = true;
 		mem = pc->mem_cgroup;
 		if (!mem || !PageCgroupUsed(pc))
@@ -1633,7 +1634,7 @@ void mem_cgroup_update_page_stat(struct page *page,
 
 out:
 	if (unlikely(need_unlock))
-		unlock_page_cgroup(pc);
+		unlock_page_cgroup(pc, flags);
 	rcu_read_unlock();
 	return;
 }
@@ -2053,11 +2054,12 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 	struct page_cgroup *pc;
 	unsigned short id;
 	swp_entry_t ent;
+	unsigned long flags;
 
 	VM_BUG_ON(!PageLocked(page));
 
 	pc = lookup_page_cgroup(page);
-	lock_page_cgroup(pc);
+	lock_page_cgroup(pc, &flags);
 	if (PageCgroupUsed(pc)) {
 		mem = pc->mem_cgroup;
 		if (mem && !css_tryget(&mem->css))
@@ -2071,7 +2073,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 			mem = NULL;
 		rcu_read_unlock();
 	}
-	unlock_page_cgroup(pc);
+	unlock_page_cgroup(pc, flags);
 	return mem;
 }
 
@@ -2084,13 +2086,15 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 				       struct page_cgroup *pc,
 				       enum charge_type ctype)
 {
+	unsigned long flags;
+
 	/* try_charge() can return NULL to *memcg, taking care of it. */
 	if (!mem)
 		return;
 
-	lock_page_cgroup(pc);
+	lock_page_cgroup(pc, &flags);
 	if (unlikely(PageCgroupUsed(pc))) {
-		unlock_page_cgroup(pc);
+		unlock_page_cgroup(pc, flags);
 		mem_cgroup_cancel_charge(mem);
 		return;
 	}
@@ -2120,7 +2124,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 
 	mem_cgroup_charge_statistics(mem, pc, true);
 
-	unlock_page_cgroup(pc);
+	unlock_page_cgroup(pc, flags);
 	/*
 	 * "charge_statistics" updated event counter. Then, check it.
 	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
@@ -2187,12 +2191,13 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
 {
 	int ret = -EINVAL;
-	lock_page_cgroup(pc);
+	unsigned long flags;
+	lock_page_cgroup(pc, &flags);
 	if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
 		__mem_cgroup_move_account(pc, from, to, uncharge);
 		ret = 0;
 	}
-	unlock_page_cgroup(pc);
+	unlock_page_cgroup(pc, flags);
 	/*
 	 * check events
 	 */
@@ -2298,6 +2303,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
 	int ret;
+	unsigned long flags;
 
 	if (mem_cgroup_disabled())
 		return 0;
@@ -2320,12 +2326,12 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 		pc = lookup_page_cgroup(page);
 		if (!pc)
 			return 0;
-		lock_page_cgroup(pc);
+		lock_page_cgroup(pc, &flags);
 		if (PageCgroupUsed(pc)) {
-			unlock_page_cgroup(pc);
+			unlock_page_cgroup(pc, flags);
 			return 0;
 		}
-		unlock_page_cgroup(pc);
+		unlock_page_cgroup(pc, flags);
 	}
 
 	if (unlikely(!mm))
@@ -2511,6 +2517,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
+	unsigned long flags;
 
 	if (mem_cgroup_disabled())
 		return NULL;
@@ -2525,7 +2532,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	if (unlikely(!pc || !PageCgroupUsed(pc)))
 		return NULL;
 
-	lock_page_cgroup(pc);
+	lock_page_cgroup(pc, &flags);
 
 	mem = pc->mem_cgroup;
 
@@ -2560,7 +2567,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	 * special functions.
 	 */
 
-	unlock_page_cgroup(pc);
+	unlock_page_cgroup(pc, flags);
 	/*
 	 * even after unlock, we have mem->res.usage here and this memcg
 	 * will never be freed.
@@ -2576,7 +2583,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	return mem;
 
 unlock_out:
-	unlock_page_cgroup(pc);
+	unlock_page_cgroup(pc, flags);
 	return NULL;
 }
 
@@ -2765,12 +2772,13 @@ int mem_cgroup_prepare_migration(struct page *page,
 	struct mem_cgroup *mem = NULL;
 	enum charge_type ctype;
 	int ret = 0;
+	unsigned long flags;
 
 	if (mem_cgroup_disabled())
 		return 0;
 
 	pc = lookup_page_cgroup(page);
-	lock_page_cgroup(pc);
+	lock_page_cgroup(pc, &flags);
 	if (PageCgroupUsed(pc)) {
 		mem = pc->mem_cgroup;
 		css_get(&mem->css);
@@ -2806,7 +2814,7 @@ int mem_cgroup_prepare_migration(struct page *page,
 		if (PageAnon(page))
 			SetPageCgroupMigration(pc);
 	}
-	unlock_page_cgroup(pc);
+	unlock_page_cgroup(pc, flags);
 	/*
 	 * If the page is not charged at this point,
 	 * we return here.
@@ -2819,9 +2827,9 @@ int mem_cgroup_prepare_migration(struct page *page,
 	css_put(&mem->css);/* drop extra refcnt */
 	if (ret || *ptr == NULL) {
 		if (PageAnon(page)) {
-			lock_page_cgroup(pc);
+			lock_page_cgroup(pc, &flags);
 			ClearPageCgroupMigration(pc);
-			unlock_page_cgroup(pc);
+			unlock_page_cgroup(pc, flags);
 			/*
 			 * The old page may be fully unmapped while we kept it.
 			 */
@@ -2852,6 +2860,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
 {
 	struct page *used, *unused;
 	struct page_cgroup *pc;
+	unsigned long flags;
 
 	if (!mem)
 		return;
@@ -2871,9 +2880,9 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
 	 * Clear the flag and check the page should be charged.
 	 */
 	pc = lookup_page_cgroup(oldpage);
-	lock_page_cgroup(pc);
+	lock_page_cgroup(pc, &flags);
 	ClearPageCgroupMigration(pc);
-	unlock_page_cgroup(pc);
+	unlock_page_cgroup(pc, flags);
 
 	__mem_cgroup_uncharge_common(unused, MEM_CGROUP_CHARGE_TYPE_FORCE);
--
1.7.1
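
For reviewers: the sketch below is a minimal, standalone illustration of the
lock/unlock pattern this patch establishes; it is not part of the patch. The
function name update_page_stat_locked() is made up for the example, while
lookup_page_cgroup(), lock_page_cgroup() and unlock_page_cgroup() are the
interfaces as changed above.

/*
 * Illustrative sketch only (not part of the patch): a hypothetical
 * statistics updater using the irq-saving interface added above.
 */
static void update_page_stat_locked(struct page *page)
{
	struct page_cgroup *pc = lookup_page_cgroup(page);
	unsigned long flags;

	if (unlikely(!pc))
		return;

	/*
	 * local_irq_save() inside lock_page_cgroup() keeps softirqs off
	 * this CPU while PCG_LOCK is held, so a writeback completion
	 * (test_clear_page_writeback -> mem_cgroup_dec_page_stat) cannot
	 * run here and retake the same bit spin lock.
	 */
	lock_page_cgroup(pc, &flags);

	/* ... update pc->mem_cgroup statistics under the lock ... */

	unlock_page_cgroup(pc, flags);	/* irqs restored; pending softirqs may run */
}

This is the same property the deadlock diagram in the changelog relies on:
with interrupts disabled between lock and unlock, the softirq on CPU 1 cannot
interleave inside the critical section.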