KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> writes:

> On Sun, 3 Oct 2010 23:57:59 -0700
> Greg Thelen <gthelen@xxxxxxxxxx> wrote:
>
>> If pages are being migrated from a memcg, then updates to that
>> memcg's page statistics are protected by grabbing a bit spin lock
>> using lock_page_cgroup().  In an upcoming commit memcg dirty page
>> accounting will be updating memcg page accounting (specifically:
>> num writeback pages) from softirq.  Avoid a deadlocking nested
>> spin lock attempt by disabling interrupts on the local processor
>> when grabbing the page_cgroup bit_spin_lock in lock_page_cgroup().
>> This avoids the following deadlock on a statistic update:
>>
>>        CPU 0                    CPU 1
>>                                 inc_file_mapped
>>                                 rcu_read_lock
>>        start move
>>        synchronize_rcu
>>                                 lock_page_cgroup
>>                                   softirq
>>                                   test_clear_page_writeback
>>                                   mem_cgroup_dec_page_stat(NR_WRITEBACK)
>>                                   rcu_read_lock
>>                                   lock_page_cgroup   /* deadlock */
>>                                   unlock_page_cgroup
>>                                   rcu_read_unlock
>>                                 unlock_page_cgroup
>>                                 rcu_read_unlock
>>
>> By disabling interrupts in lock_page_cgroup(), nested calls are
>> avoided.  The softirq is delayed until inc_file_mapped re-enables
>> interrupts when calling unlock_page_cgroup().
>>
>> The normal fast path of memcg page stat updates typically does not
>> need to call lock_page_cgroup(), so this change does not affect the
>> performance of the common-case page accounting.
>>
>> Signed-off-by: Andrea Righi <arighi@xxxxxxxxxxx>
>> Signed-off-by: Greg Thelen <gthelen@xxxxxxxxxx>
>
> Nice catch!
>
> But..hmm, this wasn't necessary for FILE_MAPPED but is necessary for
> the new statistics, right?  (This affects the order of patches.)

This patch (disabling interrupts) is not needed until later patches in
this series update memcg statistics from softirq context.  If we only
had FILE_MAPPED, this patch would not be needed.

I placed this patch before the dependent patches that need it.  The
opposite order seemed wrong, because it would leave the deadlock
possible until this patch was applied.  With this patch first, there is
no way to apply the series in order and hit the deadlock described
above.
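To make the deadlock in the changelog concrete, here is a small
userspace sketch (an analogy only; the names like pcg_lock, lock_pcg()
and softirq_handler() are made up and this is not kernel code).  A
SIGALRM handler stands in for the softirq and sigprocmask() stands in
for local_irq_save()/local_irq_restore(): if the handler can interrupt
code that already holds the non-recursive lock, it spins forever on a
lock that will never be released, exactly like the CPU 1 column above.
Blocking the signal around the critical section closes that window,
which is what disabling interrupts in lock_page_cgroup() does.

/* Userspace analogy of the lock_page_cgroup() deadlock -- illustration
 * only, not kernel code.  Build with: cc -std=c11 demo.c */
#include <signal.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_flag pcg_lock = ATOMIC_FLAG_INIT; /* the "bit spin lock" */
static volatile sig_atomic_t softirq_ran;

static void lock_pcg(void)
{
        while (atomic_flag_test_and_set(&pcg_lock))
                ;                       /* spin, like bit_spin_lock() */
}

static void unlock_pcg(void)
{
        atomic_flag_clear(&pcg_lock);
}

/* Stand-in for the softirq path: test_clear_page_writeback() ->
 * mem_cgroup_dec_page_stat(NR_WRITEBACK) takes the same lock. */
static void softirq_handler(int sig)
{
        (void)sig;
        lock_pcg();     /* deadlocks if the interrupted code holds pcg_lock */
        unlock_pcg();
        softirq_ran = 1;
}

int main(void)
{
        sigset_t block, old;

        signal(SIGALRM, softirq_handler);
        sigemptyset(&block);
        sigaddset(&block, SIGALRM);
        alarm(1);       /* the "softirq" fires while we loop below */

        while (!softirq_ran) {
                /* analog of local_irq_save(): keep the "softirq" out */
                sigprocmask(SIG_BLOCK, &block, &old);
                lock_pcg();
                /* ... update per-page statistics ... */
                unlock_pcg();
                /* analog of local_irq_restore(): a pending "softirq" runs here */
                sigprocmask(SIG_SETMASK, &old, NULL);
        }
        puts("the \"softirq\" only ran with the lock free -- no deadlock");
        return 0;
}

Drop the two sigprocmask() calls and the handler can fire between
lock_pcg() and unlock_pcg(), spinning forever on a lock the interrupted
code can never release.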
> Anyway
>
> Acked-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
>
>
>> ---
>>  include/linux/page_cgroup.h |    8 +++++-
>>  mm/memcontrol.c             |   51 +++++++++++++++++++++++++-----------------
>>  2 files changed, 36 insertions(+), 23 deletions(-)
>>
>> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
>> index b59c298..872f6b1 100644
>> --- a/include/linux/page_cgroup.h
>> +++ b/include/linux/page_cgroup.h
>> @@ -117,14 +117,18 @@ static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
>>         return page_zonenum(pc->page);
>>  }
>>
>> -static inline void lock_page_cgroup(struct page_cgroup *pc)
>> +static inline void lock_page_cgroup(struct page_cgroup *pc,
>> +                                    unsigned long *flags)
>>  {
>> +       local_irq_save(*flags);
>>         bit_spin_lock(PCG_LOCK, &pc->flags);
>>  }
>>
>> -static inline void unlock_page_cgroup(struct page_cgroup *pc)
>> +static inline void unlock_page_cgroup(struct page_cgroup *pc,
>> +                                      unsigned long flags)
>>  {
>>         bit_spin_unlock(PCG_LOCK, &pc->flags);
>> +       local_irq_restore(flags);
>>  }
>>
>>  #else /* CONFIG_CGROUP_MEM_RES_CTLR */
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index f4259f4..267d774 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -1599,6 +1599,7 @@ void mem_cgroup_update_page_stat(struct page *page,
>>         struct mem_cgroup *mem;
>>         struct page_cgroup *pc = lookup_page_cgroup(page);
>>         bool need_unlock = false;
>> +       unsigned long flags;
>>
>>         if (unlikely(!pc))
>>                 return;
>> @@ -1610,7 +1611,7 @@ void mem_cgroup_update_page_stat(struct page *page,
>>         /* pc->mem_cgroup is unstable ? */
>>         if (unlikely(mem_cgroup_stealed(mem))) {
>>                 /* take a lock against to access pc->mem_cgroup */
>> -               lock_page_cgroup(pc);
>> +               lock_page_cgroup(pc, &flags);
>>                 need_unlock = true;
>>                 mem = pc->mem_cgroup;
>>                 if (!mem || !PageCgroupUsed(pc))
>> @@ -1633,7 +1634,7 @@ void mem_cgroup_update_page_stat(struct page *page,
>>
>>  out:
>>         if (unlikely(need_unlock))
>> -               unlock_page_cgroup(pc);
>> +               unlock_page_cgroup(pc, flags);
>>         rcu_read_unlock();
>>         return;
>>  }
>> @@ -2053,11 +2054,12 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
>>         struct page_cgroup *pc;
>>         unsigned short id;
>>         swp_entry_t ent;
>> +       unsigned long flags;
>>
>>         VM_BUG_ON(!PageLocked(page));
>>
>>         pc = lookup_page_cgroup(page);
>> -       lock_page_cgroup(pc);
>> +       lock_page_cgroup(pc, &flags);
>>         if (PageCgroupUsed(pc)) {
>>                 mem = pc->mem_cgroup;
>>                 if (mem && !css_tryget(&mem->css))
>> @@ -2071,7 +2073,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
>>                         mem = NULL;
>>                 rcu_read_unlock();
>>         }
>> -       unlock_page_cgroup(pc);
>> +       unlock_page_cgroup(pc, flags);
>>         return mem;
>>  }
>>
>> @@ -2084,13 +2086,15 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
>>                                        struct page_cgroup *pc,
>>                                        enum charge_type ctype)
>>  {
>> +       unsigned long flags;
>> +
>>         /* try_charge() can return NULL to *memcg, taking care of it. */
>>         if (!mem)
>>                 return;
>>
>> -       lock_page_cgroup(pc);
>> +       lock_page_cgroup(pc, &flags);
>>         if (unlikely(PageCgroupUsed(pc))) {
>> -               unlock_page_cgroup(pc);
>> +               unlock_page_cgroup(pc, flags);
>>                 mem_cgroup_cancel_charge(mem);
>>                 return;
>>         }
>> @@ -2120,7 +2124,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
>>
>>         mem_cgroup_charge_statistics(mem, pc, true);
>>
>> -       unlock_page_cgroup(pc);
>> +       unlock_page_cgroup(pc, flags);
>>         /*
>>          * "charge_statistics" updated event counter. Then, check it.
>>          * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
>> @@ -2187,12 +2191,13 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
>>         struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
>>  {
>>         int ret = -EINVAL;
>> -       lock_page_cgroup(pc);
>> +       unsigned long flags;
>> +       lock_page_cgroup(pc, &flags);
>>         if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
>>                 __mem_cgroup_move_account(pc, from, to, uncharge);
>>                 ret = 0;
>>         }
>> -       unlock_page_cgroup(pc);
>> +       unlock_page_cgroup(pc, flags);
>>         /*
>>          * check events
>>          */
>> @@ -2298,6 +2303,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
>>                                 gfp_t gfp_mask)
>>  {
>>         int ret;
>> +       unsigned long flags;
>>
>>         if (mem_cgroup_disabled())
>>                 return 0;
>> @@ -2320,12 +2326,12 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
>>                 pc = lookup_page_cgroup(page);
>>                 if (!pc)
>>                         return 0;
>> -               lock_page_cgroup(pc);
>> +               lock_page_cgroup(pc, &flags);
>>                 if (PageCgroupUsed(pc)) {
>> -                       unlock_page_cgroup(pc);
>> +                       unlock_page_cgroup(pc, flags);
>>                         return 0;
>>                 }
>> -               unlock_page_cgroup(pc);
>> +               unlock_page_cgroup(pc, flags);
>>         }
>>
>>         if (unlikely(!mm))
>> @@ -2511,6 +2517,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
>>  {
>>         struct page_cgroup *pc;
>>         struct mem_cgroup *mem = NULL;
>> +       unsigned long flags;
>>
>>         if (mem_cgroup_disabled())
>>                 return NULL;
>> @@ -2525,7 +2532,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
>>         if (unlikely(!pc || !PageCgroupUsed(pc)))
>>                 return NULL;
>>
>> -       lock_page_cgroup(pc);
>> +       lock_page_cgroup(pc, &flags);
>>
>>         mem = pc->mem_cgroup;
>>
>> @@ -2560,7 +2567,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
>>          * special functions.
>>          */
>>
>> -       unlock_page_cgroup(pc);
>> +       unlock_page_cgroup(pc, flags);
>>         /*
>>          * even after unlock, we have mem->res.usage here and this memcg
>>          * will never be freed.
>> @@ -2576,7 +2583,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
>>         return mem;
>>
>>  unlock_out:
>> -       unlock_page_cgroup(pc);
>> +       unlock_page_cgroup(pc, flags);
>>         return NULL;
>>  }
>>
>> @@ -2765,12 +2772,13 @@ int mem_cgroup_prepare_migration(struct page *page,
>>         struct mem_cgroup *mem = NULL;
>>         enum charge_type ctype;
>>         int ret = 0;
>> +       unsigned long flags;
>>
>>         if (mem_cgroup_disabled())
>>                 return 0;
>>
>>         pc = lookup_page_cgroup(page);
>> -       lock_page_cgroup(pc);
>> +       lock_page_cgroup(pc, &flags);
>>         if (PageCgroupUsed(pc)) {
>>                 mem = pc->mem_cgroup;
>>                 css_get(&mem->css);
>> @@ -2806,7 +2814,7 @@ int mem_cgroup_prepare_migration(struct page *page,
>>                 if (PageAnon(page))
>>                         SetPageCgroupMigration(pc);
>>         }
>> -       unlock_page_cgroup(pc);
>> +       unlock_page_cgroup(pc, flags);
>>         /*
>>          * If the page is not charged at this point,
>>          * we return here.
>> @@ -2819,9 +2827,9 @@ int mem_cgroup_prepare_migration(struct page *page,
>>         css_put(&mem->css);/* drop extra refcnt */
>>         if (ret || *ptr == NULL) {
>>                 if (PageAnon(page)) {
>> -                       lock_page_cgroup(pc);
>> +                       lock_page_cgroup(pc, &flags);
>>                         ClearPageCgroupMigration(pc);
>> -                       unlock_page_cgroup(pc);
>> +                       unlock_page_cgroup(pc, flags);
>>                         /*
>>                          * The old page may be fully unmapped while we kept it.
>>                          */
>> @@ -2852,6 +2860,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
>>  {
>>         struct page *used, *unused;
>>         struct page_cgroup *pc;
>> +       unsigned long flags;
>>
>>         if (!mem)
>>                 return;
>> @@ -2871,9 +2880,9 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
>>          * Clear the flag and check the page should be charged.
>>          */
>>         pc = lookup_page_cgroup(oldpage);
>> -       lock_page_cgroup(pc);
>> +       lock_page_cgroup(pc, &flags);
>>         ClearPageCgroupMigration(pc);
>> -       unlock_page_cgroup(pc);
>> +       unlock_page_cgroup(pc, flags);
>>
>>         __mem_cgroup_uncharge_common(unused, MEM_CGROUP_CHARGE_TYPE_FORCE);
>>
>> --
>> 1.7.1