From feb695347b46885b5c785892271e19e503a43aa4 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> Date: Thu, 15 Dec 2011 14:00:15 +0900 Subject: [PATCH 5/5] memcg: remove PCG_MOVE_LOCK. PCG_MOVE_LOCK was introduced to prevent pc->mem_cgroup from being overwritten by move_account() while updating the states. lock_page_cgroup() cannot be used because page status can be updated by IRQ context. Considering again, in most cases, this lock will not be held. It's held when a task is moving between cgroups (with account_move flag) and rmdir() is called. So, it seems that having 1 bit per page for avoiding this race is not very good. This patch adds a hashed array of rwlocks for updating page status. The hash size is NR_CPUS and we have the rwlock array mem_cgroup_move_lock[hash(page)]: - move_account needs to take the write lock for the page, with IRQs off. - stat updates need to take the read lock. After this patch, the PCG_MOVE_LOCK bit can go away. And, at updating page stats, the caller doesn't need to disable IRQs. In the old path: disable_irq -> bit_spin_lock per page. In the new path: read_lock on a hashed rwlock. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> --- include/linux/page_cgroup.h | 20 ------------- mm/memcontrol.c | 66 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 60 insertions(+), 26 deletions(-) diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 86967ed..f9441ca 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -6,8 +6,6 @@ enum { PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */ PCG_USED, /* this object is in use. */ PCG_MIGRATION, /* under page migration */ - /* flags for mem_cgroup and file and I/O status */ - PCG_MOVE_LOCK, /* For race between move_account v.s. 
following bits */ __NR_PCG_FLAGS, }; @@ -84,24 +82,6 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc) bit_spin_unlock(PCG_LOCK, &pc->flags); } -static inline void move_lock_page_cgroup(struct page_cgroup *pc, - unsigned long *flags) -{ - /* - * We know updates to pc->flags of page cache's stats are from both of - * usual context or IRQ context. Disable IRQ to avoid deadlock. - */ - local_irq_save(*flags); - bit_spin_lock(PCG_MOVE_LOCK, &pc->flags); -} - -static inline void move_unlock_page_cgroup(struct page_cgroup *pc, - unsigned long *flags) -{ - bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags); - local_irq_restore(*flags); -} - #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct page_cgroup; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c9b1131..66e03ad 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -307,6 +307,61 @@ struct mem_cgroup { #endif }; +/* locks for account moving between memcg. */ +#define NR_MOVE_ACCOUNT_LOCKS (NR_CPUS) +static rwlock_t move_account_locks[NR_MOVE_ACCOUNT_LOCKS]; +#define move_account_hash(page) ((page_to_pfn(page) % NR_MOVE_ACCOUNT_LOCKS)) + +static rwlock_t *move_account_rwlock(struct page *page) +{ + int hnum = page_to_pfn(page) % NR_MOVE_ACCOUNT_LOCKS; + + return &move_account_locks[hnum]; +} + +/* + * Read move lock is taken at updating statistics. + */ +static void mem_cgroup_move_account_rlock(struct page *page) +{ + rwlock_t *lock = move_account_rwlock(page); + + read_lock(lock); +} + +static void mem_cgroup_move_account_runlock(struct page *page) +{ + rwlock_t *lock = move_account_rwlock(page); + + read_unlock(lock); +} +/* + * Write move lock is taken when moving task account information. 
+ */ +static void mem_cgroup_move_account_wlock(struct page *page, unsigned long *flags) +{ + rwlock_t *lock = move_account_rwlock(page); + + write_lock_irqsave(lock, *flags); +} + +static void mem_cgroup_move_account_wunlock(struct page *page, unsigned long *flags) +{ + rwlock_t *lock = move_account_rwlock(page); + + write_unlock_irqrestore(lock, *flags); +} + +static int __init mem_cgroup_rwlock_init(void) +{ + int num; + + for (num = 0; num < NR_MOVE_ACCOUNT_LOCKS; num++) + rwlock_init(&move_account_locks[num]); + return 0; +} +late_initcall(mem_cgroup_rwlock_init); + /* Stuffs for move charges at task migration. */ /* * Types of charges to be moved. "move_charge_at_immitgrate" is treated as a @@ -1846,7 +1901,7 @@ bool __mem_cgroup_begin_update_page_stats(struct page *page, unsigned long *flag if (!memcg || !PageCgroupUsed(pc)) goto out; if (unlikely(mem_cgroup_stealed(memcg)) || PageTransHuge(page)) { - move_lock_page_cgroup(pc, flags); + mem_cgroup_move_account_rlock(page); need_unlock = true; } out: @@ -1861,7 +1916,7 @@ void __mem_cgroup_end_update_page_stats(struct page *page, bool locked, if (unlikely(locked)) { pc = lookup_page_cgroup(page); - move_unlock_page_cgroup(pc, flags); + mem_cgroup_move_account_runlock(page); } rcu_read_unlock(); } @@ -2414,8 +2469,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\ - (1 << PCG_MIGRATION)) +#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION)) /* * Because tail pages are not marked as "used", set it. We're under * zone->lru_lock, 'splitting on pmd' and compound_lock. 
@@ -2494,7 +2548,7 @@ static int mem_cgroup_move_account(struct page *page, if (!PageCgroupUsed(pc) || pc->mem_cgroup != from) goto unlock; - move_lock_page_cgroup(pc, &flags); + mem_cgroup_move_account_wlock(page, &flags); if (page_mapped(page) && !PageAnon(page)) { /* Update mapped_file data for mem_cgroup */ @@ -2518,7 +2572,7 @@ static int mem_cgroup_move_account(struct page *page, * guaranteed that "to" is never removed. So, we don't check rmdir * status here. */ - move_unlock_page_cgroup(pc, &flags); + mem_cgroup_move_account_wunlock(page, &flags); ret = 0; unlock: unlock_page_cgroup(pc); -- 1.7.4.1 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/ Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>