From 94b17cbc95e068a0a841c84fb0345f48a2a27d24 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> Date: Thu, 2 Feb 2012 11:49:59 +0900 Subject: [PATCH 3/6] memcg: remove PCG_MOVE_LOCK flag from page_cgroup. PCG_MOVE_LOCK is used as a bit spinlock to avoid a race between overwriting pc->mem_cgroup and per-memcg page statistics accounting. This lock helps to avoid the race, but the race is very rare because moving tasks between cgroups is not a common operation. So, using 1 bit per page seems too costly. This patch changes this lock to a per-memcg spinlock and removes PCG_MOVE_LOCK. If a finer-grained lock is required, we'll be able to add some hashing, but I'd like to start from this. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> --- include/linux/page_cgroup.h | 19 ------------------- mm/memcontrol.c | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 1060292..7a3af74 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -7,7 +7,6 @@ enum { PCG_USED, /* this object is in use. */ PCG_MIGRATION, /* under page migration */ /* flags for mem_cgroup and file and I/O status */ - PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */ PCG_FILE_MAPPED, /* page is accounted as "mapped" */ __NR_PCG_FLAGS, }; @@ -89,24 +88,6 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc) bit_spin_unlock(PCG_LOCK, &pc->flags); } -static inline void move_lock_page_cgroup(struct page_cgroup *pc, - unsigned long *flags) -{ - /* - * We know updates to pc->flags of page cache's stats are from both of - * usual context or IRQ context. Disable IRQ to avoid deadlock.
- */ - local_irq_save(*flags); - bit_spin_lock(PCG_MOVE_LOCK, &pc->flags); -} - -static inline void move_unlock_page_cgroup(struct page_cgroup *pc, - unsigned long *flags) -{ - bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags); - local_irq_restore(*flags); -} - #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct page_cgroup; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4ba0d76..083154d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -280,6 +280,8 @@ struct mem_cgroup { * set > 0 if pages under this cgroup are moving to other cgroup. */ atomic_t moving_account; + /* taken only while moving_account > 0 */ + spinlock_t move_lock; /* * percpu counter. */ @@ -1338,6 +1340,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg) return false; } +/* + * Take this lock when + * - a code tries to modify page's memcg while it's USED. + * - a code tries to modify page state accounting in a memcg. + * see mem_cgroup_stealed(), too. + */ +static void move_lock_page_cgroup(struct page_cgroup *pc, unsigned long *flags) +{ + struct mem_cgroup *memcg; + +again: + memcg = pc->mem_cgroup; + spin_lock_irqsave(&memcg->move_lock, *flags); + if (unlikely(pc->mem_cgroup != memcg)) { + spin_unlock_irqrestore(&memcg->move_lock, *flags); + goto again; + } +} + +static void move_unlock_page_cgroup(struct page_cgroup *pc, + unsigned long *flags) +{ + struct mem_cgroup *memcg; + + memcg = pc->mem_cgroup; + spin_unlock_irqrestore(&memcg->move_lock, *flags); +} + /** * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode. * @memcg: The memory cgroup that went over limit @@ -2435,8 +2465,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\ - (1 << PCG_MIGRATION)) +#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION)) /* * Because tail pages are not marked as "used", set it. 
We're under * zone->lru_lock, 'splitting on pmd' and compound_lock. @@ -4923,6 +4952,7 @@ mem_cgroup_create(struct cgroup *cont) atomic_set(&memcg->refcnt, 1); memcg->move_charge_at_immigrate = 0; mutex_init(&memcg->thresholds_lock); + spin_lock_init(&memcg->move_lock); return &memcg->css; free_out: __mem_cgroup_free(memcg); -- 1.7.4.1 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/ Don't email: <a href="mailto:dont@xxxxxxxxx">email@xxxxxxxxx</a>