On Thu, 3 Jun 2010 11:51:19 +0900, KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> wrote:
> From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
> 
> Now, to check whether a memcg is under task-account-moving, we do
> css_tryget() against mc.to and mc.from. But this just complicates
> things. This patch makes the check easier.
> 
> This patch adds a spinlock to move_charge_struct and guards
> modification of mc.to and mc.from with it. With this, we don't have
> to think about complicated races around this not-critical path.
> 
> Changelog:
>  - removed disable/enable irq.
> 
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
> ---
>  mm/memcontrol.c |   48 ++++++++++++++++++++++++++++--------------------
>  1 file changed, 28 insertions(+), 20 deletions(-)
> 
> Index: mmotm-2.6.34-May21/mm/memcontrol.c
> ===================================================================
> --- mmotm-2.6.34-May21.orig/mm/memcontrol.c
> +++ mmotm-2.6.34-May21/mm/memcontrol.c
> @@ -268,6 +268,7 @@ enum move_type {
> 
>  /* "mc" and its members are protected by cgroup_mutex */
>  static struct move_charge_struct {
> +	spinlock_t	  lock; /* for from, to, moving_task */
>  	struct mem_cgroup *from;
>  	struct mem_cgroup *to;
>  	unsigned long precharge;
> @@ -276,6 +277,7 @@ static struct move_charge_struct {
>  	struct task_struct *moving_task;	/* a task moving charges */
>  	wait_queue_head_t waitq;		/* a waitq for other context */
>  } mc = {
> +	.lock = __SPIN_LOCK_UNLOCKED(mc.lock),
>  	.waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq),
>  };
> 
> @@ -1076,26 +1078,25 @@ static unsigned int get_swappiness(struc
> 
>  static bool mem_cgroup_under_move(struct mem_cgroup *mem)
>  {
> -	struct mem_cgroup *from = mc.from;
> -	struct mem_cgroup *to = mc.to;
> +	struct mem_cgroup *from;
> +	struct mem_cgroup *to;
>  	bool ret = false;
> -
> -	if (from == mem || to == mem)
> -		return true;
> -
> -	if (!from || !to || !mem->use_hierarchy)
> -		return false;
> -
> -	rcu_read_lock();
> -	if (css_tryget(&from->css)) {
> -		ret = css_is_ancestor(&from->css, &mem->css);
> -		css_put(&from->css);
> -	}
> -	if (!ret && css_tryget(&to->css)) {
> -		ret = css_is_ancestor(&to->css, &mem->css);
> +	/*
> +	 * Unlike task_move routines, we access mc.to, mc.from not under
> +	 * mutual exclusion by cgroup_mutex. Here, we take spinlock instead.
> +	 */
> +	spin_lock(&mc.lock);
> +	from = mc.from;
> +	to = mc.to;
> +	if (!from)
> +		goto unlock;
> +	if (from == mem || to == mem
> +	    || (mem->use_hierarchy && css_is_ancestor(&from->css, &mem->css))
> +	    || (mem->use_hierarchy && css_is_ancestor(&to->css, &mem->css)))
> 		css_put(&to->css);

I think this css_put() must be removed too.
Except for it, this patch looks good to me.
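
With that css_put() dropped, the whole function would read like this
(just my reconstruction of the hunks above, untested):

	static bool mem_cgroup_under_move(struct mem_cgroup *mem)
	{
		struct mem_cgroup *from;
		struct mem_cgroup *to;
		bool ret = false;
		/*
		 * Unlike task_move routines, we access mc.to, mc.from not under
		 * mutual exclusion by cgroup_mutex. Here, we take spinlock instead.
		 */
		spin_lock(&mc.lock);
		from = mc.from;
		to = mc.to;
		if (!from)
			goto unlock;
		/*
		 * mc.lock keeps mc.from/mc.to stable while we test them, so no
		 * css_tryget()/css_put() dance is needed any more.
		 */
		if (from == mem || to == mem
		    || (mem->use_hierarchy && css_is_ancestor(&from->css, &mem->css))
		    || (mem->use_hierarchy && css_is_ancestor(&to->css, &mem->css)))
			ret = true;
	unlock:
		spin_unlock(&mc.lock);
		return ret;
	}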
Thank you for cleaning up my dirty code :)

Thanks,
Daisuke Nishimura.

> -	}
> -	rcu_read_unlock();
> +		ret = true;
> +unlock:
> +	spin_unlock(&mc.lock);
>  	return ret;
>  }
> 
> @@ -4447,11 +4448,13 @@ static int mem_cgroup_precharge_mc(struc
> 
>  static void mem_cgroup_clear_mc(void)
>  {
> +	struct mem_cgroup *from = mc.from;
> +	struct mem_cgroup *to = mc.to;
> +
>  	/* we must uncharge all the leftover precharges from mc.to */
>  	if (mc.precharge) {
>  		__mem_cgroup_cancel_charge(mc.to, mc.precharge);
>  		mc.precharge = 0;
> -		memcg_oom_recover(mc.to);
>  	}
>  	/*
>  	 * we didn't uncharge from mc.from at mem_cgroup_move_account(), so
> @@ -4460,7 +4463,6 @@ static void mem_cgroup_clear_mc(void)
>  	if (mc.moved_charge) {
>  		__mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
>  		mc.moved_charge = 0;
> -		memcg_oom_recover(mc.from);
>  	}
>  	/* we must fixup refcnts and charges */
>  	if (mc.moved_swap) {
> @@ -4485,9 +4487,13 @@ static void mem_cgroup_clear_mc(void)
> 
>  		mc.moved_swap = 0;
>  	}
> +	spin_lock(&mc.lock);
>  	mc.from = NULL;
>  	mc.to = NULL;
>  	mc.moving_task = NULL;
> +	spin_unlock(&mc.lock);
> +	memcg_oom_recover(from);
> +	memcg_oom_recover(to);
>  	wake_up_all(&mc.waitq);
>  }
> 
> @@ -4516,12 +4522,14 @@ static int mem_cgroup_can_attach(struct
>  			VM_BUG_ON(mc.moved_charge);
>  			VM_BUG_ON(mc.moved_swap);
>  			VM_BUG_ON(mc.moving_task);
> +			spin_lock(&mc.lock);
>  			mc.from = from;
>  			mc.to = mem;
>  			mc.precharge = 0;
>  			mc.moved_charge = 0;
>  			mc.moved_swap = 0;
>  			mc.moving_task = current;
> +			spin_unlock(&mc.lock);
> 
>  			ret = mem_cgroup_precharge_mc(mm);
>  			if (ret)
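
(One aside for readers following along: calling memcg_oom_recover()
and wake_up_all() only after mc.from/mc.to have been cleared under
mc.lock means a woken waiter that re-checks mem_cgroup_under_move()
sees the move as already finished. The waiter side goes roughly like
this -- paraphrased from memory, not a verbatim quote of the file:)

	static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem)
	{
		if (mc.moving_task && current != mc.moving_task) {
			if (mem_cgroup_under_move(mem)) {
				DEFINE_WAIT(wait);
				prepare_to_wait(&mc.waitq, &wait, TASK_INTERRUPTIBLE);
				/* moving charge context might have finished. */
				if (mc.moving_task)
					schedule();
				/* woken by wake_up_all(&mc.waitq) in mem_cgroup_clear_mc() */
				finish_wait(&mc.waitq, &wait);
				return true;
			}
		}
		return false;
	}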