On Mon 16-12-13 00:36:05, Hugh Dickins wrote:
[...]

OK, I went through the patch and it looks good except for the suspicious
ctrl->lock handling in swap_cgroup_reassign (see below).

I am just suggesting splitting it into 4 parts:
	* swap_cgroup_mutex -> swap_cgroup_lock
	* swapon cleanup
	* drop irqsave when taking ctrl->lock
	* mem_cgroup_reparent_swap
but I will leave the split up to you. Just make sure that the fix is a
separate patch, please.

[...]
> --- 3.13-rc4/mm/page_cgroup.c	2013-02-18 15:58:34.000000000 -0800
> +++ linux/mm/page_cgroup.c	2013-12-15 14:34:36.312485960 -0800
> @@ -322,7 +322,8 @@ void __meminit pgdat_page_cgroup_init(st
> 
>  #ifdef CONFIG_MEMCG_SWAP
> 
> -static DEFINE_MUTEX(swap_cgroup_mutex);
> +static DEFINE_SPINLOCK(swap_cgroup_lock);
> +

This one is worth a separate patch IMO.

>  struct swap_cgroup_ctrl {
>  	struct page **map;
>  	unsigned long length;
> @@ -353,14 +354,11 @@ struct swap_cgroup {
>  /*
>   * allocate buffer for swap_cgroup.
>   */
> -static int swap_cgroup_prepare(int type)
> +static int swap_cgroup_prepare(struct swap_cgroup_ctrl *ctrl)
>  {
>  	struct page *page;
> -	struct swap_cgroup_ctrl *ctrl;
>  	unsigned long idx, max;
> 
> -	ctrl = &swap_cgroup_ctrl[type];
> -
>  	for (idx = 0; idx < ctrl->length; idx++) {
>  		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>  		if (!page)

This together with the swap_cgroup_swapon change should be a separate
cleanup patch.

> @@ -407,18 +405,17 @@ unsigned short swap_cgroup_cmpxchg(swp_e
>  {
>  	struct swap_cgroup_ctrl *ctrl;
>  	struct swap_cgroup *sc;
> -	unsigned long flags;
>  	unsigned short retval;
> 
>  	sc = lookup_swap_cgroup(ent, &ctrl);
> 
> -	spin_lock_irqsave(&ctrl->lock, flags);
> +	spin_lock(&ctrl->lock);
>  	retval = sc->id;
>  	if (retval == old)
>  		sc->id = new;
>  	else
>  		retval = 0;
> -	spin_unlock_irqrestore(&ctrl->lock, flags);
> +	spin_unlock(&ctrl->lock);
>  	return retval;
>  }
> 
> @@ -435,14 +432,13 @@ unsigned short swap_cgroup_record(swp_en
>  	struct swap_cgroup_ctrl *ctrl;
>  	struct swap_cgroup *sc;
>  	unsigned short old;
> -	unsigned long flags;
> 
>  	sc = lookup_swap_cgroup(ent, &ctrl);
> 
> -	spin_lock_irqsave(&ctrl->lock, flags);
> +	spin_lock(&ctrl->lock);
>  	old = sc->id;
>  	sc->id = id;
> -	spin_unlock_irqrestore(&ctrl->lock, flags);
> +	spin_unlock(&ctrl->lock);
> 
>  	return old;
>  }

I would prefer these two in a separate patch as well. I have no idea why
these were ever IRQ aware; this has never been needed AFAICS, and
e9e58a4ec3b10 is not very specific about it...

> @@ -451,19 +447,60 @@ unsigned short swap_cgroup_record(swp_en
>   * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry
>   * @ent: swap entry to be looked up.
>   *
> - * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
> + * Returns ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
>   */
>  unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
>  {
>  	return lookup_swap_cgroup(ent, NULL)->id;
>  }
> 
> +/**
> + * swap_cgroup_reassign - assign all old entries to new (before old is freed).
> + * @old: id of emptied memcg whose entries are now to be reassigned
> + * @new: id of parent memcg to which those entries are to be assigned
> + *
> + * Returns number of entries reassigned, for debugging or for statistics.
> + */
> +long swap_cgroup_reassign(unsigned short old, unsigned short new)
> +{
> +	long reassigned = 0;
> +	int type;
> +
> +	for (type = 0; type < MAX_SWAPFILES; type++) {
> +		struct swap_cgroup_ctrl *ctrl = &swap_cgroup_ctrl[type];
> +		unsigned long idx;
> +
> +		for (idx = 0; idx < ACCESS_ONCE(ctrl->length); idx++) {
> +			struct swap_cgroup *sc, *scend;
> +
> +			spin_lock(&swap_cgroup_lock);
> +			if (idx >= ACCESS_ONCE(ctrl->length))
> +				goto unlock;
> +			sc = page_address(ctrl->map[idx]);
> +			for (scend = sc + SC_PER_PAGE; sc < scend; sc++) {
> +				if (sc->id != old)
> +					continue;

Is this safe? What prevents a race where the id is set to old right
after this unlocked check?

> +				spin_lock(&ctrl->lock);
> +				if (sc->id == old) {

Also, it seems the compiler is free to optimize the unlocked test above
away, no? You need ACCESS_ONCE there as well, I guess.
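Something along these lines is what I have in mind (just an untested
sketch, reusing the names from the patch):

	if (ACCESS_ONCE(sc->id) != old)
		continue;
	spin_lock(&ctrl->lock);
	/*
	 * Re-check under ctrl->lock: the unlocked ACCESS_ONCE read
	 * above is merely a fast path to skip non-matching entries,
	 * this locked test makes the real decision.
	 */
	if (sc->id == old) {
		sc->id = new;
		reassigned++;
	}
	spin_unlock(&ctrl->lock);

That would at least make the intention explicit: every writer of sc->id
in this file takes ctrl->lock, so the locked re-check decides and the
unlocked read can only cause an entry to be skipped, never corrupted.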
> +					sc->id = new;
> +					reassigned++;
> +				}
> +				spin_unlock(&ctrl->lock);
> +			}
> +unlock:
> +			spin_unlock(&swap_cgroup_lock);
> +			cond_resched();
> +		}
> +	}
> +	return reassigned;
> +}
> +
>  int swap_cgroup_swapon(int type, unsigned long max_pages)
>  {
>  	void *array;
>  	unsigned long array_size;
>  	unsigned long length;
> -	struct swap_cgroup_ctrl *ctrl;
> +	struct swap_cgroup_ctrl ctrl;
> 
>  	if (!do_swap_account)
>  		return 0;
[...]
-- 
Michal Hocko
SUSE Labs