On Thu 29-05-14 12:15:58, Johannes Weiner wrote:
> Due to an old optimization to keep expensive res_counter changes at a
> minimum, the root_mem_cgroup res_counter is never charged; there is no
> limit at that level anyway, and any statistics can be generated on
> demand by summing up the counters of all other cgroups.
> 
> However, with per-cpu charge caches, res_counter operations do not
> even show up in profiles anymore, so this optimization is no longer
> necessary.
> 
> Remove it to simplify the code.
> 
> Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>

OK, I like the resulting code much more.

Acked-by: Michal Hocko <mhocko@xxxxxxxx>

> ---
>  mm/memcontrol.c | 149 ++++++++++++++++----------------------------------------
>  1 file changed, 43 insertions(+), 106 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 184e67cce4e4..84195a80068c 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2573,9 +2573,8 @@ static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
>  	unsigned long nr_reclaimed;
>  	unsigned long flags = 0;
>  	unsigned long long size;
> +	int ret = 0;
>  
> -	if (mem_cgroup_is_root(memcg))
> -		goto done;
>  retry:
>  	if (consume_stock(memcg, nr_pages))
>  		goto done;
> @@ -2656,13 +2655,15 @@ nomem:
>  	if (!(gfp_mask & __GFP_NOFAIL))
>  		return -ENOMEM;
>  bypass:
> -	return -EINTR;
> +	memcg = root_mem_cgroup;
> +	ret = -EINTR;
> +	goto retry;
>  
>  done_restock:
>  	if (batch > nr_pages)
>  		refill_stock(memcg, batch - nr_pages);
>  done:
> -	return 0;
> +	return ret;
>  }
>  
>  /**
> @@ -2702,13 +2703,11 @@ static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
>  static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
>  				       unsigned int nr_pages)
>  {
> -	if (!mem_cgroup_is_root(memcg)) {
> -		unsigned long bytes = nr_pages * PAGE_SIZE;
> +	unsigned long bytes = nr_pages * PAGE_SIZE;
>  
> -		res_counter_uncharge(&memcg->res, bytes);
> -		if (do_swap_account)
> -			res_counter_uncharge(&memcg->memsw, bytes);
> -	}
> +	res_counter_uncharge(&memcg->res, bytes);
> +	if (do_swap_account)
> +		res_counter_uncharge(&memcg->memsw, bytes);
>  }
>  
>  /*
> @@ -2720,9 +2719,6 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
>  {
>  	unsigned long bytes = nr_pages * PAGE_SIZE;
>  
> -	if (mem_cgroup_is_root(memcg))
> -		return;
> -
>  	res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
>  	if (do_swap_account)
>  		res_counter_uncharge_until(&memcg->memsw,
> @@ -3997,7 +3993,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
>  	 * replacement page, so leave it alone when phasing out the
>  	 * page that is unused after the migration.
>  	 */
> -	if (!end_migration && !mem_cgroup_is_root(memcg))
> +	if (!end_migration)
>  		mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
>  
>  	return memcg;
> @@ -4130,8 +4126,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
>  	 * We uncharge this because swap is freed. This memcg can
>  	 * be obsolete one. We avoid calling css_tryget_online().
>  	 */
> -	if (!mem_cgroup_is_root(memcg))
> -		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
> +	res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
>  	mem_cgroup_swap_statistics(memcg, false);
>  	css_put(&memcg->css);
>  }
> @@ -4825,78 +4820,24 @@ out:
>  	return retval;
>  }
>  
> -
> -static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
> -					       enum mem_cgroup_stat_index idx)
> -{
> -	struct mem_cgroup *iter;
> -	long val = 0;
> -
> -	/* Per-cpu values can be negative, use a signed accumulator */
> -	for_each_mem_cgroup_tree(iter, memcg)
> -		val += mem_cgroup_read_stat(iter, idx);
> -
> -	if (val < 0) /* race ? */
> -		val = 0;
> -	return val;
> -}
> -
> -static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
> -{
> -	u64 val;
> -
> -	if (!mem_cgroup_is_root(memcg)) {
> -		if (!swap)
> -			return res_counter_read_u64(&memcg->res, RES_USAGE);
> -		else
> -			return res_counter_read_u64(&memcg->memsw, RES_USAGE);
> -	}
> -
> -	/*
> -	 * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
> -	 * as well as in MEM_CGROUP_STAT_RSS_HUGE.
> -	 */
> -	val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
> -	val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
> -
> -	if (swap)
> -		val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
> -
> -	return val << PAGE_SHIFT;
> -}
> -
>  static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
> -			       struct cftype *cft)
> +				struct cftype *cft)
>  {
>  	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
> -	u64 val;
> -	int name;
> -	enum res_type type;
> -
> -	type = MEMFILE_TYPE(cft->private);
> -	name = MEMFILE_ATTR(cft->private);
> +	enum res_type type = MEMFILE_TYPE(cft->private);
> +	int name = MEMFILE_ATTR(cft->private);
>  
>  	switch (type) {
>  	case _MEM:
> -		if (name == RES_USAGE)
> -			val = mem_cgroup_usage(memcg, false);
> -		else
> -			val = res_counter_read_u64(&memcg->res, name);
> -		break;
> +		return res_counter_read_u64(&memcg->res, name);
>  	case _MEMSWAP:
> -		if (name == RES_USAGE)
> -			val = mem_cgroup_usage(memcg, true);
> -		else
> -			val = res_counter_read_u64(&memcg->memsw, name);
> -		break;
> +		return res_counter_read_u64(&memcg->memsw, name);
>  	case _KMEM:
> -		val = res_counter_read_u64(&memcg->kmem, name);
> +		return res_counter_read_u64(&memcg->kmem, name);
>  		break;
>  	default:
>  		BUG();
>  	}
> -
> -	return val;
>  }
>  
>  #ifdef CONFIG_MEMCG_KMEM
> @@ -5376,7 +5317,10 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
>  	if (!t)
>  		goto unlock;
>  
> -	usage = mem_cgroup_usage(memcg, swap);
> +	if (!swap)
> +		usage = res_counter_read_u64(&memcg->res, RES_USAGE);
> +	else
> +		usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
>  
>  	/*
>  	 * current_threshold points to threshold just below or equal to usage.
> @@ -5468,15 +5412,15 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
>  
>  	mutex_lock(&memcg->thresholds_lock);
>  
> -	if (type == _MEM)
> +	if (type == _MEM) {
>  		thresholds = &memcg->thresholds;
> -	else if (type == _MEMSWAP)
> +		usage = res_counter_read_u64(&memcg->res, RES_USAGE);
> +	} else if (type == _MEMSWAP) {
>  		thresholds = &memcg->memsw_thresholds;
> -	else
> +		usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
> +	} else
>  		BUG();
>  
> -	usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
> -
>  	/* Check if a threshold crossed before adding a new one */
>  	if (thresholds->primary)
>  		__mem_cgroup_threshold(memcg, type == _MEMSWAP);
> @@ -5556,18 +5500,19 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
>  	int i, j, size;
>  
>  	mutex_lock(&memcg->thresholds_lock);
> -	if (type == _MEM)
> +
> +	if (type == _MEM) {
>  		thresholds = &memcg->thresholds;
> -	else if (type == _MEMSWAP)
> +		usage = res_counter_read_u64(&memcg->res, RES_USAGE);
> +	} else if (type == _MEMSWAP) {
>  		thresholds = &memcg->memsw_thresholds;
> -	else
> +		usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
> +	} else
>  		BUG();
>  
>  	if (!thresholds->primary)
>  		goto unlock;
>  
> -	usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
> -
>  	/* Check if a threshold crossed before removing */
>  	__mem_cgroup_threshold(memcg, type == _MEMSWAP);
>  
> @@ -6329,9 +6274,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
>  		 * core guarantees its existence.
>  		 */
>  	} else {
> -		res_counter_init(&memcg->res, NULL);
> -		res_counter_init(&memcg->memsw, NULL);
> -		res_counter_init(&memcg->kmem, NULL);
> +		res_counter_init(&memcg->res, &root_mem_cgroup->res);
> +		res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw);
> +		res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
>  		/*
>  		 * Deeper hierachy with use_hierarchy == false doesn't make
>  		 * much sense so let cgroup subsystem know about this
> @@ -6449,11 +6394,6 @@ static int mem_cgroup_do_precharge(unsigned long count)
>  	int batch_count = PRECHARGE_COUNT_AT_ONCE;
>  	struct mem_cgroup *memcg = mc.to;
>  
> -	if (mem_cgroup_is_root(memcg)) {
> -		mc.precharge += count;
> -		/* we don't need css_get for root */
> -		return ret;
> -	}
>  	/* try to charge at once */
>  	if (count > 1) {
>  		struct res_counter *dummy;
> @@ -6769,21 +6709,18 @@ static void __mem_cgroup_clear_mc(void)
>  	/* we must fixup refcnts and charges */
>  	if (mc.moved_swap) {
>  		/* uncharge swap account from the old cgroup */
> -		if (!mem_cgroup_is_root(mc.from))
> -			res_counter_uncharge(&mc.from->memsw,
> -					     PAGE_SIZE * mc.moved_swap);
> +		res_counter_uncharge(&mc.from->memsw,
> +				     PAGE_SIZE * mc.moved_swap);
>  
>  		for (i = 0; i < mc.moved_swap; i++)
>  			css_put(&mc.from->css);
>  
> -		if (!mem_cgroup_is_root(mc.to)) {
> -			/*
> -			 * we charged both to->res and to->memsw, so we should
> -			 * uncharge to->res.
> -			 */
> -			res_counter_uncharge(&mc.to->res,
> -					     PAGE_SIZE * mc.moved_swap);
> -		}
> +		/*
> +		 * we charged both to->res and to->memsw, so we should
> +		 * uncharge to->res.
> +		 */
> +		res_counter_uncharge(&mc.to->res,
> +				     PAGE_SIZE * mc.moved_swap);
>  		/* we've already done css_get(mc.to) */
>  		mc.moved_swap = 0;
>  	}
> -- 
> 1.9.3
> 

-- 
Michal Hocko
SUSE Labs
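As an aside for readers unfamiliar with res_counter hierarchies: the reason RES_USAGE can now be read straight from the root counters is the res_counter_init() hunk above, which gives every first-level group the root counter as its parent, so each charge propagates all the way up and root usage always equals the hierarchy-wide total that mem_cgroup_usage() used to compute by summing per-cpu statistics. What follows is only a minimal userspace sketch of that propagation, not the kernel code; the names merely echo the res_counter API, and limits, locking and failure unwinding are all left out.

#include <stdio.h>

struct res_counter {
	unsigned long long usage;	/* bytes currently charged */
	struct res_counter *parent;	/* NULL only for the root */
};

static void res_counter_init(struct res_counter *rc, struct res_counter *parent)
{
	rc->usage = 0;
	rc->parent = parent;
}

/* A charge walks up the parent chain, so the root sees every charge. */
static void res_counter_charge(struct res_counter *rc, unsigned long long bytes)
{
	for (; rc; rc = rc->parent)
		rc->usage += bytes;
}

int main(void)
{
	struct res_counter root, child_a, child_b;

	res_counter_init(&root, NULL);
	/* After the patch, first-level groups get the root counter as parent. */
	res_counter_init(&child_a, &root);
	res_counter_init(&child_b, &root);

	res_counter_charge(&child_a, 3ULL << 12);	/* 3 pages */
	res_counter_charge(&child_b, 5ULL << 12);	/* 5 pages */

	/* Root usage is read directly instead of summing per-cpu stats. */
	printf("root usage: %llu bytes\n", root.usage);	/* prints 32768 */
	return 0;
}

Built with any C compiler, this prints 32768 bytes (8 pages) for the root after the two child charges, which mirrors what res_counter_read_u64(&memcg->res, RES_USAGE) returns for the root group once charges are no longer bypassed.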