This is an alternative solution to the problem addressed in commit
815744d75152 ("mm: memcontrol: don't batch updates of local VM stats
and events").

Instead of adding a second set of percpu counters, which wastes memory
and slows down reading statistics in cgroup-v1, this patch uses two
arrays of atomic counters: local and nested statistics. An update then
needs the same number of atomic operations: one local update plus one
nested update for each parent cgroup. Readers of hierarchical statistics
have to sum two atomics, which isn't a big deal.

All updates are still batched using one set of percpu counters.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>
---
 include/linux/memcontrol.h | 19 +++++++----------
 mm/memcontrol.c            | 48 +++++++++++++++++++-------------------
 2 files changed, 29 insertions(+), 38 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 44c41462be33..4dd75d50c200 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -269,16 +269,16 @@ struct mem_cgroup {
 	atomic_t		moving_account;
 	struct task_struct	*move_lock_task;
 
-	/* Legacy local VM stats and events */
-	struct memcg_vmstats_percpu __percpu *vmstats_local;
-
 	/* Subtree VM stats and events (batched updates) */
 	struct memcg_vmstats_percpu __percpu *vmstats_percpu;
 
 	MEMCG_PADDING(_pad2_);
 
-	atomic_long_t		vmstats[MEMCG_NR_STAT];
-	atomic_long_t		vmevents[NR_VM_EVENT_ITEMS];
+	atomic_long_t		vmstats_local[MEMCG_NR_STAT];
+	atomic_long_t		vmstats_nested[MEMCG_NR_STAT];
+
+	atomic_long_t		vmevents_local[NR_VM_EVENT_ITEMS];
+	atomic_long_t		vmevents_nested[NR_VM_EVENT_ITEMS];
 
 	/* memory.events */
 	atomic_long_t		memory_events[MEMCG_NR_MEMORY_EVENTS];
@@ -557,7 +557,8 @@ void unlock_page_memcg(struct page *page);
  */
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
 {
-	long x = atomic_long_read(&memcg->vmstats[idx]);
+	long x = atomic_long_read(&memcg->vmstats_local[idx]) +
+		 atomic_long_read(&memcg->vmstats_nested[idx]);
 #ifdef CONFIG_SMP
 	if (x < 0)
 		x = 0;
@@ -572,11 +573,7 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
 static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg,
 						   int idx)
 {
-	long x = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		x += per_cpu(memcg->vmstats_local->stat[idx], cpu);
+	long x = atomic_long_read(&memcg->vmstats_local[idx]);
 #ifdef CONFIG_SMP
 	if (x < 0)
 		x = 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 06d33dfc4ec4..97debc8e4120 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -695,14 +695,13 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
 	if (mem_cgroup_disabled())
 		return;
 
-	__this_cpu_add(memcg->vmstats_local->stat[idx], val);
-
 	x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]);
 	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
 		struct mem_cgroup *mi;
 
-		for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
-			atomic_long_add(x, &mi->vmstats[idx]);
+		atomic_long_add(x, &memcg->vmstats_local[idx]);
+		for (mi = memcg; (mi = parent_mem_cgroup(mi)); )
+			atomic_long_add(x, &mi->vmstats_nested[idx]);
 		x = 0;
 	}
 	__this_cpu_write(memcg->vmstats_percpu->stat[idx], x);
@@ -777,14 +776,13 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
 	if (mem_cgroup_disabled())
 		return;
 
-	__this_cpu_add(memcg->vmstats_local->events[idx], count);
-
 	x = count + __this_cpu_read(memcg->vmstats_percpu->events[idx]);
 	if (unlikely(x > MEMCG_CHARGE_BATCH)) {
 		struct mem_cgroup *mi;
 
-		for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
-			atomic_long_add(x, &mi->vmevents[idx]);
+		atomic_long_add(x, &memcg->vmevents_local[idx]);
+		for (mi = memcg; (mi = parent_mem_cgroup(mi)); )
+			atomic_long_add(x, &mi->vmevents_nested[idx]);
 		x = 0;
 	}
 	__this_cpu_write(memcg->vmstats_percpu->events[idx], x);
@@ -792,17 +790,13 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
 
 static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
 {
-	return atomic_long_read(&memcg->vmevents[event]);
+	return atomic_long_read(&memcg->vmevents_local[event]) +
+	       atomic_long_read(&memcg->vmevents_nested[event]);
 }
 
 static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
 {
-	long x = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		x += per_cpu(memcg->vmstats_local->events[event], cpu);
-	return x;
+	return atomic_long_read(&memcg->vmevents_local[event]);
 }
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
@@ -2257,9 +2251,11 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 			long x;
 
 			x = this_cpu_xchg(memcg->vmstats_percpu->stat[i], 0);
-			if (x)
-				for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
-					atomic_long_add(x, &mi->vmstats[i]);
+			if (x) {
+				atomic_long_add(x, &memcg->vmstats_local[i]);
+				for (mi = memcg; (mi = parent_mem_cgroup(mi)); )
+					atomic_long_add(x, &mi->vmstats_nested[i]);
+			}
 
 			if (i >= NR_VM_NODE_STAT_ITEMS)
 				continue;
@@ -2280,9 +2276,11 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 			long x;
 
 			x = this_cpu_xchg(memcg->vmstats_percpu->events[i], 0);
-			if (x)
-				for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
-					atomic_long_add(x, &mi->vmevents[i]);
+			if (x) {
+				atomic_long_add(x, &memcg->vmevents_local[i]);
+				for (mi = memcg; (mi = parent_mem_cgroup(mi)); )
+					atomic_long_add(x, &mi->vmevents_nested[i]);
+			}
 		}
 	}
 
@@ -4085,7 +4083,8 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
  */
 static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx)
 {
-	long x = atomic_long_read(&memcg->vmstats[idx]);
+	long x = atomic_long_read(&memcg->vmstats_local[idx]) +
+		 atomic_long_read(&memcg->vmstats_nested[idx]);
 	int cpu;
 
 	for_each_online_cpu(cpu)
@@ -4638,7 +4637,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 	for_each_node(node)
 		free_mem_cgroup_per_node_info(memcg, node);
 	free_percpu(memcg->vmstats_percpu);
-	free_percpu(memcg->vmstats_local);
 	kfree(memcg);
 }
 
@@ -4667,10 +4665,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	if (memcg->id.id < 0)
 		goto fail;
 
-	memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu);
-	if (!memcg->vmstats_local)
-		goto fail;
-
 	memcg->vmstats_percpu = alloc_percpu(struct memcg_vmstats_percpu);
 	if (!memcg->vmstats_percpu)
 		goto fail;
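
For readers outside the kernel tree, here is a minimal standalone sketch of the
scheme described above: small deltas are batched in a plain counter (standing in
for the real percpu batch), flushed into the group's own local atomic plus one
nested atomic add per ancestor, and hierarchical readers sum the two atomics
while local readers touch only one. All names here (struct group, BATCH,
mod_stat, read_hier, read_local) are invented for the illustration; this is not
the memcg code itself, and the per-CPU aspect is deliberately simplified away.

	#include <stdatomic.h>
	#include <stdlib.h>
	#include <stdio.h>

	#define BATCH 32	/* stands in for MEMCG_CHARGE_BATCH */

	struct group {
		struct group *parent;
		long batch;		/* stands in for the percpu batch counter */
		atomic_long local;	/* this group's own statistics */
		atomic_long nested;	/* statistics of all descendants */
	};

	/* Writer: batch small deltas, flush as one local add plus one nested add per ancestor. */
	static void mod_stat(struct group *g, long val)
	{
		long x = g->batch + val;

		if (labs(x) > BATCH) {
			atomic_fetch_add(&g->local, x);
			for (struct group *p = g->parent; p; p = p->parent)
				atomic_fetch_add(&p->nested, x);
			x = 0;
		}
		g->batch = x;
	}

	/* Hierarchical reader: sum of two atomics, like memcg_page_state(). */
	static long read_hier(struct group *g)
	{
		return atomic_load(&g->local) + atomic_load(&g->nested);
	}

	/* Local reader: a single atomic, no percpu walk, like memcg_page_state_local(). */
	static long read_local(struct group *g)
	{
		return atomic_load(&g->local);
	}

	int main(void)
	{
		struct group root = { 0 }, child = { .parent = &root };

		for (int i = 0; i < 1000; i++)
			mod_stat(&child, 1);
		printf("child local %ld, root hierarchical %ld\n",
		       read_local(&child), read_hier(&root));
		return 0;
	}

Note how the flush loop starts at the parent, so a group's own counters never
land in its nested array; that is what lets the local (non-hierarchical) read
stay a single atomic_long_read() instead of a walk over possible CPUs.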