It's possible to remove a big chunk of the redundant code by making memcg_flush_offline_percpu() to take cpumask as an argument and flush percpu data on all cpus belonging to the mask instead of all possible cpus. Then memcg_hotplug_cpu_dead() can call it with a single CPU bit set. This approach allows to remove all duplicated code, but safe the performance optimization made in memcg_flush_offline_percpu(): only one atomic operation per data entry. for_each_data_entry() for_each_cpu(cpu. cpumask) sum_events() flush() Otherwise it would be one atomic operation per data entry per cpu: for_each_cpu(cpu) for_each_data_entry() flush() Signed-off-by: Roman Gushchin <guro@xxxxxx> --- mm/memcontrol.c | 61 ++++++++----------------------------------------- 1 file changed, 9 insertions(+), 52 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0f18bf2afea8..92c80275d5eb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2122,11 +2122,12 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) /* * Flush all per-cpu stats and events into atomics. * Try to minimize the number of atomic writes by gathering data from - * all cpus locally, and then make one atomic update. + * all cpus in cpumask locally, and then make one atomic update. * No locking is required, because no one has an access to * the offlined percpu data. */ -static void memcg_flush_offline_percpu(struct mem_cgroup *memcg) +static void memcg_flush_offline_percpu(struct mem_cgroup *memcg, + const struct cpumask *cpumask) { struct memcg_vmstats_percpu __percpu *vmstats_percpu; struct lruvec_stat __percpu *lruvec_stat_cpu; @@ -2140,7 +2141,7 @@ static void memcg_flush_offline_percpu(struct mem_cgroup *memcg) int nid; x = 0; - for_each_possible_cpu(cpu) + for_each_cpu(cpu, cpumask) x += per_cpu(vmstats_percpu->stat[i], cpu); if (x) atomic_long_add(x, &memcg->vmstats[i]); @@ -2153,7 +2154,7 @@ static void memcg_flush_offline_percpu(struct mem_cgroup *memcg) lruvec_stat_cpu = pn->lruvec_stat_cpu_offlined; x = 0; - for_each_possible_cpu(cpu) + for_each_cpu(cpu, cpumask) x += per_cpu(lruvec_stat_cpu->count[i], cpu); if (x) atomic_long_add(x, &pn->lruvec_stat[i]); @@ -2162,7 +2163,7 @@ static void memcg_flush_offline_percpu(struct mem_cgroup *memcg) for (i = 0; i < NR_VM_EVENT_ITEMS; i++) { x = 0; - for_each_possible_cpu(cpu) + for_each_cpu(cpu, cpumask) x += per_cpu(vmstats_percpu->events[i], cpu); if (x) atomic_long_add(x, &memcg->vmevents[i]); @@ -2171,8 +2172,6 @@ static void memcg_flush_offline_percpu(struct mem_cgroup *memcg) static int memcg_hotplug_cpu_dead(unsigned int cpu) { - struct memcg_vmstats_percpu __percpu *vmstats_percpu; - struct lruvec_stat __percpu *lruvec_stat_cpu; struct memcg_stock_pcp *stock; struct mem_cgroup *memcg; @@ -2180,50 +2179,8 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu) drain_stock(stock); rcu_read_lock(); - for_each_mem_cgroup(memcg) { - int i; - - vmstats_percpu = (struct memcg_vmstats_percpu __percpu *) - rcu_dereference(memcg->vmstats_percpu); - - for (i = 0; i < MEMCG_NR_STAT; i++) { - int nid; - long x; - - if (vmstats_percpu) { - x = this_cpu_xchg(vmstats_percpu->stat[i], 0); - if (x) - atomic_long_add(x, &memcg->vmstats[i]); - } - - if (i >= NR_VM_NODE_STAT_ITEMS) - continue; - - for_each_node(nid) { - struct mem_cgroup_per_node *pn; - - pn = mem_cgroup_nodeinfo(memcg, nid); - - lruvec_stat_cpu = (struct lruvec_stat __percpu*) - rcu_dereference(pn->lruvec_stat_cpu); - if (!lruvec_stat_cpu) - continue; - x = this_cpu_xchg(lruvec_stat_cpu->count[i], 0); - if (x) - atomic_long_add(x, &pn->lruvec_stat[i]); - } - } - - for (i = 0; i < NR_VM_EVENT_ITEMS; i++) { - long x; - - if (vmstats_percpu) { - x = this_cpu_xchg(vmstats_percpu->events[i], 0); - if (x) - atomic_long_add(x, &memcg->vmevents[i]); - } - } - } + for_each_mem_cgroup(memcg) + memcg_flush_offline_percpu(memcg, get_cpu_mask(cpu)); rcu_read_unlock(); return 0; @@ -4668,7 +4625,7 @@ static void percpu_rcu_free(struct rcu_head *rcu) struct mem_cgroup *memcg = container_of(rcu, struct mem_cgroup, rcu); int node; - memcg_flush_offline_percpu(memcg); + memcg_flush_offline_percpu(memcg, cpu_possible_mask); for_each_node(node) { struct mem_cgroup_per_node *pn = memcg->nodeinfo[node]; -- 2.20.1