The patch titled Subject: memcg: factor out mem_cgroup_stat_aggregate() has been added to the -mm mm-unstable branch. Its filename is memcg-factor-out-mem_cgroup_stat_aggregate.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/memcg-factor-out-mem_cgroup_stat_aggregate.patch This patch will later appear in the mm-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Xiu Jianfeng <xiujianfeng@xxxxxxxxxxxxxxx> Subject: memcg: factor out mem_cgroup_stat_aggregate() Date: Sat, 26 Oct 2024 09:34:07 +0000 Currently mem_cgroup_css_rstat_flush() is used to flush the per-CPU statistics from a specified CPU into the global statistics of the memcg. It processes three kinds of data in three for loops using exactly the same method. Therefore, the for loop can be factored out and may make the code more clean. Link: https://lkml.kernel.org/r/20241026093407.310955-1-xiujianfeng@xxxxxxxxxxxxxxx Signed-off-by: Xiu Jianfeng <xiujianfeng@xxxxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Muchun Song <muchun.song@xxxxxxxxx> Cc: Roman Gushchin <roman.gushchin@xxxxxxxxx> Cc: Shakeel Butt <shakeel.butt@xxxxxxxxx> Cc: Wang Weiyang <wangweiyang2@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- mm/memcontrol.c | 131 ++++++++++++++++++++++++---------------------- 1 file changed, 71 insertions(+), 60 deletions(-) --- a/mm/memcontrol.c~memcg-factor-out-mem_cgroup_stat_aggregate +++ a/mm/memcontrol.c @@ -3728,68 +3728,90 @@ static void mem_cgroup_css_reset(struct memcg_wb_domain_size_changed(memcg); } -static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu) +struct aggregate_control { + /* pointer to the aggregated (CPU and subtree aggregated) counters */ + long *aggregate; + /* pointer to the non-hierarchichal (CPU aggregated) counters */ + long *local; + /* pointer to the pending child counters during tree propagation */ + long *pending; + /* pointer to the parent's pending counters, could be NULL */ + long *ppending; + /* pointer to the percpu counters to be aggregated */ + long *cstat; + /* pointer to the percpu counters of the last aggregation*/ + long *cstat_prev; + /* size of the above counters */ + int size; +}; + +static void mem_cgroup_stat_aggregate(struct aggregate_control *ac) { - struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct mem_cgroup *parent = parent_mem_cgroup(memcg); - struct memcg_vmstats_percpu *statc; + int i; long delta, delta_cpu, v; - int i, nid; - - statc = per_cpu_ptr(memcg->vmstats_percpu, cpu); - for (i = 0; i < MEMCG_VMSTAT_SIZE; i++) { + for (i = 0; i < ac->size; i++) { /* * Collect the aggregated propagation counts of groups * below us. We're in a per-cpu loop here and this is * a global counter, so the first cycle will get them. */ - delta = memcg->vmstats->state_pending[i]; + delta = ac->pending[i]; if (delta) - memcg->vmstats->state_pending[i] = 0; + ac->pending[i] = 0; /* Add CPU changes on this level since the last flush */ delta_cpu = 0; - v = READ_ONCE(statc->state[i]); - if (v != statc->state_prev[i]) { - delta_cpu = v - statc->state_prev[i]; + v = READ_ONCE(ac->cstat[i]); + if (v != ac->cstat_prev[i]) { + delta_cpu = v - ac->cstat_prev[i]; delta += delta_cpu; - statc->state_prev[i] = v; + ac->cstat_prev[i] = v; } /* Aggregate counts on this level and propagate upwards */ if (delta_cpu) - memcg->vmstats->state_local[i] += delta_cpu; + ac->local[i] += delta_cpu; if (delta) { - memcg->vmstats->state[i] += delta; - if (parent) - parent->vmstats->state_pending[i] += delta; + ac->aggregate[i] += delta; + if (ac->ppending) + ac->ppending[i] += delta; } } +} - for (i = 0; i < NR_MEMCG_EVENTS; i++) { - delta = memcg->vmstats->events_pending[i]; - if (delta) - memcg->vmstats->events_pending[i] = 0; - - delta_cpu = 0; - v = READ_ONCE(statc->events[i]); - if (v != statc->events_prev[i]) { - delta_cpu = v - statc->events_prev[i]; - delta += delta_cpu; - statc->events_prev[i] = v; - } +static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct mem_cgroup *parent = parent_mem_cgroup(memcg); + struct memcg_vmstats_percpu *statc; + struct aggregate_control ac; + int nid; - if (delta_cpu) - memcg->vmstats->events_local[i] += delta_cpu; + statc = per_cpu_ptr(memcg->vmstats_percpu, cpu); - if (delta) { - memcg->vmstats->events[i] += delta; - if (parent) - parent->vmstats->events_pending[i] += delta; - } - } + ac = (struct aggregate_control) { + .aggregate = memcg->vmstats->state, + .local = memcg->vmstats->state_local, + .pending = memcg->vmstats->state_pending, + .ppending = parent ? parent->vmstats->state_pending : NULL, + .cstat = statc->state, + .cstat_prev = statc->state_prev, + .size = MEMCG_VMSTAT_SIZE, + }; + mem_cgroup_stat_aggregate(&ac); + + ac = (struct aggregate_control) { + .aggregate = memcg->vmstats->events, + .local = memcg->vmstats->events_local, + .pending = memcg->vmstats->events_pending, + .ppending = parent ? parent->vmstats->events_pending : NULL, + .cstat = statc->events, + .cstat_prev = statc->events_prev, + .size = NR_MEMCG_EVENTS, + }; + mem_cgroup_stat_aggregate(&ac); for_each_node_state(nid, N_MEMORY) { struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid]; @@ -3802,28 +3824,17 @@ static void mem_cgroup_css_rstat_flush(s lstatc = per_cpu_ptr(pn->lruvec_stats_percpu, cpu); - for (i = 0; i < NR_MEMCG_NODE_STAT_ITEMS; i++) { - delta = lstats->state_pending[i]; - if (delta) - lstats->state_pending[i] = 0; - - delta_cpu = 0; - v = READ_ONCE(lstatc->state[i]); - if (v != lstatc->state_prev[i]) { - delta_cpu = v - lstatc->state_prev[i]; - delta += delta_cpu; - lstatc->state_prev[i] = v; - } - - if (delta_cpu) - lstats->state_local[i] += delta_cpu; - - if (delta) { - lstats->state[i] += delta; - if (plstats) - plstats->state_pending[i] += delta; - } - } + ac = (struct aggregate_control) { + .aggregate = lstats->state, + .local = lstats->state_local, + .pending = lstats->state_pending, + .ppending = plstats ? plstats->state_pending : NULL, + .cstat = lstatc->state, + .cstat_prev = lstatc->state_prev, + .size = NR_MEMCG_NODE_STAT_ITEMS, + }; + mem_cgroup_stat_aggregate(&ac); + } WRITE_ONCE(statc->stats_updates, 0); /* We are in a per-cpu loop here, only do the atomic write once */ _ Patches currently in -mm which might be from xiujianfeng@xxxxxxxxxxxxxxx are memcg-factor-out-mem_cgroup_stat_aggregate.patch