On Thu, May 28, 2020 at 04:25:06PM -0700, Roman Gushchin wrote:
> Percpu memory can represent a noticeable chunk of the total
> memory consumption, especially on big machines with many CPUs.
> Let's track percpu memory usage for each memcg and display
> it in memory.stat.
>
> A percpu allocation is usually scattered over multiple pages
> (and nodes), and can be significantly smaller than a page.
> So let's add a byte-sized counter on the memcg level:
> MEMCG_PERCPU_B. The byte-sized vmstat infra created for slabs
> can be reused as-is for the percpu case.
>
> Signed-off-by: Roman Gushchin <guro@xxxxxx>
> ---
>  Documentation/admin-guide/cgroup-v2.rst |  4 ++++
>  include/linux/memcontrol.h              |  8 ++++++++
>  mm/memcontrol.c                         |  4 +++-
>  mm/percpu.c                             | 10 ++++++++++
>  4 files changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
> index fed4e1d2a343..aa8cb6dadadc 100644
> --- a/Documentation/admin-guide/cgroup-v2.rst
> +++ b/Documentation/admin-guide/cgroup-v2.rst
> @@ -1276,6 +1276,10 @@ PAGE_SIZE multiple when read back.
>  	  Amount of memory used for storing in-kernel data
>  	  structures.
>
> +	percpu
> +	  Amount of memory used for storing per-cpu kernel
> +	  data structures.
> +
>  	sock
>  	  Amount of memory used in network transmission buffers
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 7a84d9164449..f62a95d472f7 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -32,11 +32,19 @@ struct kmem_cache;
>  enum memcg_stat_item {
>  	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
>  	MEMCG_SOCK,
> +	MEMCG_PERCPU_B,
>  	/* XXX: why are these zone and not node counters? */
>  	MEMCG_KERNEL_STACK_KB,
>  	MEMCG_NR_STAT,
>  };
>
> +static __always_inline bool memcg_stat_item_in_bytes(enum memcg_stat_item item)
> +{
> +	if (item == MEMCG_PERCPU_B)
> +		return true;
> +	return vmstat_item_in_bytes(item);
> +}
> +
>  enum memcg_memory_event {
>  	MEMCG_LOW,
>  	MEMCG_HIGH,
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 7bc3fd196210..5007d1585a4a 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -783,7 +783,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
>  	if (mem_cgroup_disabled())
>  		return;
>
> -	if (vmstat_item_in_bytes(idx))
> +	if (memcg_stat_item_in_bytes(idx))
>  		threshold <<= PAGE_SHIFT;
>
>  	x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]);
> @@ -1490,6 +1490,8 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
>  	seq_buf_printf(&s, "slab %llu\n",
>  		       (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) +
>  			     memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B)));
> +	seq_buf_printf(&s, "percpu %llu\n",
> +		       (u64)memcg_page_state(memcg, MEMCG_PERCPU_B));
>  	seq_buf_printf(&s, "sock %llu\n",
>  		       (u64)memcg_page_state(memcg, MEMCG_SOCK) *
>  		       PAGE_SIZE);
> diff --git a/mm/percpu.c b/mm/percpu.c
> index 85f5755c9114..b4b3e9c8a6d1 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -1608,6 +1608,11 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
>
>  	if (chunk) {
>  		chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg;
> +
> +		rcu_read_lock();
> +		mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
> +				size * num_possible_cpus());
> +		rcu_read_unlock();
>  	} else {
>  		obj_cgroup_uncharge(objcg, size * num_possible_cpus());
>  		obj_cgroup_put(objcg);
> @@ -1626,6 +1631,11 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
>
>  	obj_cgroup_uncharge(objcg, size * num_possible_cpus());
>
> +	rcu_read_lock();
> +	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
> +			-(size * num_possible_cpus()));
> +	rcu_read_unlock();
> +
>  	obj_cgroup_put(objcg);
>  }
>
> --
> 2.25.4
>

Acked-by: Dennis Zhou <dennis@xxxxxxxxxx>

Thanks,
Dennis
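
A standalone sketch (not part of the patch; the object size and CPU count
are invented examples) of the charge arithmetic the two percpu hooks
perform, showing why the counter is byte-sized rather than page-sized:

#include <stdio.h>

int main(void)
{
	size_t size = 48;	/* per-CPU object size in bytes (example) */
	unsigned int cpus = 64;	/* stand-in for num_possible_cpus() */

	/*
	 * Each possible CPU gets its own copy, so the memcg is charged
	 * size * num_possible_cpus() bytes on allocation and uncharged
	 * the same amount on free: 48 * 64 = 3072 bytes here. That is
	 * far less than a page per CPU, hence the byte-granular
	 * MEMCG_PERCPU_B counter instead of a page-based one.
	 */
	printf("charged: %zu bytes\n", size * cpus);
	return 0;
}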