At the moment, the amount of memory allocated for stats-related structs in the mem_cgroup corresponds to the size of enum node_stat_item. However, not all fields in enum node_stat_item have corresponding memcg stats. The fields of enum node_stat_item are sorted in such a way that all the fields with corresponding memcg stats are at the start of the enum node_stat_item. So, let's just make an explicit boundary within enum node_stat_item and use that boundary to allocate memory for the stats-related structs of memcgs.

For a given x86_64 config, the size of the stats structs with and without the patch is:

structs size in bytes              w/o    with
struct lruvec_stats               1128     648
struct lruvec_stats_percpu         752     432
struct memcg_vmstats              1832    1352
struct memcg_vmstats_percpu       1280     960

The memory savings are further compounded by the fact that these structs are allocated for each cpu and for each node. To be precise, for each memcg, the memory saved would be:

Memory saved = ((21 * 3 * NR_NODES) + (21 * 2 * NR_NODES * NR_CPUS) + (21 * 3) + (21 * 2 * NR_CPUS)) * sizeof(long)

where 21 is the number of fields eliminated.
Signed-off-by: Shakeel Butt <shakeel.butt@xxxxxxxxx> --- include/linux/memcontrol.h | 12 ++++++------ include/linux/mmzone.h | 8 ++++++-- mm/memcontrol.c | 5 ++++- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9aba0d0462ca..d68db7a0e829 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -32,7 +32,7 @@ struct kmem_cache; /* Cgroup-specific page state, on top of universal node page state */ enum memcg_stat_item { - MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, + MEMCG_SWAP = NR_VM_NODE_MEMCG_STAT_ITEMS, MEMCG_SOCK, MEMCG_PERCPU_B, MEMCG_VMALLOC, @@ -92,21 +92,21 @@ struct mem_cgroup_reclaim_iter { struct lruvec_stats_percpu { /* Local (CPU and cgroup) state */ - long state[NR_VM_NODE_STAT_ITEMS]; + long state[NR_VM_NODE_MEMCG_STAT_ITEMS]; /* Delta calculation for lockless upward propagation */ - long state_prev[NR_VM_NODE_STAT_ITEMS]; + long state_prev[NR_VM_NODE_MEMCG_STAT_ITEMS]; }; struct lruvec_stats { /* Aggregated (CPU and subtree) state */ - long state[NR_VM_NODE_STAT_ITEMS]; + long state[NR_VM_NODE_MEMCG_STAT_ITEMS]; /* Non-hierarchical (CPU aggregated) state */ - long state_local[NR_VM_NODE_STAT_ITEMS]; + long state_local[NR_VM_NODE_MEMCG_STAT_ITEMS]; /* Pending child counts during tree propagation */ - long state_pending[NR_VM_NODE_STAT_ITEMS]; + long state_pending[NR_VM_NODE_MEMCG_STAT_ITEMS]; }; /* diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 989ca97402c6..59592f3c7d9b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -192,8 +192,12 @@ enum node_stat_item { NR_SHMEM_THPS, NR_FILE_THPS, NR_ANON_THPS, - /* No memcg stats for the following fields. */ - NR_SHMEM_PMDMAPPED, + /* + * No memcg stats for the following fields. Please add stats which have + * memcg counterpart above NR_VM_NODE_MEMCG_STAT_ITEMS. 
+ */ + NR_VM_NODE_MEMCG_STAT_ITEMS, + NR_SHMEM_PMDMAPPED = NR_VM_NODE_MEMCG_STAT_ITEMS, NR_FILE_PMDMAPPED, NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ NR_VMSCAN_WRITE, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 833d09c1d523..bb1bbf417a46 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1648,6 +1648,9 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) { int i; + /* Reduce by 1 for MEMCG_SWAP as that is not exposed in v2. */ + BUILD_BUG_ON(ARRAY_SIZE(memory_stats) != MEMCG_NR_STAT - 1); + /* * Provide statistics on the state of the memory subsystem as * well as cumulative event counters that show past behavior. @@ -5869,7 +5872,7 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu) lstatc = per_cpu_ptr(pn->lruvec_stats_percpu, cpu); - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { + for (i = 0; i < NR_VM_NODE_MEMCG_STAT_ITEMS; i++) { delta = pn->lruvec_stats.state_pending[i]; if (delta) pn->lruvec_stats.state_pending[i] = 0; -- 2.43.0