From: Shakeel Butt <shakeelb@xxxxxxxxxx> Subject: memcg: add per-memcg vmalloc stat The kvmalloc* allocation functions can fallback to vmalloc allocations and more often on long running machines. In addition the kernel does have __GFP_ACCOUNT kvmalloc* calls. So, often on long running machines, the memory.stat does not tell the complete picture which type of memory is charged to the memcg. So add a per-memcg vmalloc stat. [shakeelb@xxxxxxxxxx: page_memcg() within rcu lock, per Muchun] Link: https://lkml.kernel.org/r/20211222052457.1960701-1-shakeelb@xxxxxxxxxx [akpm@xxxxxxxxxxxxxxxxxxxx: remove cast, per Muchun] [shakeelb@xxxxxxxxxx: remove area->page[0] checks and move to page by page accounting per Michal] Link: https://lkml.kernel.org/r/20220104222341.3972772-1-shakeelb@xxxxxxxxxx Link: https://lkml.kernel.org/r/20211221215336.1922823-1-shakeelb@xxxxxxxxxx Signed-off-by: Shakeel Butt <shakeelb@xxxxxxxxxx> Acked-by: Roman Gushchin <guro@xxxxxx> Reviewed-by: Muchun Song <songmuchun@xxxxxxxxxxxxx> Acked-by: Michal Hocko <mhocko@xxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- Documentation/admin-guide/cgroup-v2.rst | 3 +++ include/linux/memcontrol.h | 21 +++++++++++++++++++++ mm/memcontrol.c | 1 + mm/vmalloc.c | 13 +++++++++++-- 4 files changed, 36 insertions(+), 2 deletions(-) --- a/Documentation/admin-guide/cgroup-v2.rst~memcg-add-per-memcg-vmalloc-stat +++ a/Documentation/admin-guide/cgroup-v2.rst @@ -1314,6 +1314,9 @@ PAGE_SIZE multiple when read back. sock (npn) Amount of memory used in network transmission buffers + vmalloc (npn) + Amount of memory used for vmap backed memory. + shmem Amount of cached filesystem data that is swap-backed, such as tmpfs, shm segments, shared anonymous mmap()s --- a/include/linux/memcontrol.h~memcg-add-per-memcg-vmalloc-stat +++ a/include/linux/memcontrol.h @@ -33,6 +33,7 @@ enum memcg_stat_item { MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, MEMCG_SOCK, MEMCG_PERCPU_B, + MEMCG_VMALLOC, MEMCG_NR_STAT, }; @@ -992,6 +993,21 @@ static inline void mod_memcg_state(struc local_irq_restore(flags); } +static inline void mod_memcg_page_state(struct page *page, + int idx, int val) +{ + struct mem_cgroup *memcg; + + if (mem_cgroup_disabled()) + return; + + rcu_read_lock(); + memcg = page_memcg(page); + if (memcg) + mod_memcg_state(memcg, idx, val); + rcu_read_unlock(); +} + static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { return READ_ONCE(memcg->vmstats.state[idx]); @@ -1447,6 +1463,11 @@ static inline void mod_memcg_state(struc { } +static inline void mod_memcg_page_state(struct page *page, + int idx, int val) +{ +} + static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { return 0; --- a/mm/memcontrol.c~memcg-add-per-memcg-vmalloc-stat +++ a/mm/memcontrol.c @@ -1375,6 +1375,7 @@ static const struct memory_stat memory_s { "pagetables", NR_PAGETABLE }, { "percpu", MEMCG_PERCPU_B }, { "sock", MEMCG_SOCK }, + { "vmalloc", MEMCG_VMALLOC }, { "shmem", NR_SHMEM }, { "file_mapped", NR_FILE_MAPPED }, { "file_dirty", NR_FILE_DIRTY }, --- a/mm/vmalloc.c~memcg-add-per-memcg-vmalloc-stat +++ a/mm/vmalloc.c @@ -31,6 +31,7 @@ #include <linux/kmemleak.h> #include <linux/atomic.h> #include <linux/compiler.h> +#include <linux/memcontrol.h> #include <linux/llist.h> #include <linux/bitops.h> #include <linux/rbtree_augmented.h> @@ -2623,12 +2624,13 @@ static void __vunmap(const void *addr, i if (deallocate_pages) { unsigned int page_order = vm_area_page_order(area); - int i; + int i, step = 1U << page_order; - for (i = 0; i < area->nr_pages; i += 1U << page_order) { + for (i = 0; i < area->nr_pages; i += step) { struct page *page = area->pages[i]; BUG_ON(!page); + mod_memcg_page_state(page, MEMCG_VMALLOC, -step); __free_pages(page, page_order); cond_resched(); } @@ -2955,6 +2957,13 @@ static void *__vmalloc_area_node(struct page_order, nr_small_pages, area->pages); atomic_long_add(area->nr_pages, &nr_vmalloc_pages); + if (gfp_mask & __GFP_ACCOUNT) { + int i, step = 1U << page_order; + + for (i = 0; i < area->nr_pages; i += step) + mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, + step); + } /* * If not enough pages were obtained to accomplish an _