Today we account each page individually to the memcg, which works well
enough, if a little inefficiently (N atomic operations per allocation
instead of one).

Unfortunately, the stats can get out of sync when i915 calls vmap() with
VM_MAP_PUT_PAGES.  The pages being passed were not allocated by vmalloc,
so the MEMCG_VMALLOC counter was never incremented, but it is decremented
when the pages are freed with vfree().

Solve all of this by tracking the memcg at the vm_struct level.  This
logic has to live in mm/memcontrol.c as it calls several functions which
are currently static to that file.
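To make the failure mode concrete, here is a minimal sketch of the kind
of caller that trips it.  This is hypothetical code modeled on the i915
usage, not copied from the driver; "nr" is arbitrary and error handling
is elided:

	unsigned int i, nr = 16;
	struct page **pages;
	void *addr;

	/* The array must be kvmalloc'd; vfree() will kvfree() it */
	pages = kvmalloc_array(nr, sizeof(*pages), GFP_KERNEL);
	for (i = 0; i < nr; i++)
		pages[i] = alloc_page(GFP_KERNEL_ACCOUNT);

	/*
	 * The pages were charged by the page allocator, not by vmalloc,
	 * so MEMCG_VMALLOC was never incremented for them ...
	 */
	addr = vmap(pages, nr, VM_MAP | VM_MAP_PUT_PAGES, PAGE_KERNEL);

	/*
	 * ... but freeing decremented the counter once per page,
	 * driving the stat below its true value.
	 */
	vfree(addr);

With the memcg tracked in the vm_struct instead, vfree() only uncharges
what obj_cgroup_charge_vmalloc() actually charged; for areas created by
vmap(), vm->objcg is never set, so the uncharge is a no-op.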
Fixes: b944afc9d64d ("mm: add a VM_MAP_PUT_PAGES flag for vmap")
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
---
 include/linux/memcontrol.h |  7 ++++++
 include/linux/vmalloc.h    |  3 +++
 mm/memcontrol.c            | 46 ++++++++++++++++++++++++++++++++++++++
 mm/vmalloc.c               | 14 ++++++------
 4 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5502aa8e138e..83ebcadebba6 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1676,6 +1676,10 @@ static inline struct obj_cgroup *get_obj_cgroup_from_current(void)
 int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
 void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);
 
+int obj_cgroup_charge_vmalloc(struct obj_cgroup **objcgp,
+		unsigned int nr_pages, gfp_t gfp);
+void obj_cgroup_uncharge_vmalloc(struct obj_cgroup *objcg,
+		unsigned int nr_pages);
 extern struct static_key_false memcg_bpf_enabled_key;
 
 static inline bool memcg_bpf_enabled(void)
@@ -1756,6 +1760,9 @@ static inline void __memcg_kmem_uncharge_page(struct page *page, int order)
 {
 }
 
+/* Must be macros to avoid dereferencing objcg in vm_struct */
+#define obj_cgroup_charge_vmalloc(objcgp, nr_pages, gfp)	0
+#define obj_cgroup_uncharge_vmalloc(objcg, nr_pages)	do { } while (0)
 static inline struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
 {
 	return NULL;
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 31e9ffd936e3..ec7c2d607382 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -60,6 +60,9 @@ struct vm_struct {
 #endif
 	unsigned int		nr_pages;
 	phys_addr_t		phys_addr;
+#ifdef CONFIG_MEMCG
+	struct obj_cgroup	*objcg;
+#endif
 	const void		*caller;
 };
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7b3503d12aaf..629bffc3e26d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5472,4 +5472,50 @@ static int __init mem_cgroup_swap_init(void)
 }
 subsys_initcall(mem_cgroup_swap_init);
 
+/**
+ * obj_cgroup_charge_vmalloc - Charge vmalloc memory
+ * @objcgp: Pointer to an object cgroup
+ * @nr_pages: Number of pages
+ * @gfp: Memory allocation flags
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int obj_cgroup_charge_vmalloc(struct obj_cgroup **objcgp,
+		unsigned int nr_pages, gfp_t gfp)
+{
+	struct obj_cgroup *objcg;
+	int err;
+
+	if (mem_cgroup_disabled() || !(gfp & __GFP_ACCOUNT))
+		return 0;
+
+	objcg = current_obj_cgroup();
+	if (!objcg)
+		return 0;
+
+	err = obj_cgroup_charge_pages(objcg, gfp, nr_pages);
+	if (err)
+		return err;
+	obj_cgroup_get(objcg);
+	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_VMALLOC, nr_pages);
+	*objcgp = objcg;
+
+	return 0;
+}
+
+/**
+ * obj_cgroup_uncharge_vmalloc - Uncharge vmalloc memory
+ * @objcg: The object cgroup
+ * @nr_pages: Number of pages
+ */
+void obj_cgroup_uncharge_vmalloc(struct obj_cgroup *objcg,
+		unsigned int nr_pages)
+{
+	if (!objcg)
+		return;
+	mod_memcg_state(objcg->memcg, MEMCG_VMALLOC, 0L - nr_pages);
+	obj_cgroup_uncharge_pages(objcg, nr_pages);
+	obj_cgroup_put(objcg);
+}
+
 #endif /* CONFIG_SWAP */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f009b21705c1..438995d2f9f8 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3374,7 +3374,6 @@ void vfree(const void *addr)
 		struct page *page = vm->pages[i];
 
 		BUG_ON(!page);
-		mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
 		/*
 		 * High-order allocs for huge vmallocs are split, so
 		 * can be freed as an array of order-0 allocations
@@ -3383,6 +3382,7 @@ void vfree(const void *addr)
 			cond_resched();
 	}
 	atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages);
+	obj_cgroup_uncharge_vmalloc(vm->objcg, vm->nr_pages);
 	kvfree(vm->pages);
 	kfree(vm);
 }
@@ -3536,6 +3536,9 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 	struct page *page;
 	int i;
 
+	/* Accounting handled in caller */
+	gfp &= ~__GFP_ACCOUNT;
+
 	/*
 	 * For order-0 pages we make use of bulk allocator, if
 	 * the page array is partly or not at all populated due
@@ -3669,12 +3672,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		node, page_order, nr_small_pages, area->pages);
 
 	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
-	if (gfp_mask & __GFP_ACCOUNT) {
-		int i;
-
-		for (i = 0; i < area->nr_pages; i++)
-			mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
-	}
+	ret = obj_cgroup_charge_vmalloc(&area->objcg, area->nr_pages, gfp_mask);
+	if (ret)
+		goto fail;
 
 	/*
 	 * If not enough pages were obtained to accomplish an
-- 
2.45.2