Switch to per-object accounting of non-root slab objects. Charging is performed using the subpage charging API in the pre_alloc hook. If the required amount of memory has been charged successfully, we proceed with the actual allocation. Otherwise, -ENOMEM is returned. In the post_alloc hook we check whether the actual allocation succeeded. If so, the corresponding vmstats are bumped and memcg membership information is recorded. Otherwise, the charge is canceled. On the free path we look up the memcg membership information, decrement the stats and uncharge the memory. No operations are performed on root kmem_caches. Global per-node slab-related vmstats NR_SLAB_(UN)RECLAIMABLE_B are still modified from the (un)charge_slab_page() functions. The idea is to keep all slab pages accounted as slab pages at the system level. Memcg and lruvec counters now represent only the memory used by actual slab objects and do not include free space. Free space is shared and doesn't belong to any specific cgroup. Signed-off-by: Roman Gushchin <guro@xxxxxx> --- mm/slab.h | 152 ++++++++++++++++++++---------------------------------- 1 file changed, 57 insertions(+), 95 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index 28feabed1e9a..0f2f712de77a 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -340,72 +340,6 @@ static inline struct mem_cgroup *memcg_from_slab_page(struct page *page) return NULL; } -/* - * Charge the slab page belonging to the non-root kmem_cache. - * Can be called for non-root kmem_caches only. 
- */ -static __always_inline int memcg_charge_slab(struct page *page, - gfp_t gfp, int order, - struct kmem_cache *s) -{ - struct mem_cgroup *memcg; - struct lruvec *lruvec; - int ret; - - rcu_read_lock(); - memcg = READ_ONCE(s->memcg_params.memcg); - while (memcg && !css_tryget_online(&memcg->css)) - memcg = parent_mem_cgroup(memcg); - rcu_read_unlock(); - - if (unlikely(!memcg || mem_cgroup_is_root(memcg))) { - mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), - (PAGE_SIZE << order)); - percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order); - return 0; - } - - ret = memcg_kmem_charge_memcg(page, gfp, order, memcg); - if (ret) - goto out; - - lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg); - mod_lruvec_state(lruvec, cache_vmstat_idx(s), PAGE_SIZE << order); - - /* transer try_charge() page references to kmem_cache */ - percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order); - css_put_many(&memcg->css, 1 << order); -out: - css_put(&memcg->css); - return ret; -} - -/* - * Uncharge a slab page belonging to a non-root kmem_cache. - * Can be called for non-root kmem_caches only. 
- */ -static __always_inline void memcg_uncharge_slab(struct page *page, int order, - struct kmem_cache *s) -{ - struct mem_cgroup *memcg; - struct lruvec *lruvec; - - rcu_read_lock(); - memcg = READ_ONCE(s->memcg_params.memcg); - if (likely(!mem_cgroup_is_root(memcg))) { - lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg); - mod_lruvec_state(lruvec, cache_vmstat_idx(s), - -(PAGE_SIZE << order)); - memcg_kmem_uncharge_memcg(page, order, memcg); - } else { - mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), - -(PAGE_SIZE << order)); - } - rcu_read_unlock(); - - percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order); -} - static inline int memcg_alloc_page_memcg_vec(struct page *page, gfp_t gfp, unsigned int objects) { @@ -423,11 +357,31 @@ static inline void memcg_free_page_memcg_vec(struct page *page) page->mem_cgroup_vec = NULL; } +static inline struct kmem_cache *memcg_slab_pre_alloc_hook(struct kmem_cache *s, + struct mem_cgroup **memcgp, + size_t size, gfp_t flags) +{ + struct kmem_cache *cachep; + + cachep = memcg_kmem_get_cache(s, memcgp); + if (is_root_cache(cachep)) + return s; + + if (__memcg_kmem_charge_subpage(*memcgp, size * s->size, flags)) { + mem_cgroup_put(*memcgp); + memcg_kmem_put_cache(cachep); + cachep = NULL; + } + + return cachep; +} + static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct mem_cgroup *memcg, size_t size, void **p) { struct mem_cgroup_ptr *memcg_ptr; + struct lruvec *lruvec; struct page *page; unsigned long off; size_t i; @@ -439,6 +393,11 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, off = obj_to_index(s, page, p[i]); mem_cgroup_ptr_get(memcg_ptr); page->mem_cgroup_vec[off] = memcg_ptr; + lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg); + mod_lruvec_memcg_state(lruvec, cache_vmstat_idx(s), + s->size); + } else { + __memcg_kmem_uncharge_subpage(memcg, s->size); } } mem_cgroup_ptr_put(memcg_ptr); @@ -451,6 +410,8 @@ static inline void memcg_slab_free_hook(struct 
kmem_cache *s, struct page *page, void *p) { struct mem_cgroup_ptr *memcg_ptr; + struct mem_cgroup *memcg; + struct lruvec *lruvec; unsigned int off; if (!memcg_kmem_enabled() || is_root_cache(s)) @@ -459,6 +420,14 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s, struct page *page, off = obj_to_index(s, page, p); memcg_ptr = page->mem_cgroup_vec[off]; page->mem_cgroup_vec[off] = NULL; + rcu_read_lock(); + memcg = memcg_ptr->memcg; + if (likely(!mem_cgroup_is_root(memcg))) { + __memcg_kmem_uncharge_subpage(memcg, s->size); + lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg); + mod_lruvec_memcg_state(lruvec, cache_vmstat_idx(s), -s->size); + } + rcu_read_unlock(); mem_cgroup_ptr_put(memcg_ptr); } @@ -500,17 +469,6 @@ static inline struct mem_cgroup *memcg_from_slab_page(struct page *page) return NULL; } -static inline int memcg_charge_slab(struct page *page, gfp_t gfp, int order, - struct kmem_cache *s) -{ - return 0; -} - -static inline void memcg_uncharge_slab(struct page *page, int order, - struct kmem_cache *s) -{ -} - static inline int memcg_alloc_page_memcg_vec(struct page *page, gfp_t gfp, unsigned int objects) { @@ -521,6 +479,13 @@ static inline void memcg_free_page_memcg_vec(struct page *page) { } +static inline struct kmem_cache *memcg_slab_pre_alloc_hook(struct kmem_cache *s, + struct mem_cgroup **memcgp, + size_t size, gfp_t flags) +{ + return NULL; +} + static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct mem_cgroup *memcg, size_t size, void **p) @@ -561,30 +526,27 @@ static __always_inline int charge_slab_page(struct page *page, { int ret; - if (is_root_cache(s)) { - mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), - PAGE_SIZE << order); - return 0; - } - - ret = memcg_alloc_page_memcg_vec(page, gfp, objects); - if (ret) - return ret; + if (!is_root_cache(s)) { + ret = memcg_alloc_page_memcg_vec(page, gfp, objects); + if (ret) + return ret; - return memcg_charge_slab(page, gfp, order, s); + 
percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order); + } + mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), + PAGE_SIZE << order); + return 0; } static __always_inline void uncharge_slab_page(struct page *page, int order, struct kmem_cache *s) { - if (is_root_cache(s)) { - mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), - -(PAGE_SIZE << order)); - return; + if (!is_root_cache(s)) { + memcg_free_page_memcg_vec(page); + percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order); } - - memcg_free_page_memcg_vec(page); - memcg_uncharge_slab(page, order, s); + mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), + -(PAGE_SIZE << order)); } static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) @@ -656,7 +618,7 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, if (memcg_kmem_enabled() && ((flags & __GFP_ACCOUNT) || (s->flags & SLAB_ACCOUNT))) - return memcg_kmem_get_cache(s, memcgp); + return memcg_slab_pre_alloc_hook(s, memcgp, size, flags); return s; } -- 2.21.0