The patch titled Subject: mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages has been added to the -mm tree. Its filename is mm-stop-setting-page-mem_cgroup-pointer-for-slab-pages.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/mm-stop-setting-page-mem_cgroup-pointer-for-slab-pages.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/mm-stop-setting-page-mem_cgroup-pointer-for-slab-pages.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Roman Gushchin <guro@xxxxxx> Subject: mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages Every slab page charged to a non-root memory cgroup has a pointer to the memory cgroup and holds a reference to it, which protects a non-empty memory cgroup from being released. At the same time the page has a pointer to the corresponding kmem_cache, and also hold a reference to the kmem_cache. And kmem_cache by itself holds a reference to the cgroup. So there is clearly some redundancy, which allows to stop setting the page->mem_cgroup pointer and rely on getting memcg pointer indirectly via kmem_cache. Further it will allow to change this pointer easier, without a need to go over all charged pages. So let's stop setting page->mem_cgroup pointer for slab pages, and stop using the css refcounter directly for protecting the memory cgroup from going away. Instead rely on kmem_cache as an intermediate object. Make sure that vmstats and shrinker lists are working as previously, as well as /proc/kpagecgroup interface. Link: http://lkml.kernel.org/r/20190611231813.3148843-10-guro@xxxxxx Signed-off-by: Roman Gushchin <guro@xxxxxx> Acked-by: Vladimir Davydov <vdavydov.dev@xxxxxxxxx> Cc: Christoph Lameter <cl@xxxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxx> Cc: Shakeel Butt <shakeelb@xxxxxxxxxx> Cc: Waiman Long <longman@xxxxxxxxxx> Cc: David Rientjes <rientjes@xxxxxxxxxx> Cc: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx> Cc: Pekka Enberg <penberg@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- mm/list_lru.c | 3 + mm/memcontrol.c | 12 ++++--- mm/slab.h | 74 +++++++++++++++++++++++++++++++++++++--------- 3 files changed, 70 insertions(+), 19 deletions(-) --- a/mm/list_lru.c~mm-stop-setting-page-mem_cgroup-pointer-for-slab-pages +++ a/mm/list_lru.c @@ -12,6 +12,7 @@ #include <linux/slab.h> #include <linux/mutex.h> #include <linux/memcontrol.h> +#include "slab.h" #ifdef CONFIG_MEMCG_KMEM static LIST_HEAD(list_lrus); @@ -63,7 +64,7 @@ static __always_inline struct mem_cgroup if (!memcg_kmem_enabled()) return NULL; page = virt_to_head_page(ptr); - return page->mem_cgroup; + return memcg_from_slab_page(page); } static inline struct list_lru_one * --- a/mm/memcontrol.c~mm-stop-setting-page-mem_cgroup-pointer-for-slab-pages +++ a/mm/memcontrol.c @@ -486,7 +486,10 @@ ino_t page_cgroup_ino(struct page *page) unsigned long ino = 0; rcu_read_lock(); - memcg = READ_ONCE(page->mem_cgroup); + if (PageHead(page) && PageSlab(page)) + memcg = memcg_from_slab_page(page); + else + memcg = READ_ONCE(page->mem_cgroup); while (memcg && !(memcg->css.flags & CSS_ONLINE)) memcg = parent_mem_cgroup(memcg); if (memcg) @@ -2802,9 +2805,6 @@ int __memcg_kmem_charge_memcg(struct pag cancel_charge(memcg, nr_pages); return -ENOMEM; } - - page->mem_cgroup = memcg; - return 0; } @@ -2827,8 +2827,10 @@ int __memcg_kmem_charge(struct page *pag memcg = get_mem_cgroup_from_current(); if (!mem_cgroup_is_root(memcg)) { ret = __memcg_kmem_charge_memcg(page, gfp, order, memcg); - if (!ret) + if (!ret) { + page->mem_cgroup = memcg; __SetPageKmemcg(page); + } } css_put(&memcg->css); return ret; --- a/mm/slab.h~mm-stop-setting-page-mem_cgroup-pointer-for-slab-pages +++ a/mm/slab.h @@ -255,30 +255,67 @@ static inline struct kmem_cache *memcg_r return s->memcg_params.root_cache; } +/* + * Expects a pointer to a slab page. Please note, that PageSlab() check + * isn't sufficient, as it returns true also for tail compound slab pages, + * which do not have slab_cache pointer set. + * So this function assumes that the page can pass PageHead() and PageSlab() + * checks. + */ +static inline struct mem_cgroup *memcg_from_slab_page(struct page *page) +{ + struct kmem_cache *s; + + s = READ_ONCE(page->slab_cache); + if (s && !is_root_cache(s)) + return s->memcg_params.memcg; + + return NULL; +} + +/* + * Charge the slab page belonging to the non-root kmem_cache. + * Can be called for non-root kmem_caches only. + */ static __always_inline int memcg_charge_slab(struct page *page, gfp_t gfp, int order, struct kmem_cache *s) { + struct mem_cgroup *memcg; + struct lruvec *lruvec; int ret; - if (is_root_cache(s)) - return 0; - - ret = memcg_kmem_charge_memcg(page, gfp, order, s->memcg_params.memcg); + memcg = s->memcg_params.memcg; + ret = memcg_kmem_charge_memcg(page, gfp, order, memcg); if (ret) return ret; + lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg); + mod_lruvec_state(lruvec, cache_vmstat_idx(s), 1 << order); + + /* transer try_charge() page references to kmem_cache */ percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order); + css_put_many(&memcg->css, 1 << order); return 0; } +/* + * Uncharge a slab page belonging to a non-root kmem_cache. + * Can be called for non-root kmem_caches only. + */ static __always_inline void memcg_uncharge_slab(struct page *page, int order, struct kmem_cache *s) { - if (!is_root_cache(s)) - percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order); - memcg_kmem_uncharge(page, order); + struct mem_cgroup *memcg; + struct lruvec *lruvec; + + memcg = s->memcg_params.memcg; + lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg); + mod_lruvec_state(lruvec, cache_vmstat_idx(s), -(1 << order)); + memcg_kmem_uncharge_memcg(page, order, memcg); + + percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order); } extern void slab_init_memcg_params(struct kmem_cache *); @@ -314,6 +351,11 @@ static inline struct kmem_cache *memcg_r return s; } +static inline struct mem_cgroup *memcg_from_slab_page(struct page *page) +{ + return NULL; +} + static inline int memcg_charge_slab(struct page *page, gfp_t gfp, int order, struct kmem_cache *s) { @@ -351,18 +393,24 @@ static __always_inline int charge_slab_p gfp_t gfp, int order, struct kmem_cache *s) { - int ret = memcg_charge_slab(page, gfp, order, s); - - if (!ret) - mod_lruvec_page_state(page, cache_vmstat_idx(s), 1 << order); + if (is_root_cache(s)) { + mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), + 1 << order); + return 0; + } - return ret; + return memcg_charge_slab(page, gfp, order, s); } static __always_inline void uncharge_slab_page(struct page *page, int order, struct kmem_cache *s) { - mod_lruvec_page_state(page, cache_vmstat_idx(s), -(1 << order)); + if (is_root_cache(s)) { + mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), + -(1 << order)); + return; + } + memcg_uncharge_slab(page, order, s); } _ Patches currently in -mm which might be from guro@xxxxxx are mm-postpone-kmem_cache-memcg-pointer-initialization-to-memcg_link_cache.patch mm-rename-slab-delayed-deactivation-functions-and-fields.patch mm-generalize-postponed-non-root-kmem_cache-deactivation.patch mm-introduce-__memcg_kmem_uncharge_memcg.patch mm-unify-slab-and-slub-page-accounting.patch mm-dont-check-the-dying-flag-on-kmem_cache-creation.patch mm-synchronize-access-to-kmem_cache-dying-flag-using-a-spinlock.patch mm-rework-non-root-kmem_cache-lifecycle-management.patch mm-stop-setting-page-mem_cgroup-pointer-for-slab-pages.patch mm-reparent-memcg-kmem_caches-on-cgroup-removal.patch