If the kmem_cache refcount is greater than one, we should not mark the root kmem_cache as dying. If we incorrectly mark the root kmem_cache as dying, the non-root kmem_cache can never be destroyed, which results in a memory leak when the memcg is destroyed. The following steps reproduce the problem. 1) Use kmem_cache_create() to create a new kmem_cache named A. 2) Coincidentally, kmem_cache A is an alias for kmem_cache B, so only the refcount of B is increased. 3) Use kmem_cache_destroy() to destroy kmem_cache A; this only decreases B's refcount, but B is still marked as dying. 4) Create a new memory cgroup and allocate memory from kmem_cache A. This causes a non-root kmem_cache to be created for the allocation. 5) When the memory cgroup created in step 4) is destroyed, the non-root kmem_cache can never be destroyed. Repeating steps 4) and 5) leaks a lot of memory. So mark the root kmem_cache as dying only when its refcount reaches zero. Fixes: 92ee383f6daa ("mm: fix race between kmem_cache destroy, create and deactivate") Signed-off-by: Muchun Song <songmuchun@xxxxxxxxxxxxx> --- mm/slab_common.c | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 8c1ffbf7de45..83ee6211aec7 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -258,6 +258,11 @@ static void memcg_unlink_cache(struct kmem_cache *s) list_del(&s->memcg_params.kmem_caches_node); } } + +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s) +{ + return is_root_cache(s) && s->memcg_params.dying; +} #else static inline int init_memcg_params(struct kmem_cache *s, struct kmem_cache *root_cache) @@ -272,6 +277,11 @@ static inline void destroy_memcg_params(struct kmem_cache *s) static inline void memcg_unlink_cache(struct kmem_cache *s) { } + +static inline bool memcg_kmem_cache_dying(struct kmem_cache *s) +{ + return false; +} #endif /* CONFIG_MEMCG_KMEM */ /* @@ -326,6 +336,13 @@ int 
slab_unmergeable(struct kmem_cache *s) if (s->refcount < 0) return 1; + /* + * If the kmem_cache is dying, we should also skip this + * kmem_cache. + */ + if (memcg_kmem_cache_dying(s)) + return 1; + return 0; } @@ -944,8 +961,6 @@ void kmem_cache_destroy(struct kmem_cache *s) if (unlikely(!s)) return; - flush_memcg_workqueue(s); - get_online_cpus(); get_online_mems(); @@ -955,6 +970,30 @@ void kmem_cache_destroy(struct kmem_cache *s) if (s->refcount) goto out_unlock; +#ifdef CONFIG_MEMCG_KMEM + mutex_unlock(&slab_mutex); + + put_online_mems(); + put_online_cpus(); + + flush_memcg_workqueue(s); + + get_online_cpus(); + get_online_mems(); + + mutex_lock(&slab_mutex); + + if (WARN(s->refcount, + "kmem_cache_destroy %s: Slab cache is still referenced\n", + s->name)) { + /* + * Reset the dying flag set by flush_memcg_workqueue(). + */ + s->memcg_params.dying = false; + goto out_unlock; + } +#endif + err = shutdown_memcg_caches(s); if (!err) err = shutdown_cache(s); -- 2.11.0