[PATCH -mm 03/12] memcg: fix root vs memcg cache destruction race

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When a root cache is being destroyed and we are about to initiate
destruction of its child caches (see kmem_cache_destroy_memcg_children),
we should handle races with pending memcg cache destruction works
somehow. Currently, we simply cancel the memcg_params::destroy work
before calling kmem_cache_destroy for a memcg cache, but that's totally
wrong, because the work handler may destroy and free the cache resulting
in a use-after-free in cancel_work_sync. Furthermore, we do not handle
the case when memcg cache destruction is scheduled after we start to
destroy the root cache - that's possible, because nothing prevents
memcgs from going offline while we are destroying a root cache. In that
case we are likely to get a use-after-free in the work handler.

This patch resolves the race as follows:

1) It makes kmem_cache_destroy silently exit if it is called for a memcg
   cache while the corresponding root cache is being destroyed, leaving
   the destruction to kmem_cache_destroy_memcg_children. That makes call
   to cancel_work_sync safe if it is called from the root cache
   destruction path.

2) It moves cancel_work_sync to be called after we unregistered a child
   cache from its memcg. That prevents the work from being rescheduled
   from memcg offline path.

Signed-off-by: Vladimir Davydov <vdavydov@xxxxxxxxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxx>
Cc: Glauber Costa <glommer@xxxxxxxxx>
---
 include/linux/memcontrol.h |    2 +-
 include/linux/slab.h       |    1 +
 mm/memcontrol.c            |   22 ++-----------
 mm/slab_common.c           |   77 ++++++++++++++++++++++++++++++++++++--------
 4 files changed, 69 insertions(+), 33 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e54fb469a908..6128323ea453 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -512,7 +512,7 @@ void memcg_update_array_size(int num_groups);
 struct kmem_cache *
 __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 
-int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);
+int kmem_cache_destroy_memcg_children(struct kmem_cache *s);
 
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 3ed53de256ea..ee9f1b0382ac 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -117,6 +117,7 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			void (*)(void *));
 #ifdef CONFIG_MEMCG_KMEM
 void kmem_cache_create_memcg(struct mem_cgroup *, struct kmem_cache *);
+void kmem_cache_destroy_memcg(struct kmem_cache *, bool);
 #endif
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 416da5dc3d2a..1b9634090454 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3345,10 +3345,10 @@ static void kmem_cache_destroy_work_func(struct work_struct *w)
 			return;
 	}
 
-	kmem_cache_destroy(cachep);
+	kmem_cache_destroy_memcg(cachep, false);
 }
 
-int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 {
 	struct kmem_cache *c;
 	int i, failed = 0;
@@ -3378,23 +3378,7 @@ int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 		if (!c)
 			continue;
 
-		/*
-		 * We will now manually delete the caches, so to avoid races
-		 * we need to cancel all pending destruction workers and
-		 * proceed with destruction ourselves.
-		 *
-		 * kmem_cache_destroy() will call kmem_cache_shrink internally,
-		 * and that could spawn the workers again: it is likely that
-		 * the cache still have active pages until this very moment.
-		 * This would lead us back to memcg_release_pages().
-		 *
-		 * But that will not execute at all if the refcount is > 0, so
-		 * increment it to guarantee we are in control.
-		 */
-		atomic_inc(&c->memcg_params->refcount);
-		cancel_work_sync(&c->memcg_params->destroy);
-		kmem_cache_destroy(c);
-
+		kmem_cache_destroy_memcg(c, true);
 		if (cache_from_memcg_idx(s, i))
 			failed++;
 	}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index f3cfccf76dda..05ba3cd1b507 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -302,28 +302,70 @@ out_unlock:
 	put_online_cpus();
 }
 
-static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+static void __kmem_cache_destroy(struct kmem_cache *, bool);
+
+void kmem_cache_destroy_memcg(struct kmem_cache *s, bool destroying_root)
+{
+	BUG_ON(is_root_cache(s));
+	__kmem_cache_destroy(s, destroying_root);
+}
+
+static int __kmem_cache_shutdown_memcg(struct kmem_cache *s,
+				       bool destroying_root)
 {
-	int rc;
+	int rc = 0;
 
-	if (!s->memcg_params ||
-	    !s->memcg_params->is_root_cache)
+	if (!destroying_root) {
+		struct kmem_cache *root;
+
+		root = memcg_root_cache(s);
+		BUG_ON(root == s);
+		/*
+		 * If we are racing with the root cache destruction, let
+		 * kmem_cache_destroy_memcg_children() finish with this cache.
+		 */
+		if (!root->refcount) {
+			s->refcount++;
+			return 1;
+		}
+	}
+
+	if (!s->memcg_params)
 		return 0;
 
 	mutex_unlock(&slab_mutex);
-	rc = __kmem_cache_destroy_memcg_children(s);
+	if (s->memcg_params->is_root_cache) {
+		rc = kmem_cache_destroy_memcg_children(s);
+	} else {
+		/*
+		 * There might be a destruction work pending, which needs to be
+		 * cancelled before we start to destroy the cache.
+		 *
+		 * This should be done after we deleted all the references to
+		 * this cache, otherwise the work could be rescheduled.
+		 *
+		 * __kmem_cache_shutdown() will call kmem_cache_shrink()
+		 * internally, and that could spawn the worker again. We
+		 * increment the refcount to avoid that.
+		 */
+		if (destroying_root) {
+			cancel_work_sync(&s->memcg_params->destroy);
+			atomic_inc(&s->memcg_params->refcount);
+		}
+	}
 	mutex_lock(&slab_mutex);
 
 	return rc;
 }
 #else
-static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+static int __kmem_cache_shutdown_memcg(struct kmem_cache *s,
+				       bool destroying_root)
 {
 	return 0;
 }
 #endif /* CONFIG_MEMCG_KMEM */
 
-void kmem_cache_destroy(struct kmem_cache *s)
+static void __kmem_cache_destroy(struct kmem_cache *s, bool destroying_root)
 {
 	get_online_cpus();
 	mutex_lock(&slab_mutex);
@@ -332,19 +374,17 @@ void kmem_cache_destroy(struct kmem_cache *s)
 	if (s->refcount)
 		goto out_unlock;
 
-	if (kmem_cache_destroy_memcg_children(s) != 0)
-		goto out_unlock;
-
 	list_del(&s->list);
 	memcg_unregister_cache(s);
 
+	if (__kmem_cache_shutdown_memcg(s, destroying_root) != 0)
+		goto out_undelete;
+
 	if (__kmem_cache_shutdown(s) != 0) {
-		list_add(&s->list, &slab_caches);
-		memcg_register_cache(s);
 		printk(KERN_ERR "kmem_cache_destroy %s: "
 		       "Slab cache still has objects\n", s->name);
 		dump_stack();
-		goto out_unlock;
+		goto out_undelete;
 	}
 
 	mutex_unlock(&slab_mutex);
@@ -360,6 +400,17 @@ out_unlock:
 	mutex_unlock(&slab_mutex);
 out_put_cpus:
 	put_online_cpus();
+	return;
+out_undelete:
+	list_add(&s->list, &slab_caches);
+	memcg_register_cache(s);
+	goto out_unlock;
+}
+
+void kmem_cache_destroy(struct kmem_cache *s)
+{
+	BUG_ON(!is_root_cache(s));
+	__kmem_cache_destroy(s, true);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
-- 
1.7.10.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]