Re: [PATCH vfs/for-next v2] cgroup: fix top cgroup refcnt leak

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



David Howells <dhowells@xxxxxxxxxx> wrote:

> 	umount $d/a1
> 	umount $d/a2
> 
> then a cgroup gets leaked at the end.  Just retaining the final:
> 
> 	for i in `seq 2`; do 
> 		umount $d/a$i
> 	done
> 
> in place of the two final expanded umount commands fixes the issue.

The issue also goes away if I set PERCPU_REF_INIT_ATOMIC on the refcount
object when initialising it:-/

For reference, the sequence of css_get/tryget/put calls is:

*** 00000000e2655b5f RIN   1: cgroup1_get_tree+0x5de/0x73b
*** 00000000e2655b5f GET   2: cgroup1_get_tree+0x6e1/0x73b
*** 00000000e2655b5f PUT   1: cgroup_fs_context_free+0x12c/0x15f

*** 00000000e2655b5f TRYO  2: cgroup1_get_tree+0x4aa/0x73b
*** 00000000e2655b5f PUT   1: cgroup_do_get_tree+0x1c5/0x1fa
*** 00000000e2655b5f GET   2: cgroup1_get_tree+0x6e1/0x73b
*** 00000000e2655b5f PUT   1: cgroup_fs_context_free+0x12c/0x15f

*** 00000000e2655b5f TRY   2: cgroup_kn_lock_live+0x145/0x18a
*** 00000000e2655b5f GET   3: cgroup_mkdir+0x241/0x454
*** 00000000e2655b5f PUT   2: cgroup_mkdir+0xad/0x454
*** 00000000e2655b5f PUT   1: cgroup_kill_sb+0x134/0x161

*** 00000000e2655b5f TRYO  2: cgroup1_get_tree+0x4aa/0x73b
*** 00000000e2655b5f GET   3: cgroup1_get_tree+0x6e1/0x73b
*** 00000000e2655b5f PUT   2: cgroup_fs_context_free+0x12c/0x15f

*** 00000000e2655b5f TRYO  3: cgroup1_get_tree+0x4aa/0x73b
*** 00000000e2655b5f PUT   2: cgroup_do_get_tree+0x1c5/0x1fa
*** 00000000e2655b5f GET   3: cgroup1_get_tree+0x6e1/0x73b
*** 00000000e2655b5f PUT   2: cgroup_fs_context_free+0x12c/0x15f

*** 00000000e2655b5f KIL   2: cgroup_kill_sb+0x146/0x161
*** 00000000e2655b5f PUT   0: css_free_rwork_fn+0x399/0x5ec

As obtained with the attached patch.

I'm not sure the refcount is correct after the second block (at the end of the
second mount).  Should it have one ref per active sb?

David
---
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 5e1694fe035b..23d022eb669f 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -165,6 +165,8 @@ struct cgroup_subsys_state {
 	 * fields of the containing structure.
 	 */
 	struct cgroup_subsys_state *parent;
+
+	bool debug;
 };
 
 /*
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index bb0c7da50ed2..76ca2be4986d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -295,6 +295,8 @@ void css_task_iter_end(struct css_task_iter *it);
  * Inline functions.
  */
 
+extern void css_refcount(struct cgroup_subsys_state *css, const char *op);
+
 /**
  * css_get - obtain a reference on the specified css
  * @css: target css
@@ -303,8 +305,10 @@ void css_task_iter_end(struct css_task_iter *it);
  */
 static inline void css_get(struct cgroup_subsys_state *css)
 {
-	if (!(css->flags & CSS_NO_REF))
+	if (!(css->flags & CSS_NO_REF)) {
 		percpu_ref_get(&css->refcnt);
+		css_refcount(css, "GET");
+	}
 }
 
 /**
@@ -316,8 +320,10 @@ static inline void css_get(struct cgroup_subsys_state *css)
  */
 static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
 {
-	if (!(css->flags & CSS_NO_REF))
+	if (!(css->flags & CSS_NO_REF)) {
 		percpu_ref_get_many(&css->refcnt, n);
+		css_refcount(css, "GETM");
+	}
 }
 
 /**
@@ -333,8 +339,11 @@ static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
  */
 static inline bool css_tryget(struct cgroup_subsys_state *css)
 {
-	if (!(css->flags & CSS_NO_REF))
-		return percpu_ref_tryget(&css->refcnt);
+	if (css->flags & CSS_NO_REF)
+		return true;
+	if (!percpu_ref_tryget(&css->refcnt))
+		return false;
+	css_refcount(css, "TRY");
 	return true;
 }
 
@@ -350,8 +359,11 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
  */
 static inline bool css_tryget_online(struct cgroup_subsys_state *css)
 {
-	if (!(css->flags & CSS_NO_REF))
-		return percpu_ref_tryget_live(&css->refcnt);
+	if (css->flags & CSS_NO_REF)
+		return true;
+	if (!percpu_ref_tryget_live(&css->refcnt))
+		return false;
+	css_refcount(css, "TRYO");
 	return true;
 }
 
@@ -383,8 +395,10 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css)
  */
 static inline void css_put(struct cgroup_subsys_state *css)
 {
-	if (!(css->flags & CSS_NO_REF))
+	if (!(css->flags & CSS_NO_REF)) {
 		percpu_ref_put(&css->refcnt);
+		css_refcount(css, "PUT");
+	}
 }
 
 /**
@@ -396,8 +410,10 @@ static inline void css_put(struct cgroup_subsys_state *css)
  */
 static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
 {
-	if (!(css->flags & CSS_NO_REF))
+	if (!(css->flags & CSS_NO_REF)) {
 		percpu_ref_put_many(&css->refcnt, n);
+		css_refcount(css, "PUTM");
+	}
 }
 
 static inline void cgroup_get(struct cgroup *cgrp)
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index de7d625ec077..d87a0b8c3441 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1175,7 +1175,7 @@ int cgroup1_get_tree(struct fs_context *fc)
 		    ss->root == &cgrp_dfl_root)
 			continue;
 
-		if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
+		if (!css_tryget_online(&ss->root->cgrp.self)) {
 			mutex_unlock(&cgroup_mutex);
 			goto err_restart;
 		}
@@ -1228,7 +1228,7 @@ int cgroup1_get_tree(struct fs_context *fc)
 		 */
 		pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
 		if (IS_ERR(pinned_sb) ||
-		    !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
+		    !css_tryget_online(&root->cgrp.self)) {
 			mutex_unlock(&cgroup_mutex);
 			if (!IS_ERR_OR_NULL(pinned_sb))
 				deactivate_super(pinned_sb);
@@ -1284,6 +1284,7 @@ int cgroup1_get_tree(struct fs_context *fc)
 	if (new_root) {
 		mutex_lock(&cgroup_mutex);
 		percpu_ref_reinit(&root->cgrp.self.refcnt);
+		css_refcount(&root->cgrp.self, "RIN");
 		mutex_unlock(&cgroup_mutex);
 	}
 	cgroup_get(&root->cgrp);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 7dbe71b23941..299e3fb30731 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1904,8 +1904,11 @@ void init_cgroup_root(struct cgroup_fs_context *ctx)
 	root->flags = ctx->flags;
 	if (ctx->release_agent)
 		strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
-	if (ctx->name)
+	if (ctx->name) {
 		strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN);
+		if (strcmp(root->name, "xxxxy") == 0)
+			cgrp->self.debug = true;
+	}
 	if (ctx->cpuset_clone_children)
 		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
 }
@@ -1927,7 +1930,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
 	root_cgrp->ancestor_ids[0] = ret;
 
 	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
-			      ref_flags, GFP_KERNEL);
+			      ref_flags | PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
 	if (ret)
 		goto out;
 
@@ -2166,8 +2169,10 @@ static void cgroup_kill_sb(struct super_block *sb)
 	if (!list_empty(&root->cgrp.self.children) ||
 	    root == &cgrp_dfl_root)
 		cgroup_put(&root->cgrp);
-	else
+	else {
+		css_refcount(&root->cgrp.self, "KIL");
 		percpu_ref_kill(&root->cgrp.self.refcnt);
+	}
 
 	kernfs_kill_sb(sb);
 }
@@ -4891,7 +4896,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
 
 	init_and_link_css(css, ss, cgrp);
 
-	err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
+	err = percpu_ref_init(&css->refcnt, css_release, PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
 	if (err)
 		goto err_free_css;
 
@@ -4946,7 +4951,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 	if (!cgrp)
 		return ERR_PTR(-ENOMEM);
 
-	ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
+	ret = percpu_ref_init(&cgrp->self.refcnt, css_release, PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
 	if (ret)
 		goto out_free_cgrp;
 
@@ -5194,6 +5199,7 @@ static void kill_css(struct cgroup_subsys_state *css)
 	 * Use percpu_ref_kill_and_confirm() to get notifications as each
 	 * css is confirmed to be seen as killed on all CPUs.
 	 */
+	css_refcount(css, "KLC");
 	percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
 }
 
@@ -5278,6 +5284,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	cgroup1_check_for_release(parent);
 
 	/* put the base reference */
+	css_refcount(&cgrp->self, "KIL");
 	percpu_ref_kill(&cgrp->self.refcnt);
 
 	return 0;
@@ -6106,3 +6113,11 @@ static int __init cgroup_sysfs_init(void)
 }
 subsys_initcall(cgroup_sysfs_init);
 #endif /* CONFIG_SYSFS */
+
+noinline void css_refcount(struct cgroup_subsys_state *css, const char *op)
+{
+	if (css->debug)
+		printk("*** %p %s %2ld: %pSR\n",
+		       css, op, atomic_long_read(&css->refcnt.count),
+		       __builtin_return_address(0));
+}



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux