Currently, while rebinding, cgroup_dummy_root serves as the anchor point. In addition to the target root, rebind_subsystems() takes @added_mask and @removed_mask. The subsystems specified in the former are expected to be on the dummy root and are then moved to the target root. The ones in the latter are moved from a non-dummy root to the dummy root. Now that the dummy root is a fully functional one and we're planning to use it for the default unified hierarchy, this level of distinction between dummy and non-dummy roots is quite awkward. This patch updates rebind_subsystems() to take the target root and one subsystem mask and move the specified subsystems to the target root, which may or may not be the dummy root. IOW, unbinding now becomes moving the subsystems to the dummy root, and binding becomes moving them to a non-dummy root. This makes the dummy root mostly equivalent to other hierarchies in terms of the mechanism of moving subsystems around; however, we still retain all the semantic restrictions so that this patch doesn't introduce any visible behavior differences. Another noteworthy detail is that rebind_subsystems() guarantees that moving a subsystem to the dummy root never fails, so that valid unmounting attempts always succeed. This unifies binding and unbinding of subsystems. The invocation points of ->bind() were inconsistent between the two and are now moved to after the whole rebinding is complete. This doesn't break the current users and generally makes more sense. All rebind_subsystems() users are converted accordingly. Note that cgroup_remount() now makes two calls to rebind_subsystems() to bind and then unbind the requested subsystems. This will allow repurposing of the dummy hierarchy as the default unified hierarchy and shouldn't make any userland-visible behavior difference.
Signed-off-by: Tejun Heo <tj@xxxxxxxxxx> --- kernel/cgroup.c | 100 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 56 insertions(+), 44 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2483f4e..9b9a294 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -175,8 +175,8 @@ static int need_forkexit_callback __read_mostly; static struct cftype cgroup_base_files[]; static void cgroup_put(struct cgroup *cgrp); -static int rebind_subsystems(struct cgroupfs_root *root, - unsigned long added_mask, unsigned removed_mask); +static int rebind_subsystems(struct cgroupfs_root *dst_root, + unsigned long ss_mask); static void cgroup_destroy_css_killed(struct cgroup *cgrp); static int cgroup_destroy_locked(struct cgroup *cgrp); static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], @@ -739,7 +739,7 @@ static void cgroup_destroy_root(struct cgroupfs_root *root) BUG_ON(!list_empty(&cgrp->children)); /* Rebind all subsystems back to the default hierarchy */ - WARN_ON(rebind_subsystems(root, 0, root->subsys_mask)); + rebind_subsystems(&cgroup_dummy_root, root->subsys_mask); /* * Release all the links from cset_links to this hierarchy's @@ -976,69 +976,77 @@ static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask) } } -static int rebind_subsystems(struct cgroupfs_root *root, - unsigned long added_mask, unsigned removed_mask) +static int rebind_subsystems(struct cgroupfs_root *dst_root, + unsigned long ss_mask) { - struct cgroup *cgrp = &root->top_cgroup; + struct cgroup *dst_top = &dst_root->top_cgroup; struct cgroup_subsys *ss; - int i, ret; + int ssid, ret; lockdep_assert_held(&cgroup_tree_mutex); lockdep_assert_held(&cgroup_mutex); - /* Check that any added subsystems are currently free */ - for_each_subsys(ss, i) - if ((added_mask & (1 << i)) && ss->root != &cgroup_dummy_root) + for_each_subsys(ss, ssid) { + if (!(ss_mask & (1 << ssid))) + continue; + + /* if @ss is on the dummy_root, we can always move 
it */ + if (ss->root == &cgroup_dummy_root) + continue; + + /* if @ss has non-root cgroups attached to it, can't move */ + if (!list_empty(&ss->root->top_cgroup.children)) return -EBUSY; - ret = cgroup_populate_dir(cgrp, added_mask); - if (ret) - return ret; + /* can't move between two non-dummy roots either */ + if (dst_root != &cgroup_dummy_root) + return -EBUSY; + } + + if (dst_root != &cgroup_dummy_root) { + ret = cgroup_populate_dir(dst_top, ss_mask); + if (ret) + return ret; + } /* * Nothing can fail from this point on. Remove files for the * removed subsystems and rebind each subsystem. */ mutex_unlock(&cgroup_mutex); - cgroup_clear_dir(cgrp, removed_mask); + for_each_subsys(ss, ssid) + if ((ss_mask & (1 << ssid)) && ss->root != &cgroup_dummy_root) + cgroup_clear_dir(&ss->root->top_cgroup, 1 << ssid); mutex_lock(&cgroup_mutex); - for_each_subsys(ss, i) { - unsigned long bit = 1UL << i; - - if (bit & added_mask) { - /* We're binding this subsystem to this hierarchy */ - BUG_ON(cgroup_css(cgrp, ss)); - BUG_ON(!cgroup_css(cgroup_dummy_top, ss)); - BUG_ON(cgroup_css(cgroup_dummy_top, ss)->cgroup != cgroup_dummy_top); + for_each_subsys(ss, ssid) { + struct cgroupfs_root *src_root; + struct cgroup *src_top; + struct cgroup_subsys_state *css; - rcu_assign_pointer(cgrp->subsys[i], - cgroup_css(cgroup_dummy_top, ss)); - cgroup_css(cgrp, ss)->cgroup = cgrp; + if (!(ss_mask & (1 << ssid))) + continue; - ss->root = root; - if (ss->bind) - ss->bind(cgroup_css(cgrp, ss)); + src_root = ss->root; + src_top = &src_root->top_cgroup; + css = cgroup_css(src_top, ss); - /* refcount was already taken, and we're keeping it */ - root->subsys_mask |= bit; - } else if (bit & removed_mask) { - /* We're removing this subsystem */ - BUG_ON(cgroup_css(cgrp, ss) != cgroup_css(cgroup_dummy_top, ss)); - BUG_ON(cgroup_css(cgrp, ss)->cgroup != cgrp); + WARN_ON(!css || cgroup_css(dst_top, ss)); - if (ss->bind) - ss->bind(cgroup_css(cgroup_dummy_top, ss)); + 
RCU_INIT_POINTER(src_top->subsys[ssid], NULL); + rcu_assign_pointer(dst_top->subsys[ssid], css); + ss->root = dst_root; + css->cgroup = dst_top; - cgroup_css(cgroup_dummy_top, ss)->cgroup = cgroup_dummy_top; - RCU_INIT_POINTER(cgrp->subsys[i], NULL); + src_root->subsys_mask &= ~(1 << ssid); + dst_root->subsys_mask |= 1 << ssid; - cgroup_subsys[i]->root = &cgroup_dummy_root; - root->subsys_mask &= ~bit; - } + if (ss->bind) + ss->bind(css); } - kernfs_activate(cgrp->kn); + if (dst_root != &cgroup_dummy_root) + kernfs_activate(dst_top->kn); return 0; } @@ -1277,10 +1285,12 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) goto out_unlock; } - ret = rebind_subsystems(root, added_mask, removed_mask); + ret = rebind_subsystems(root, added_mask); if (ret) goto out_unlock; + rebind_subsystems(&cgroup_dummy_root, removed_mask); + if (opts.release_agent) { spin_lock(&release_agent_path_lock); strcpy(root->release_agent_path, opts.release_agent); @@ -1420,7 +1430,7 @@ static int cgroup_setup_root(struct cgroupfs_root *root, unsigned long ss_mask) if (ret) goto destroy_root; - ret = rebind_subsystems(root, ss_mask, 0); + ret = rebind_subsystems(root, ss_mask); if (ret) goto destroy_root; @@ -4026,6 +4036,8 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) BUG_ON(online_css(css)); + cgroup_dummy_root.subsys_mask |= 1 << ss->id; + mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_tree_mutex); } -- 1.8.5.3 _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/containers