We no longer take cgroup_mutex in cgroup_exit and the exiting tasks are not moved to init_css_set, reflect that in several comments to prevent confusion. Signed-off-by: Michal Koutný <mkoutny@xxxxxxxx> --- kernel/cgroup/cgroup.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 753afbca549f..1488bb732902 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -899,8 +899,7 @@ static void css_set_move_task(struct task_struct *task, /* * We are synchronized through cgroup_threadgroup_rwsem * against PF_EXITING setting such that we can't race - * against cgroup_exit() changing the css_set to - * init_css_set and dropping the old one. + * against cgroup_exit()/cgroup_free() dropping the css_set. */ WARN_ON_ONCE(task->flags & PF_EXITING); @@ -1430,9 +1429,8 @@ struct cgroup *task_cgroup_from_root(struct task_struct *task, struct cgroup_root *root) { /* - * No need to lock the task - since we hold cgroup_mutex the - * task can't change groups, so the only thing that can happen - * is that it exits and its css is set back to init_css_set. + * No need to lock the task - since we hold css_set_lock the + * task can't change groups. */ return cset_cgroup_from_root(task_css_set(task), root); } @@ -6020,7 +6018,7 @@ void cgroup_post_fork(struct task_struct *child) struct css_set *cset; spin_lock_irq(&css_set_lock); - cset = task_css_set(current); + cset = task_css_set(current); /* current is @child's parent */ if (list_empty(&child->cg_list)) { get_css_set(cset); cset->nr_tasks++; @@ -6063,20 +6061,8 @@ void cgroup_post_fork(struct task_struct *child) * cgroup_exit - detach cgroup from exiting task * @tsk: pointer to task_struct of exiting process * - * Description: Detach cgroup from @tsk and release it. - * - * Note that cgroups marked notify_on_release force every task in - * them to take the global cgroup_mutex mutex when exiting. - * This could impact scaling on very large systems. Be reluctant to - * use notify_on_release cgroups where very high task exit scaling - * is required on large systems. + * Description: Detach cgroup from @tsk. * - * We set the exiting tasks cgroup to the root cgroup (top_cgroup). We - * call cgroup_exit() while the task is still competent to handle - * notify_on_release(), then leave the task attached to the root cgroup in - * each hierarchy for the remainder of its exit. No need to bother with - * init_css_set refcnting. init_css_set never goes away and we can't race - * with migration path - PF_EXITING is visible to migration path. */ void cgroup_exit(struct task_struct *tsk) { @@ -6086,7 +6072,8 @@ void cgroup_exit(struct task_struct *tsk) /* * Unlink from @tsk from its css_set. As migration path can't race - * with us, we can check css_set and cg_list without synchronization. + * with us (thanks to cgroup_threadgroup_rwsem), we can check css_set + * and cg_list without synchronization. */ cset = task_css_set(tsk); @@ -6102,6 +6089,8 @@ void cgroup_exit(struct task_struct *tsk) spin_unlock_irq(&css_set_lock); } else { + /* Take reference to avoid freeing init_css_set in cgroup_free, + * see cgroup_fork(). */ get_css_set(cset); } -- 2.21.0