This is a note to let you know that I've just added the patch titled cgroup, cpuset: replace cpuset_post_attach_flush() with cgroup_subsys->post_attach callback to the 4.5-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: cgroup-cpuset-replace-cpuset_post_attach_flush-with-cgroup_subsys-post_attach-callback.patch and it can be found in the queue-4.5 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. >From 5cf1cacb49aee39c3e02ae87068fc3c6430659b0 Mon Sep 17 00:00:00 2001 From: Tejun Heo <tj@xxxxxxxxxx> Date: Thu, 21 Apr 2016 19:06:48 -0400 Subject: cgroup, cpuset: replace cpuset_post_attach_flush() with cgroup_subsys->post_attach callback From: Tejun Heo <tj@xxxxxxxxxx> commit 5cf1cacb49aee39c3e02ae87068fc3c6430659b0 upstream. Since e93ad19d0564 ("cpuset: make mm migration asynchronous"), cpuset kicks off asynchronous NUMA node migration if necessary during task migration and flushes it from cpuset_post_attach_flush() which is called at the end of __cgroup_procs_write(). This is to avoid performing migration with cgroup_threadgroup_rwsem write-locked which can lead to deadlock through dependency on kworker creation. memcg has a similar issue with charge moving, so let's convert it to an official callback rather than the current one-off cpuset specific function. This patch adds cgroup_subsys->post_attach callback and makes cpuset register cpuset_post_attach_flush() as its ->post_attach. The conversion is mostly one-to-one except that the new callback is called under cgroup_mutex. This is to guarantee that no other migration operations are started before ->post_attach callbacks are finished. cgroup_mutex is one of the outermost mutex in the system and has never been and shouldn't be a problem. We can add specialized synchronization around __cgroup_procs_write() but I don't think there's any noticeable benefit. Signed-off-by: Tejun Heo <tj@xxxxxxxxxx> Cc: Li Zefan <lizefan@xxxxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- include/linux/cgroup-defs.h | 1 + include/linux/cpuset.h | 6 ------ kernel/cgroup.c | 7 +++++-- kernel/cpuset.c | 4 ++-- 4 files changed, 8 insertions(+), 10 deletions(-) --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -442,6 +442,7 @@ struct cgroup_subsys { int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset); + void (*post_attach)(void); int (*can_fork)(struct task_struct *task); void (*cancel_fork)(struct task_struct *task); void (*fork)(struct task_struct *task); --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -137,8 +137,6 @@ static inline void set_mems_allowed(node task_unlock(current); } -extern void cpuset_post_attach_flush(void); - #else /* !CONFIG_CPUSETS */ static inline bool cpusets_enabled(void) { return false; } @@ -245,10 +243,6 @@ static inline bool read_mems_allowed_ret return false; } -static inline void cpuset_post_attach_flush(void) -{ -} - #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2697,9 +2697,10 @@ static ssize_t __cgroup_procs_write(stru size_t nbytes, loff_t off, bool threadgroup) { struct task_struct *tsk; + struct cgroup_subsys *ss; struct cgroup *cgrp; pid_t pid; - int ret; + int ssid, ret; if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) return -EINVAL; @@ -2747,8 +2748,10 @@ out_unlock_rcu: rcu_read_unlock(); out_unlock_threadgroup: percpu_up_write(&cgroup_threadgroup_rwsem); + for_each_subsys(ss, ssid) + if (ss->post_attach) + ss->post_attach(); cgroup_kn_unlock(of->kn); - cpuset_post_attach_flush(); return ret ?: nbytes; } --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -58,7 +58,6 @@ #include <asm/uaccess.h> #include <linux/atomic.h> #include <linux/mutex.h> -#include <linux/workqueue.h> #include <linux/cgroup.h> #include <linux/wait.h> @@ -1016,7 +1015,7 @@ static void cpuset_migrate_mm(struct mm_ } } -void cpuset_post_attach_flush(void) +static void cpuset_post_attach(void) { flush_workqueue(cpuset_migrate_mm_wq); } @@ -2087,6 +2086,7 @@ struct cgroup_subsys cpuset_cgrp_subsys .can_attach = cpuset_can_attach, .cancel_attach = cpuset_cancel_attach, .attach = cpuset_attach, + .post_attach = cpuset_post_attach, .bind = cpuset_bind, .legacy_cftypes = files, .early_init = 1, Patches currently in stable-queue which might be from tj@xxxxxxxxxx are queue-4.5/sched-cgroup-fix-cleanup-cgroup-teardown-init.patch queue-4.5/workqueue-fix-ghost-pending-flag-while-doing-mq-io.patch queue-4.5/cgroup-cpuset-replace-cpuset_post_attach_flush-with-cgroup_subsys-post_attach-callback.patch queue-4.5/memcg-relocate-charge-moving-from-attach-to-post_attach.patch -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html