This is a note to let you know that I've just added the patch titled cgroup/cpuset: Change cpuset_rwsem and hotplug lock order to the 4.19-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: cgroup-cpuset-change-cpuset_rwsem-and-hotplug-lock-order.patch and it can be found in the queue-4.19 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. >From caixinchen1@xxxxxxxxxx Mon Mar 20 02:21:45 2023 From: Cai Xinchen <caixinchen1@xxxxxxxxxx> Date: Mon, 20 Mar 2023 01:15:05 +0000 Subject: cgroup/cpuset: Change cpuset_rwsem and hotplug lock order To: <longman@xxxxxxxxxx>, <lizefan.x@xxxxxxxxxxxxx>, <tj@xxxxxxxxxx>, <hannes@xxxxxxxxxxx>, <gregkh@xxxxxxxxxxxxxxxxxxx>, <sashal@xxxxxxxxxx> Cc: <mkoutny@xxxxxxxx>, <zhangqiao22@xxxxxxxxxx>, <juri.lelli@xxxxxxxxxx>, <penguin-kernel@xxxxxxxxxxxxxxxxxxx>, <stable@xxxxxxxxxxxxxxx>, <cgroups@xxxxxxxxxxxxxxx>, <linux-kernel@xxxxxxxxxxxxxxx> Message-ID: <20230320011507.129441-2-caixinchen1@xxxxxxxxxx> From: Juri Lelli <juri.lelli@xxxxxxxxxx> commit d74b27d63a8bebe2fe634944e4ebdc7b10db7a39 upstream. commit 1243dc518c9da ("cgroup/cpuset: Convert cpuset_mutex to percpu_rwsem") is performance patch which is not backport. So convert percpu_rwsem to cpuset_mutex. commit aa44002e7db25 ("cpuset: Fix unsafe lock order between cpuset lock and cpuslock") makes lock order keep cpuset_mutex ->cpu_hotplug_lock. We should change lock order in cpuset_attach. original commit message: cpuset_rwsem is going to be acquired from sched_setscheduler() with a following patch. There are however paths (e.g., spawn_ksoftirqd) in which sched_scheduler() is eventually called while holding hotplug lock; this creates a dependecy between hotplug lock (to be always acquired first) and cpuset_rwsem (to be always acquired after hotplug lock). Fix paths which currently take the two locks in the wrong order (after a following patch is applied). Tested-by: Dietmar Eggemann <dietmar.eggemann@xxxxxxx> Signed-off-by: Juri Lelli <juri.lelli@xxxxxxxxxx> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: bristot@xxxxxxxxxx Cc: claudio@xxxxxxxxxxxxxxx Cc: lizefan@xxxxxxxxxx Cc: longman@xxxxxxxxxx Cc: luca.abeni@xxxxxxxxxxxxxxx Cc: mathieu.poirier@xxxxxxxxxx Cc: rostedt@xxxxxxxxxxx Cc: tj@xxxxxxxxxx Cc: tommaso.cucinotta@xxxxxxxxxxxxxxx Link: https://lkml.kernel.org/r/20190719140000.31694-7-juri.lelli@xxxxxxxxxx Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx> Signed-off-by: Cai Xinchen <caixinchen1@xxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- v2: * Change get_online_cpus/put_online_cpus lock order in cpuset_attach to keep cpuset_mutex and hotplug lock order Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- include/linux/cpuset.h | 8 ++++---- kernel/cgroup/cpuset.c | 24 +++++++++++++++++------- 2 files changed, 21 insertions(+), 11 deletions(-) --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -40,14 +40,14 @@ static inline bool cpusets_enabled(void) static inline void cpuset_inc(void) { - static_branch_inc(&cpusets_pre_enable_key); - static_branch_inc(&cpusets_enabled_key); + static_branch_inc_cpuslocked(&cpusets_pre_enable_key); + static_branch_inc_cpuslocked(&cpusets_enabled_key); } static inline void cpuset_dec(void) { - static_branch_dec(&cpusets_enabled_key); - static_branch_dec(&cpusets_pre_enable_key); + static_branch_dec_cpuslocked(&cpusets_enabled_key); + static_branch_dec_cpuslocked(&cpusets_pre_enable_key); } extern int cpuset_init(void); --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -830,8 +830,8 @@ static void rebuild_sched_domains_locked cpumask_var_t *doms; int ndoms; + lockdep_assert_cpus_held(); lockdep_assert_held(&cpuset_mutex); - get_online_cpus(); /* * We have raced with CPU hotplug. Don't do anything to avoid @@ -839,15 +839,13 @@ static void rebuild_sched_domains_locked * Anyways, hotplug work item will rebuild sched domains. */ if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) - goto out; + return; /* Generate domain masks and attrs */ ndoms = generate_sched_domains(&doms, &attr); /* Have scheduler rebuild the domains */ partition_sched_domains(ndoms, doms, attr); -out: - put_online_cpus(); } #else /* !CONFIG_SMP */ static void rebuild_sched_domains_locked(void) @@ -857,9 +855,11 @@ static void rebuild_sched_domains_locked void rebuild_sched_domains(void) { + get_online_cpus(); mutex_lock(&cpuset_mutex); rebuild_sched_domains_locked(); mutex_unlock(&cpuset_mutex); + put_online_cpus(); } /** @@ -1528,13 +1528,13 @@ static void cpuset_attach(struct cgroup_ cgroup_taskset_first(tset, &css); cs = css_cs(css); - mutex_lock(&cpuset_mutex); - /* * It should hold cpus lock because a cpu offline event can * cause set_cpus_allowed_ptr() failed. */ get_online_cpus(); + mutex_lock(&cpuset_mutex); + /* prepare for attach */ if (cs == &top_cpuset) cpumask_copy(cpus_attach, cpu_possible_mask); @@ -1553,7 +1553,6 @@ static void cpuset_attach(struct cgroup_ cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); cpuset_update_task_spread_flag(cs, task); } - put_online_cpus(); /* * Change mm for all threadgroup leaders. This is expensive and may @@ -1589,6 +1588,7 @@ static void cpuset_attach(struct cgroup_ wake_up(&cpuset_attach_wq); mutex_unlock(&cpuset_mutex); + put_online_cpus(); } /* The various types of files and directories in a cpuset file system */ @@ -1617,6 +1617,7 @@ static int cpuset_write_u64(struct cgrou cpuset_filetype_t type = cft->private; int retval = 0; + get_online_cpus(); mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) { retval = -ENODEV; @@ -1654,6 +1655,7 @@ static int cpuset_write_u64(struct cgrou } out_unlock: mutex_unlock(&cpuset_mutex); + put_online_cpus(); return retval; } @@ -1664,6 +1666,7 @@ static int cpuset_write_s64(struct cgrou cpuset_filetype_t type = cft->private; int retval = -ENODEV; + get_online_cpus(); mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; @@ -1678,6 +1681,7 @@ static int cpuset_write_s64(struct cgrou } out_unlock: mutex_unlock(&cpuset_mutex); + put_online_cpus(); return retval; } @@ -1716,6 +1720,7 @@ static ssize_t cpuset_write_resmask(stru kernfs_break_active_protection(of->kn); flush_work(&cpuset_hotplug_work); + get_online_cpus(); mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; @@ -1741,6 +1746,7 @@ static ssize_t cpuset_write_resmask(stru free_trial_cpuset(trialcs); out_unlock: mutex_unlock(&cpuset_mutex); + put_online_cpus(); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); flush_workqueue(cpuset_migrate_mm_wq); @@ -1985,6 +1991,7 @@ static int cpuset_css_online(struct cgro if (!parent) return 0; + get_online_cpus(); mutex_lock(&cpuset_mutex); set_bit(CS_ONLINE, &cs->flags); @@ -2035,6 +2042,7 @@ static int cpuset_css_online(struct cgro spin_unlock_irq(&callback_lock); out_unlock: mutex_unlock(&cpuset_mutex); + put_online_cpus(); return 0; } @@ -2048,6 +2056,7 @@ static void cpuset_css_offline(struct cg { struct cpuset *cs = css_cs(css); + get_online_cpus(); mutex_lock(&cpuset_mutex); if (is_sched_load_balance(cs)) @@ -2057,6 +2066,7 @@ static void cpuset_css_offline(struct cg clear_bit(CS_ONLINE, &cs->flags); mutex_unlock(&cpuset_mutex); + put_online_cpus(); } static void cpuset_css_free(struct cgroup_subsys_state *css) Patches currently in stable-queue which might be from caixinchen1@xxxxxxxxxx are queue-4.19/cgroup-cpuset-change-cpuset_rwsem-and-hotplug-lock-order.patch queue-4.19/cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch queue-4.19/cgroup-add-missing-cpus_read_lock-to-cgroup_attach_task_all.patch