The cgroup_subsys_state of a cgroup subsystem (not cgroup->self) uses both
kill and release callbacks on its release path (see the comment for
css_free_rwork_fn()).

When the last reference is also the base reference, we run into issues
when an active work_struct (1) is re-initialized from css_release (2).

// ref=1: only base reference
kill_css()
  css_get() // fuse, ref+=1 == 2
  percpu_ref_kill_and_confirm
    // ref -= 1 == 1: kill base references
  [via rcu]
  css_killed_ref_fn == refcnt.confirm_switch
    queue_work(css->destroy_work)        (1)
    [via css->destroy_work]
    css_killed_work_fn == wq.func
      offline_css() // needs fuse
      css_put // ref -= 1 == 0: de-fuse, was last
        ...
        percpu_ref_put_many
          css_release
            queue_work(css->destroy_work)  (2)
            [via css->destroy_work]
            css_release_work_fn == wq.func

Although we take a fuse reference in css_killed_work_fn(), it serves to pin
the css only until after offline_css().

We could check inside css_release whether destroy_work is active
(WORK_STRUCT_PENDING_BIT) and daisy-chain css_release_work_fn from
css_release(). In order to avoid clashes with the various stages of work
item processing, we just spend some space in the css (my config's css grows
to 232B + 32B) and create a separate work entry for each user.

Reported-by: syzbot+e42ae441c3b10acf9e9d@xxxxxxxxxxxxxxxxxxxxxxxxx Reported-by: Tadeusz Struk <tadeusz.struk@xxxxxxxxxx> Link: https://lore.kernel.org/r/20220412192459.227740-1-tadeusz.struk@xxxxxxxxxx/ Signed-off-by: Tadeusz Struk <tadeusz.struk@xxxxxxxxxx> Signed-off-by: Michal Koutný <mkoutny@xxxxxxxx> --- include/linux/cgroup-defs.h | 5 +++-- kernel/cgroup/cgroup.c | 14 +++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1bfcfb1af352..16b99aa04305 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -178,8 +178,9 @@ struct cgroup_subsys_state { */ atomic_t online_cnt; - /* percpu_ref killing and RCU release */ - struct work_struct destroy_work; + /* percpu_ref killing, css release, and RCU release work structs */ + struct work_struct killed_ref_work; + struct work_struct release_work; struct rcu_work destroy_rwork; /* diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a5b0d5d54fbc..33b3a44391d7 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5102,7 +5102,7 @@ static struct cftype cgroup_base_files[] = { * css_free_work_fn(). * * It is actually hairier because both step 2 and 4 require process context - * and thus involve punting to css->destroy_work adding two additional + * and thus involve punting to css->release_work adding two additional * steps to the already complex sequence. 
*/ static void css_free_rwork_fn(struct work_struct *work) @@ -5157,7 +5157,7 @@ static void css_free_rwork_fn(struct work_struct *work) static void css_release_work_fn(struct work_struct *work) { struct cgroup_subsys_state *css = - container_of(work, struct cgroup_subsys_state, destroy_work); + container_of(work, struct cgroup_subsys_state, release_work); struct cgroup_subsys *ss = css->ss; struct cgroup *cgrp = css->cgroup; @@ -5213,8 +5213,8 @@ static void css_release(struct percpu_ref *ref) struct cgroup_subsys_state *css = container_of(ref, struct cgroup_subsys_state, refcnt); - INIT_WORK(&css->destroy_work, css_release_work_fn); - queue_work(cgroup_destroy_wq, &css->destroy_work); + INIT_WORK(&css->release_work, css_release_work_fn); + queue_work(cgroup_destroy_wq, &css->release_work); } static void init_and_link_css(struct cgroup_subsys_state *css, @@ -5549,7 +5549,7 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) static void css_killed_work_fn(struct work_struct *work) { struct cgroup_subsys_state *css = - container_of(work, struct cgroup_subsys_state, destroy_work); + container_of(work, struct cgroup_subsys_state, killed_ref_work); mutex_lock(&cgroup_mutex); @@ -5570,8 +5570,8 @@ static void css_killed_ref_fn(struct percpu_ref *ref) container_of(ref, struct cgroup_subsys_state, refcnt); if (atomic_dec_and_test(&css->online_cnt)) { - INIT_WORK(&css->destroy_work, css_killed_work_fn); - queue_work(cgroup_destroy_wq, &css->destroy_work); + INIT_WORK(&css->killed_ref_work, css_killed_work_fn); + queue_work(cgroup_destroy_wq, &css->killed_ref_work); } } -- 2.35.3