Introduce a new "isolation.rcu_nocb" file within a cgroup2/cpuset
directory which allows RCU callbacks offloading (aka. RCU NOCB) to be
either enabled ("1") or disabled ("0") on a set of CPUs. This can
override previous boot settings made through the "rcu_nocbs=" kernel
parameter.

The file is only writable on "root" type partitions, which excludes any
overlap between settings. The deepest root type partition has the
highest priority. This means that given the following setting:

                    Top cpuset (CPUs: 0-7)
                    cpuset.isolation.rcu_nocb = 0
                              |
                              |
                    Subdirectory A (CPUs: 5-7)
                    cpuset.cpus.partition = root
                    cpuset.isolation.rcu_nocb = 0
                              |
                              |
                    Subdirectory B (CPUs: 7)
                    cpuset.cpus.partition = root
                    cpuset.isolation.rcu_nocb = 1

the result is that only CPU 7 is in rcu_nocb mode.

Note that the "rcu_nocbs" kernel parameter must be passed on boot, even
without a cpulist, so that nocb support is enabled.

Signed-off-by: Frederic Weisbecker <frederic@xxxxxxxxxx>
Cc: Zefan Li <lizefan.x@xxxxxxxxxxxxx>
Cc: Tejun Heo <tj@xxxxxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Cc: Paul E.
McKenney <paulmck@xxxxxxxxxx>
Cc: Phil Auld <pauld@xxxxxxxxxx>
Cc: Nicolas Saenz Julienne <nsaenz@xxxxxxxxxx>
Cc: Marcelo Tosatti <mtosatti@xxxxxxxxxx>
Cc: Paul Gortmaker <paul.gortmaker@xxxxxxxxxxxxx>
Cc: Waiman Long <longman@xxxxxxxxxx>
Cc: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
---
 kernel/cgroup/cpuset.c | 123 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 120 insertions(+), 3 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 9390bfd9f1cd..2d9f019bb590 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -225,6 +225,7 @@ typedef enum {
 	CS_SCHED_LOAD_BALANCE,
 	CS_SPREAD_PAGE,
 	CS_SPREAD_SLAB,
+	CS_RCU_NOCB,
 } cpuset_flagbits_t;
 
 /* convenient tests for these bits */
@@ -268,6 +269,11 @@ static inline int is_spread_slab(const struct cpuset *cs)
 	return test_bit(CS_SPREAD_SLAB, &cs->flags);
 }
 
+static inline int is_rcu_nocb(const struct cpuset *cs)
+{
+	return test_bit(CS_RCU_NOCB, &cs->flags);
+}
+
 static inline int is_partition_root(const struct cpuset *cs)
 {
 	return cs->partition_root_state > 0;
@@ -590,6 +596,90 @@ static inline void free_cpuset(struct cpuset *cs)
 	kfree(cs);
 }
 
+#ifdef CONFIG_RCU_NOCB_CPU
+/*
+ * cpuset_rcu_nocb_apply() - Propagate a cpuset's CS_RCU_NOCB state to the
+ * housekeeping code.
+ *
+ * Calls housekeeping_cpumask_set() on @root's effective CPUs for HK_TYPE_RCU
+ * when CS_RCU_NOCB is set in @root, housekeeping_cpumask_clear() otherwise.
+ *
+ * Return: the value returned by the housekeeping helper (callers treat
+ * negative values as errors).
+ */
+static int cpuset_rcu_nocb_apply(struct cpuset *root)
+{
+	int err;
+
+	if (is_rcu_nocb(root))
+		err = housekeeping_cpumask_set(root->effective_cpus, HK_TYPE_RCU);
+	else
+		err = housekeeping_cpumask_clear(root->effective_cpus, HK_TYPE_RCU);
+
+	return err;
+}
+
+/*
+ * cpuset_rcu_nocb_update() - Change the CS_RCU_NOCB state of a partition.
+ *
+ * Only a cpuset whose partition_root_state is PRS_ENABLED is accepted,
+ * -EINVAL is returned otherwise. The new state carried by @trialcs is first
+ * applied to the CPUs, then mirrored into the flags of every descendant of
+ * @cur that does not sit under a nested PRS_ENABLED partition.
+ *
+ * Return: 0 on success, -EINVAL or the error from cpuset_rcu_nocb_apply().
+ */
+static int cpuset_rcu_nocb_update(struct cpuset *cur, struct cpuset *trialcs)
+{
+	struct cgroup_subsys_state *des_css;
+	struct cpuset *des;
+	int err;
+
+	if (cur->partition_root_state != PRS_ENABLED)
+		return -EINVAL;
+
+	err = cpuset_rcu_nocb_apply(trialcs);
+	if (err < 0)
+		return err;
+
+	rcu_read_lock();
+	cpuset_for_each_descendant_pre(des, des_css, cur) {
+		if (des == cur)
+			continue;
+		/*
+		 * A nested root partition carries its own setting: skip its
+		 * subtree but keep visiting the remaining descendants. A
+		 * plain break would leave later siblings with a stale flag.
+		 */
+		if (des->partition_root_state == PRS_ENABLED) {
+			des_css = css_rightmost_descendant(des_css);
+			continue;
+		}
+		spin_lock_irq(&callback_lock);
+		if (is_rcu_nocb(trialcs))
+			set_bit(CS_RCU_NOCB, &des->flags);
+		else
+			clear_bit(CS_RCU_NOCB, &des->flags);
+		spin_unlock_irq(&callback_lock);
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+#else
+/* !CONFIG_RCU_NOCB_CPU: nothing to apply, always succeed. */
+static inline int cpuset_rcu_nocb_apply(struct cpuset *root)
+{
+	return 0;
+}
+
+static inline int cpuset_rcu_nocb_update(struct cpuset *cur,
+					 struct cpuset *trialcs)
+{
+	return 0;
+}
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
+
 /*
  * validate_change_legacy() - Validate conditions specific to legacy (v1)
  *                            behavior.
@@ -1655,6 +1745,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	if (cs->partition_root_state) {
 		struct cpuset *parent = parent_cs(cs);
 
+		WARN_ON_ONCE(cpuset_rcu_nocb_apply(parent) < 0);
+		WARN_ON_ONCE(cpuset_rcu_nocb_apply(cs) < 0);
+
 		/*
 		 * For partition root, update the cpumasks of sibling
 		 * cpusets if they use parent's effective_cpus.
@@ -2012,6 +2105,12 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
 			|| (is_spread_page(cs) != is_spread_page(trialcs)));
 
+	if (is_rcu_nocb(cs) != is_rcu_nocb(trialcs)) {
+		err = cpuset_rcu_nocb_update(cs, trialcs);
+		if (err < 0)
+			goto out;
+	}
+
 	spin_lock_irq(&callback_lock);
 	cs->flags = trialcs->flags;
 	spin_unlock_irq(&callback_lock);
@@ -2365,6 +2464,7 @@ typedef enum {
 	FILE_MEMORY_PRESSURE,
 	FILE_SPREAD_PAGE,
 	FILE_SPREAD_SLAB,
+	FILE_RCU_NOCB,
 } cpuset_filetype_t;
 
 static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
@@ -2406,6 +2506,9 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
 	case FILE_SPREAD_SLAB:
 		retval = update_flag(CS_SPREAD_SLAB, cs, val);
 		break;
+	case FILE_RCU_NOCB:
+		retval = update_flag(CS_RCU_NOCB, cs, val);
+		break;
 	default:
 		retval = -EINVAL;
 		break;
@@ -2573,6 +2676,8 @@ static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
 		return is_spread_page(cs);
 	case FILE_SPREAD_SLAB:
 		return is_spread_slab(cs);
+	case FILE_RCU_NOCB:
+		return is_rcu_nocb(cs);
 	default:
 		BUG();
 	}
@@ -2803,7 +2908,14 @@ static struct cftype dfl_files[] = {
 		.private = FILE_SUBPARTS_CPULIST,
 		.flags = CFTYPE_DEBUG,
 	},
-
+#ifdef CONFIG_RCU_NOCB_CPU
+	{
+		.name = "isolation.rcu_nocb",
+		.read_u64 = cpuset_read_u64,
+		.write_u64 = cpuset_write_u64,
+		.private = FILE_RCU_NOCB,
+	},
+#endif
 	{ }	/* terminate */
 };
 
@@ -2861,6 +2973,8 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 		set_bit(CS_SPREAD_PAGE, &cs->flags);
 	if (is_spread_slab(parent))
 		set_bit(CS_SPREAD_SLAB, &cs->flags);
+	if (is_rcu_nocb(parent))
+		set_bit(CS_RCU_NOCB, &cs->flags);
 
 	cpuset_inc();
 
@@ -3227,12 +3341,15 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
 	if (mems_updated)
 		check_insane_mems_config(&new_mems);
 
-	if (is_in_v2_mode())
+	if (is_in_v2_mode()) {
 		hotplug_update_tasks(cs, &new_cpus, &new_mems,
 				     cpus_updated, mems_updated);
-	else
+		if (cpus_updated)
+			WARN_ON_ONCE(cpuset_rcu_nocb_apply(cs) < 0);
+	} else {
 		hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems,
 					    cpus_updated, mems_updated);
+	}
 
 	percpu_up_write(&cpuset_rwsem);
 }
-- 
2.25.1