[PATCH v2 2/6] cgroup/cpuset: Improve temporary cpumasks handling

Waiman Long <longman@xxxxxxxxxx> · Wed, 31 May 2023 12:34:01 -0400

The limitation that update_parent_subparts_cpumask() can only use
addmask & delmask in the given tmp cpumasks is fragile and may lead to
unexpected error. Add a new statically allocated cs_tmp_cpus cpumask
(protected by cpuset_mutex) for internal use so that all the three
temporary cpumasks can be freely used.

With this change, we can move the update_tasks_cpumask() for the
parent and update_sibling_cpumasks() for the sibling to inside
update_parent_subparts_cpumask().

Also add a init_tmpmasks() helper to handle initialization of the tmpmasks
structure when cpumasks are too big to be statically allocated on stack.

Signed-off-by: Waiman Long <longman@xxxxxxxxxx>
---
 kernel/cgroup/cpuset.c | 66 ++++++++++++++++++++++++------------------
 1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 12a0b583aca4..8604c919e1e4 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -208,6 +208,8 @@ struct cpuset {
 	struct cgroup_file partition_file;
 };
 
+static cpumask_var_t	cs_tmp_cpus;	/* Temp cpumask for partition */
+
 /*
  * Partition root states:
  *
@@ -668,6 +670,24 @@ static inline void free_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
 	}
 }
 
+/*
+ * init_tmpmasks - Initialize the cpumasks in tmpmasks with the given ones
+ */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+static inline void
+init_tmpmasks(struct tmpmasks *tmp, struct cpumask *new_cpus,
+	      struct cpumask *addmask, struct cpumask *delmask)
+{
+	tmp->new_cpus = new_cpus;
+	tmp->addmask  = addmask;
+	tmp->delmask  = delmask;
+}
+#else
+static inline void
+init_tmpmasks(struct tmpmasks *tmp, struct cpumask *new_cpus,
+	      struct cpumask *addmask, struct cpumask *delmask) { }
+#endif
+
 /**
  * alloc_trial_cpuset - allocate a trial cpuset
  * @cs: the cpuset that the trial cpuset duplicates
@@ -1300,6 +1320,8 @@ enum subparts_cmd {
 
 static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 		       int turning_on);
+static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
+				    struct tmpmasks *tmp);
 
 /*
  * Update partition exclusive flag
@@ -1463,7 +1485,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
 		adding = cpumask_andnot(tmp->addmask, tmp->addmask,
 					parent->subparts_cpus);
 		/*
-		 * Empty cpumask is not allewed
+		 * Empty cpumask is not allowed
 		 */
 		if (cpumask_empty(newmask)) {
 			part_error = PERR_CPUSEMPTY;
@@ -1583,8 +1605,11 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
 
 	spin_unlock_irq(&callback_lock);
 
-	if (adding || deleting)
+	if (adding || deleting) {
 		update_tasks_cpumask(parent, tmp->addmask);
+		if (parent->child_ecpus_count)
+			update_sibling_cpumasks(parent, cs, tmp);
+	}
 
 	/*
 	 * For partcmd_update without newmask, it is being called from
@@ -1839,18 +1864,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
 		return 0;
 
-#ifdef CONFIG_CPUMASK_OFFSTACK
 	/*
 	 * Use the cpumasks in trialcs for tmpmasks when they are pointers
-	 * to allocated cpumasks.
-	 *
-	 * Note that update_parent_subparts_cpumask() uses only addmask &
-	 * delmask, but not new_cpus.
+	 * to allocated cpumasks & save the newmask into cs_tmp_cpus.
 	 */
-	tmp.addmask  = trialcs->subparts_cpus;
-	tmp.delmask  = trialcs->effective_cpus;
-	tmp.new_cpus = NULL;
-#endif
+	cpumask_copy(cs_tmp_cpus, trialcs->cpus_allowed);
+	init_tmpmasks(&tmp, trialcs->cpus_allowed, trialcs->subparts_cpus,
+		      trialcs->effective_cpus);
 
 	retval = validate_change(cs, trialcs);
 
@@ -1870,7 +1890,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 		parent = parent_cs(cs);
 		cpuset_for_each_child(cp, css, parent)
 			if (is_partition_valid(cp) &&
-			    cpumask_intersects(trialcs->cpus_allowed, cp->cpus_allowed)) {
+			    cpumask_intersects(cs_tmp_cpus, cp->cpus_allowed)) {
 				rcu_read_unlock();
 				update_parent_subparts_cpumask(cp, partcmd_invalidate, NULL, &tmp);
 				rcu_read_lock();
@@ -1887,13 +1907,15 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 						       NULL, &tmp);
 		else
 			update_parent_subparts_cpumask(cs, partcmd_update,
-						trialcs->cpus_allowed, &tmp);
+						cs_tmp_cpus, &tmp);
 	}
 
+	/* Restore trialcs->cpus_allowed */
+	cpumask_copy(trialcs->cpus_allowed, cs_tmp_cpus);
 	compute_effective_cpumask(trialcs->effective_cpus, trialcs,
 				  parent_cs(cs));
 	spin_lock_irq(&callback_lock);
-	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
+	cpumask_copy(cs->cpus_allowed, cs_tmp_cpus);
 
 	/*
 	 * Make sure that subparts_cpus, if not empty, is a subset of
@@ -1914,11 +1936,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	}
 	spin_unlock_irq(&callback_lock);
 
-#ifdef CONFIG_CPUMASK_OFFSTACK
-	/* Now trialcs->cpus_allowed is available */
-	tmp.new_cpus = trialcs->cpus_allowed;
-#endif
-
 	/* effective_cpus will be updated here */
 	update_cpumasks_hier(cs, &tmp, false);
 
@@ -2343,13 +2360,11 @@ static int update_prstate(struct cpuset *cs, int new_prs)
 
 		err = update_parent_subparts_cpumask(cs, partcmd_enable,
 						     NULL, &tmpmask);
-		if (err)
-			goto out;
 	} else if (old_prs && new_prs) {
 		/*
 		 * A change in load balance state only, no change in cpumasks.
 		 */
-		goto out;
+		;
 	} else {
 		/*
 		 * Switching back to member is always allowed even if it
@@ -2369,12 +2384,6 @@ static int update_prstate(struct cpuset *cs, int new_prs)
 			spin_unlock_irq(&callback_lock);
 		}
 	}
-
-	update_tasks_cpumask(parent, tmpmask.new_cpus);
-
-	if (parent->child_ecpus_count)
-		update_sibling_cpumasks(parent, cs, &tmpmask);
-
 out:
 	/*
 	 * Make partition invalid & disable CS_CPU_EXCLUSIVE if an error
@@ -3500,6 +3509,7 @@ int __init cpuset_init(void)
 	BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
 	BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
 	BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL));
+	BUG_ON(!zalloc_cpumask_var(&cs_tmp_cpus, GFP_KERNEL));
 
 	cpumask_setall(top_cpuset.cpus_allowed);
 	nodes_setall(top_cpuset.mems_allowed);
-- 
2.31.1