[tip:core/rcu] rcu: Compute jiffies_till_sched_qs from other kernel parameters

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Commit-ID:  c06aed0e31008a248c1841f1b7fc80e9ee242a31
Gitweb:     https://git.kernel.org/tip/c06aed0e31008a248c1841f1b7fc80e9ee242a31
Author:     Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
AuthorDate: Wed, 25 Jul 2018 11:25:23 -0700
Committer:  Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
CommitDate: Thu, 30 Aug 2018 16:03:43 -0700

rcu: Compute jiffies_till_sched_qs from other kernel parameters

The jiffies_till_sched_qs value used to determine how old a grace period
must be before RCU enlists the help of the scheduler to force a quiescent
state on the holdout CPU.  Currently, this defaults to HZ/10 regardless of
system size and may be set only at boot time.  This can be a problem for
very large systems, because if the values of the jiffies_till_first_fqs
and jiffies_till_next_fqs kernel parameters are left at their defaults,
they are calculated to increase as the number of CPUs actually configured
on the system increases.  Thus, on a sufficiently large system, RCU would
enlist the help of the scheduler before the grace-period kthread had a
chance to scan for idle CPUs, which wastes CPU time.

This commit therefore allows jiffies_till_sched_qs to be set, if desired,
but if left as default, computes is as jiffies_till_first_fqs plus twice
jiffies_till_next_fqs, thus allowing three force-quiescent-state scans
for idle CPUs.  This scales with the number of CPUs, providing sensible
default values.

Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
---
 Documentation/admin-guide/kernel-parameters.txt |  9 +++-
 kernel/rcu/tree.c                               | 63 ++++++++++++++++++-------
 kernel/rcu/tree_plugin.h                        |  2 +
 3 files changed, 57 insertions(+), 17 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index aa96e669bcb8..6153fb62abe1 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3595,7 +3595,14 @@
 			Set required age in jiffies for a
 			given grace period before RCU starts
 			soliciting quiescent-state help from
-			rcu_note_context_switch().
+			rcu_note_context_switch().  If not specified, the
+			kernel will calculate a value based on the most
+			recent settings of rcutree.jiffies_till_first_fqs
+			and rcutree.jiffies_till_next_fqs.
+			This calculated value may be viewed in
+			rcutree.jiffies_to_sched_qs.  Any attempt to
+			set rcutree.jiffies_to_sched_qs will be
+			cheerfully overwritten.
 
 	rcutree.jiffies_till_first_fqs= [KNL]
 			Set delay from grace-period initialization to
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index bc42c600027c..6bd0951a5f3a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -396,13 +396,47 @@ static ulong jiffies_till_first_fqs = ULONG_MAX;
 static ulong jiffies_till_next_fqs = ULONG_MAX;
 static bool rcu_kick_kthreads;
 
+/*
+ * How long the grace period must be before we start recruiting
+ * quiescent-state help from rcu_note_context_switch().
+ */
+static ulong jiffies_till_sched_qs = ULONG_MAX;
+module_param(jiffies_till_sched_qs, ulong, 0444);
+static ulong jiffies_to_sched_qs; /* Adjusted version of above if not default */
+module_param(jiffies_to_sched_qs, ulong, 0444); /* Display only! */
+
+/*
+ * Make sure that we give the grace-period kthread time to detect any
+ * idle CPUs before taking active measures to force quiescent states.
+ * However, don't go below 100 milliseconds, adjusted upwards for really
+ * large systems.
+ */
+static void adjust_jiffies_till_sched_qs(void)
+{
+	unsigned long j;
+
+	/* If jiffies_till_sched_qs was specified, respect the request. */
+	if (jiffies_till_sched_qs != ULONG_MAX) {
+		WRITE_ONCE(jiffies_to_sched_qs, jiffies_till_sched_qs);
+		return;
+	}
+	j = READ_ONCE(jiffies_till_first_fqs) +
+		      2 * READ_ONCE(jiffies_till_next_fqs);
+	if (j < HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV)
+		j = HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
+	pr_info("RCU calculated value of scheduler-enlistment delay is %ld jiffies.\n", j);
+	WRITE_ONCE(jiffies_to_sched_qs, j);
+}
+
 static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp)
 {
 	ulong j;
 	int ret = kstrtoul(val, 0, &j);
 
-	if (!ret)
+	if (!ret) {
 		WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j);
+		adjust_jiffies_till_sched_qs();
+	}
 	return ret;
 }
 
@@ -411,8 +445,10 @@ static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param
 	ulong j;
 	int ret = kstrtoul(val, 0, &j);
 
-	if (!ret)
+	if (!ret) {
 		WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1));
+		adjust_jiffies_till_sched_qs();
+	}
 	return ret;
 }
 
@@ -430,13 +466,6 @@ module_param_cb(jiffies_till_first_fqs, &first_fqs_jiffies_ops, &jiffies_till_fi
 module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644);
 module_param(rcu_kick_kthreads, bool, 0644);
 
-/*
- * How long the grace period must be before we start recruiting
- * quiescent-state help from rcu_note_context_switch().
- */
-static ulong jiffies_till_sched_qs = HZ / 10;
-module_param(jiffies_till_sched_qs, ulong, 0444);
-
 static void force_qs_rnp(int (*f)(struct rcu_data *rdp));
 static void force_quiescent_state(void);
 static int rcu_pending(void);
@@ -1041,16 +1070,16 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 
 	/*
 	 * A CPU running for an extended time within the kernel can
-	 * delay RCU grace periods: (1) At age jiffies_till_sched_qs,
-	 * set .rcu_urgent_qs, (2) At age 2*jiffies_till_sched_qs, set
+	 * delay RCU grace periods: (1) At age jiffies_to_sched_qs,
+	 * set .rcu_urgent_qs, (2) At age 2*jiffies_to_sched_qs, set
 	 * both .rcu_need_heavy_qs and .rcu_urgent_qs.  Note that the
 	 * unsynchronized assignments to the per-CPU rcu_need_heavy_qs
 	 * variable are safe because the assignments are repeated if this
 	 * CPU failed to pass through a quiescent state.  This code
-	 * also checks .jiffies_resched in case jiffies_till_sched_qs
+	 * also checks .jiffies_resched in case jiffies_to_sched_qs
 	 * is set way high.
 	 */
-	jtsq = jiffies_till_sched_qs;
+	jtsq = READ_ONCE(jiffies_to_sched_qs);
 	ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
 	rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu);
 	if (!READ_ONCE(*rnhqp) &&
@@ -1236,7 +1265,7 @@ static void print_other_cpu_stall(unsigned long gp_seq)
 			gpa = READ_ONCE(rcu_state.gp_activity);
 			pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
 			       rcu_state.name, j - gpa, j, gpa,
-			       jiffies_till_next_fqs,
+			       READ_ONCE(jiffies_till_next_fqs),
 			       rcu_get_root()->qsmask);
 			/* In this case, the current CPU might be at fault. */
 			sched_show_task(current);
@@ -1874,7 +1903,7 @@ static void rcu_gp_fqs_loop(void)
 	struct rcu_node *rnp = rcu_get_root();
 
 	first_gp_fqs = true;
-	j = jiffies_till_first_fqs;
+	j = READ_ONCE(jiffies_till_first_fqs);
 	ret = 0;
 	for (;;) {
 		if (!ret) {
@@ -1908,7 +1937,7 @@ static void rcu_gp_fqs_loop(void)
 			cond_resched_tasks_rcu_qs();
 			WRITE_ONCE(rcu_state.gp_activity, jiffies);
 			ret = 0; /* Force full wait till next FQS. */
-			j = jiffies_till_next_fqs;
+			j = READ_ONCE(jiffies_till_next_fqs);
 		} else {
 			/* Deal with stray signal. */
 			cond_resched_tasks_rcu_qs();
@@ -3579,6 +3608,8 @@ static void __init rcu_init_geometry(void)
 		jiffies_till_first_fqs = d;
 	if (jiffies_till_next_fqs == ULONG_MAX)
 		jiffies_till_next_fqs = d;
+	if (jiffies_till_sched_qs == ULONG_MAX)
+		adjust_jiffies_till_sched_qs();
 
 	/* If the compile-time values are accurate, just leave. */
 	if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 726d57708849..7ec366268e2e 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -105,6 +105,8 @@ static void __init rcu_bootup_announce_oddness(void)
 		pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
 	if (jiffies_till_next_fqs != ULONG_MAX)
 		pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs);
+	if (jiffies_till_sched_qs != ULONG_MAX)
+		pr_info("\tBoot-time adjustment of scheduler-enlistment delay to %ld jiffies.\n", jiffies_till_sched_qs);
 	if (rcu_kick_kthreads)
 		pr_info("\tKick kthreads if too-long grace period.\n");
 	if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))



[Index of Archives]     [Linux Stable Commits]     [Linux Stable Kernel]     [Linux Kernel]     [Linux USB Devel]     [Linux Video &Media]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux