As Paul pointed out, "The tick_dep_clear() is SMP-safe because it uses
atomic operations, but the problem is that if there are multiple
nohz_full CPUs going offline concurrently, the first CPU to invoke
rcutree_dead_cpu() will turn the tick off. This might require an
atomically manipulated counter to mediate the calls to
rcutree_dead_cpu()."

This patch introduces a new member ->dying in struct rcu_node, which
counts the CPUs under that node that are concurrently going offline.
TICK_DEP_BIT_RCU is set when the first such CPU enters
rcutree_offline_cpu() and cleared when the last one leaves
rcutree_dead_cpu().

Note: tick_dep_set() is now called under rnp->lock, but since it takes
no lock itself, no new lock ordering is introduced.

Suggested-by: "Paul E. McKenney" <paulmck@xxxxxxxxxx>
Signed-off-by: Pingfan Liu <kernelfans@xxxxxxxxx>
Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxx>
Cc: David Woodhouse <dwmw@xxxxxxxxxxxx>
Cc: Frederic Weisbecker <frederic@xxxxxxxxxx>
Cc: Neeraj Upadhyay <quic_neeraju@xxxxxxxxxxx>
Cc: Josh Triplett <josh@xxxxxxxxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
Cc: Lai Jiangshan <jiangshanlai@xxxxxxxxx>
Cc: Joel Fernandes <joel@xxxxxxxxxxxxxxxxx>
Cc: "Jason A. Donenfeld" <Jason@xxxxxxxxx>
To: rcu@xxxxxxxxxxxxxxx
---
 kernel/rcu/tree.c | 19 ++++++++++++++-----
 kernel/rcu/tree.h |  1 +
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8a829b64f5b2..f8bd0fc5fd2f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2164,13 +2164,19 @@ int rcutree_dead_cpu(unsigned int cpu)
 {
 	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
+	unsigned long flags;
+	u8 dying;
 
 	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
 		return 0;
 
 	WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
-	// Stop-machine done, so allow nohz_full to disable tick.
-	tick_dep_clear(TICK_DEP_BIT_RCU);
+	raw_spin_lock_irqsave_rcu_node(rnp, flags);
+	dying = --rnp->dying;
+	if (!dying)
+		// Stop-machine done, so allow nohz_full to disable tick.
+		tick_dep_clear(TICK_DEP_BIT_RCU);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	return 0;
 }
 
@@ -4020,17 +4026,20 @@ int rcutree_offline_cpu(unsigned int cpu)
 {
 	unsigned long flags;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
+	u8 dying;
 
 	rdp = per_cpu_ptr(&rcu_data, cpu);
 	rnp = rdp->mynode;
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	rnp->ffmask &= ~rdp->grpmask;
+	/* Let rcutree_dead_cpu() know about a new offlining. */
+	dying = rnp->dying++;
+	if (!dying)
+		// nohz_full CPUs need the tick for stop-machine to work quickly
+		tick_dep_set(TICK_DEP_BIT_RCU);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
 	rcutree_affinity_setting(cpu, cpu);
-
-	// nohz_full CPUs need the tick for stop-machine to work quickly
-	tick_dep_set(TICK_DEP_BIT_RCU);
 	return 0;
 }
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index d4a97e40ea9c..b508a12ac953 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -81,6 +81,7 @@ struct rcu_node {
 	int	grphi;		/* highest-numbered CPU here. */
 	u8	grpnum;		/* group number for next level up. */
 	u8	level;		/* root is at level 0. */
+	u8	dying;		/* number of concurrently offlining CPUs */
 	bool	wait_blkd_tasks;/* Necessary to wait for blocked tasks to */
 				/*  exit RCU read-side critical sections */
 				/*  before propagating offline up the */
--
2.31.1
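
For illustration, here is a minimal standalone userspace sketch of the
first-sets/last-clears counting scheme the patch uses. A pthread mutex
stands in for rnp->lock, a plain counter stands in for rnp->dying, and
the two stub functions stand in for tick_dep_set()/tick_dep_clear();
all names here are made up for the demonstration and are not kernel API.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int dying;	/* counterpart of rnp->dying */

static void tick_dep_set_stub(void)   { puts("tick dependency set"); }
static void tick_dep_clear_stub(void) { puts("tick dependency cleared"); }

/* Mirrors rcutree_offline_cpu(): the first concurrent offliner sets. */
static void offline_enter(void)
{
	pthread_mutex_lock(&lock);
	if (dying++ == 0)
		tick_dep_set_stub();
	pthread_mutex_unlock(&lock);
}

/* Mirrors rcutree_dead_cpu(): the last concurrent offliner clears. */
static void offline_exit(void)
{
	pthread_mutex_lock(&lock);
	if (--dying == 0)
		tick_dep_clear_stub();
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	offline_enter();	/* prints "tick dependency set" */
	offline_enter();	/* counter now 2, no output */
	offline_exit();		/* counter now 1, no output */
	offline_exit();		/* prints "tick dependency cleared" */
	return 0;
}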