On Wed, Jan 04, 2023 at 07:47:22PM +0800, Zqiang wrote:
> @@ -586,39 +586,19 @@ static bool synchronize_rcu_expedited_wait_once(long tlimit)
>  static void synchronize_rcu_expedited_wait(void)
>  {
>  	int cpu;
> -	unsigned long j;
>  	unsigned long jiffies_stall;
>  	unsigned long jiffies_start;
>  	unsigned long mask;
>  	int ndetected;
> -	struct rcu_data *rdp;
>  	struct rcu_node *rnp;
>  	struct rcu_node *rnp_root = rcu_get_root();
> -	unsigned long flags;
> 
>  	trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
>  	jiffies_stall = rcu_exp_jiffies_till_stall_check();
>  	jiffies_start = jiffies;
> -	if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) {
> -		if (synchronize_rcu_expedited_wait_once(1))
> -			return;
> -		rcu_for_each_leaf_node(rnp) {
> -			raw_spin_lock_irqsave_rcu_node(rnp, flags);
> -			mask = READ_ONCE(rnp->expmask);
> -			for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
> -				rdp = per_cpu_ptr(&rcu_data, cpu);
> -				if (rdp->rcu_forced_tick_exp)
> -					continue;
> -				rdp->rcu_forced_tick_exp = true;
> -				if (cpu_online(cpu))
> -					tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
> -			}
> -			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> -		}
> -		j = READ_ONCE(jiffies_till_first_fqs);
> -		if (synchronize_rcu_expedited_wait_once(j + HZ))
> -			return;
> -	}
> +
> +	if (synchronize_rcu_expedited_wait_once(1))
> +		return;

Do we still need this? It is immediately followed by the same call
with a higher time limit.

> 
>  	for (;;) {
>  		if (synchronize_rcu_expedited_wait_once(jiffies_stall))
> @@ -653,8 +633,6 @@ static void synchronize_rcu_expedited_wait(void)
>  	if (ndetected) {
>  		pr_err("blocking rcu_node structures (internal RCU debug):");
>  		rcu_for_each_node_breadth_first(rnp) {
> -			if (rnp == rnp_root)
> -				continue; /* printed unconditionally */

Is that chunk from another patch?

>  			if (sync_rcu_exp_done_unlocked(rnp))
>  				continue;
>  			pr_cont(" l=%u:%d-%d:%#lx/%c",
> @@ -729,6 +707,17 @@ static void rcu_exp_sel_wait_wake(unsigned long s)
> 
>  #ifdef CONFIG_PREEMPT_RCU
> 
> +static void rcu_exp_set_tickdep(int cpu)
> +{
> +	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);

This is only used locally, so you can use this_cpu_ptr(), or even
better you can pass the rdp from rcu_exp_handler().

> +
> +	if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended() &&

You can directly test tick_nohz_full_cpu(smp_processor_id()), which
has the advantage of only evaluating smp_processor_id() if nohz_full
is currently running (thanks to a static key).

It's a micro-optimization, but still...

> +	    cpu_online(cpu)) {

Right, good point: this can happen if rcu_exp_handler() is called
from smpcfd_dying_cpu(), in which case we can't requeue an IPI or an
IRQ work. That is not a problem, since rcu_report_dead() ->
rcu_preempt_deferred_qs() then does the QS report.

Can you add a comment about that?

> +		rdp->rcu_forced_tick_exp = true;
> +		tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
> +	}
> +}
> +
>  /*
>   * Remote handler for smp_call_function_single(). If there is an
>   * RCU read-side critical section in effect, request that the
> @@ -743,6 +732,7 @@ static void rcu_exp_handler(void *unused)
>  	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
>  	struct rcu_node *rnp = rdp->mynode;
>  	struct task_struct *t = current;
> +	int cpu = raw_smp_processor_id();

And then there is no need to fetch smp_processor_id() here.

Thanks.
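
For the record, an untested sketch of the direction I have in mind,
putting the above suggestions together (this assumes rcu_exp_handler()
passes its rdp down and relies on the rdp->cpu field; adjust as you
see fit):

static void rcu_exp_set_tickdep(struct rcu_data *rdp)
{
	/*
	 * tick_nohz_full_cpu() is gated behind a static key, so
	 * smp_processor_id() is only evaluated when nohz_full is
	 * actually running.
	 */
	if (!tick_nohz_full_cpu(smp_processor_id()) ||
	    !rcu_inkernel_boot_has_ended())
		return;

	/*
	 * rcu_exp_handler() can be invoked from smpcfd_dying_cpu(),
	 * in which case the CPU is going offline and neither an IPI
	 * nor an IRQ work can be requeued. This is fine because
	 * rcu_report_dead() -> rcu_preempt_deferred_qs() then
	 * reports the quiescent state.
	 */
	if (!cpu_online(rdp->cpu))
		return;

	rdp->rcu_forced_tick_exp = true;
	tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU_EXP);
}

The call site in rcu_exp_handler() then simply becomes
rcu_exp_set_tickdep(rdp), with no extra smp_processor_id() fetch in
the handler itself.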