On Wed, Apr 13, 2022 at 03:44:11PM +0800, Zqiang wrote: > If the rcutree.use_softirq is configured, when RCU Stall event > happened, dump status of all rcuc kthreads who due to starvation > prevented grace period ends on CPUs that not report quiescent state. > > Signed-off-by: Zqiang <qiang1.zhang@xxxxxxxxx> >This could be a good improvement! But a few questions and comments below. > > Thanx, Paul > --- > kernel/rcu/tree_stall.h | 8 +++++--- > 1 file changed, 5 insertions(+), 3 deletions(-) > > diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index > d7956c03edbd..e6ecc32cfe23 100644 > --- a/kernel/rcu/tree_stall.h > +++ b/kernel/rcu/tree_stall.h > @@ -348,6 +348,7 @@ static int rcu_print_task_stall(struct rcu_node > *rnp, unsigned long flags) } #endif /* #else #ifdef > CONFIG_PREEMPT_RCU */ > > +static void rcuc_kthread_dump(struct rcu_data *rdp); > /* > * Dump stacks of all tasks running on stalled CPUs. First try using > * NMIs, but fall back to manual remote stack tracing on > architectures @@ -368,6 +369,7 @@ static void rcu_dump_cpu_stacks(void) > pr_err("Offline CPU %d blocking current GP.\n", cpu); > else if (!trigger_single_cpu_backtrace(cpu)) > dump_cpu_task(cpu); > + rcuc_kthread_dump(per_cpu_ptr(&rcu_data, cpu)); >Was this addition inspired by the chasing of a bug? If so, please let me know exactly what information was (or would have been) the most helpful. > >For example, the starvation information might be more compactly represented in the information printed by print_cpu_stall_info(). >Unless the stack trace was quite valuable, it might be best to omit it. >After all, RCU CPU stall warnings are already infamously heavy on the text output. Is it better to modify it like this? diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index d7956c03edbd..37444ff00787 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -465,27 +465,38 @@ static void print_cpu_stall_info(int cpu) falsepositive ? " (false positive?)" : ""); } -static void rcuc_kthread_dump(struct rcu_data *rdp) +static void rcuc_kthread_dump(void) { int cpu; unsigned long j; + unsigned long flags + struct rcu_node *rnp; struct task_struct *rcuc; + struct rcu_data *rdp; - rcuc = rdp->rcu_cpu_kthread_task; - if (!rcuc) - return; + rcu_for_each_leaf_node(rnp) { + raw_spin_lock_irqsave_rcu_node(rnp, flags); + for_each_leaf_node_possible_cpu(rnp, cpu) + if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { + rdp = per_cpu_ptr(&rcu_data, cpu); + rcuc = rdp->rcu_cpu_kthread_task; + if (!rcuc) + return; - cpu = task_cpu(rcuc); - if (cpu_is_offline(cpu) || idle_cpu(cpu)) - return; + cpu = task_cpu(rcuc); + if (cpu_is_offline(cpu) || idle_cpu(cpu)) + return; - if (!rcu_is_rcuc_kthread_starving(rdp, &j)) - return; + if (!rcu_is_rcuc_kthread_starving(rdp, &j)) + return; - pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j); - sched_show_task(rcuc); - if (!trigger_single_cpu_backtrace(cpu)) - dump_cpu_task(cpu); + pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j); + sched_show_task(rcuc); + if (!trigger_single_cpu_backtrace(cpu)) + dump_cpu_task(cpu); + } + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + } } /* Complain about starvation of grace-period kthread. */ @@ -595,6 +606,9 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) smp_processor_id(), (long)(jiffies - gps), (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus); if (ndetected) { + if (!use_softirq) + rcuc_kthread_dump(); + rcu_dump_cpu_stacks(); /* Complain about tasks blocking the grace period. */ @@ -660,7 +674,7 @@ static void print_cpu_stall(unsigned long gps) rcu_check_gp_kthread_starvation(); if (!use_softirq) - rcuc_kthread_dump(rdp); + rcuc_kthread_dump(); rcu_dump_cpu_stacks(); Thanks Zqiang > } > raw_spin_unlock_irqrestore_rcu_node(rnp, flags); > } > @@ -471,6 +473,9 @@ static void rcuc_kthread_dump(struct rcu_data *rdp) > unsigned long j; > struct task_struct *rcuc; > > + if (use_softirq) > + return; > + > rcuc = rdp->rcu_cpu_kthread_task; > if (!rcuc) >The checks for use_softirq on the one hand and the checks for non-NULL >rdp->rcu_cpu_kthread_task are a bit "interesting". Not your fault or >problem, of course! Yes the ' if (!use_softirq)' is redundant, the if rcu_cpu_kthread_task Is completely enough > > return; > @@ -659,9 +664,6 @@ static void print_cpu_stall(unsigned long gps) > rcu_check_gp_kthread_expired_fqs_timer(); > rcu_check_gp_kthread_starvation(); > > - if (!use_softirq) > >In particular, I am wondering if this "if" is redundant. Yes the ' if (!use_softirq)' is also redundant > > - rcuc_kthread_dump(rdp); > - > rcu_dump_cpu_stacks(); > > raw_spin_lock_irqsave_rcu_node(rnp, flags); > -- > 2.25.1 >