On Wed, Apr 13, 2022 at 03:44:11PM +0800, Zqiang wrote: > If the rcutree.use_softirq is configured, when RCU Stall event > happened, dump status of all rcuc kthreads who due to starvation > prevented grace period ends on CPUs that not report quiescent state. > > Signed-off-by: Zqiang <qiang1.zhang@xxxxxxxxx> >This could be a good improvement! But a few questions and comments below. > > Thanx, Paul > --- > kernel/rcu/tree_stall.h | 8 +++++--- > 1 file changed, 5 insertions(+), 3 deletions(-) > > diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index > d7956c03edbd..e6ecc32cfe23 100644 > --- a/kernel/rcu/tree_stall.h > +++ b/kernel/rcu/tree_stall.h > @@ -348,6 +348,7 @@ static int rcu_print_task_stall(struct rcu_node > *rnp, unsigned long flags) } #endif /* #else #ifdef > CONFIG_PREEMPT_RCU */ > > +static void rcuc_kthread_dump(struct rcu_data *rdp); > /* > * Dump stacks of all tasks running on stalled CPUs. First try using > * NMIs, but fall back to manual remote stack tracing on > architectures @@ -368,6 +369,7 @@ static void rcu_dump_cpu_stacks(void) > pr_err("Offline CPU %d blocking current GP.\n", cpu); > else if (!trigger_single_cpu_backtrace(cpu)) > dump_cpu_task(cpu); > + rcuc_kthread_dump(per_cpu_ptr(&rcu_data, cpu)); >Was this addition inspired by the chasing of a bug? If so, please let me know exactly what information was (or would have been) the most helpful. > >For example, the starvation information might be more compactly represented in the information printed by print_cpu_stall_info(). >Unless the stack trace was quite valuable, it might be best to omit it. >After all, RCU CPU stall warnings are already infamously heavy on the text output. >Is it better to modify it like this? I'm so sorry, the following modifications are problematic, I'll resend v2 Thanks Zqiang >diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index d7956c03edbd..37444ff00787 100644 >--- a/kernel/rcu/tree_stall.h >+++ b/kernel/rcu/tree_stall.h >@@ -465,27 +465,38 @@ static void print_cpu_stall_info(int cpu) > falsepositive ? " (false positive?)" : ""); } > >-static void rcuc_kthread_dump(struct rcu_data *rdp) >+static void rcuc_kthread_dump(void) > { > int cpu; > unsigned long j; >+ unsigned long flags >+ struct rcu_node *rnp; > struct task_struct *rcuc; >+ struct rcu_data *rdp; > >- rcuc = rdp->rcu_cpu_kthread_task; >- if (!rcuc) >- return; >+ rcu_for_each_leaf_node(rnp) { >+ raw_spin_lock_irqsave_rcu_node(rnp, flags); >+ for_each_leaf_node_possible_cpu(rnp, cpu) >+ if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { >+ rdp = per_cpu_ptr(&rcu_data, cpu); >+ rcuc = rdp->rcu_cpu_kthread_task; >+ if (!rcuc) >+ return; > >- cpu = task_cpu(rcuc); >- if (cpu_is_offline(cpu) || idle_cpu(cpu)) >- return; >+ cpu = task_cpu(rcuc); >+ if (cpu_is_offline(cpu) || idle_cpu(cpu)) >+ return; > >- if (!rcu_is_rcuc_kthread_starving(rdp, &j)) >- return; >+ if (!rcu_is_rcuc_kthread_starving(rdp, &j)) >+ return; > >- pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j); >- sched_show_task(rcuc); >- if (!trigger_single_cpu_backtrace(cpu)) >- dump_cpu_task(cpu); >+ pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j); >+ sched_show_task(rcuc); >+ if (!trigger_single_cpu_backtrace(cpu)) >+ dump_cpu_task(cpu); >+ } >+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags); >+ } > } > >/* Complain about starvation of grace-period kthread. */ @@ -595,6 +606,9 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) > smp_processor_id(), (long)(jiffies - gps), > (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus); > if (ndetected) { >+ if (!use_softirq) >+ rcuc_kthread_dump(); >+ > rcu_dump_cpu_stacks(); > > /* Complain about tasks blocking the grace period. */ @@ -660,7 +674,7 @@ static void print_cpu_stall(unsigned long gps) > rcu_check_gp_kthread_starvation(); > > if (!use_softirq) >- rcuc_kthread_dump(rdp); >+ rcuc_kthread_dump(); > > rcu_dump_cpu_stacks(); > >Thanks >Zqiang > } > raw_spin_unlock_irqrestore_rcu_node(rnp, flags); > } > @@ -471,6 +473,9 @@ static void rcuc_kthread_dump(struct rcu_data *rdp) > unsigned long j; > struct task_struct *rcuc; > > + if (use_softirq) > + return; > + > rcuc = rdp->rcu_cpu_kthread_task; > if (!rcuc) >The checks for use_softirq on the one hand and the checks for non-NULL >rdp->rcu_cpu_kthread_task are a bit "interesting". Not your fault or >problem, of course! >Yes the ' if (!use_softirq)' is redundant, the if rcu_cpu_kthread_task Is completely enough > > return; > @@ -659,9 +664,6 @@ static void print_cpu_stall(unsigned long gps) > rcu_check_gp_kthread_expired_fqs_timer(); > rcu_check_gp_kthread_starvation(); > > - if (!use_softirq) > >In particular, I am wondering if this "if" is redundant. >Yes the ' if (!use_softirq)' is also redundant > > - rcuc_kthread_dump(rdp); > - > rcu_dump_cpu_stacks(); > > raw_spin_lock_irqsave_rcu_node(rnp, flags); > -- > 2.25.1 >