On 2022/11/4 1:28, Paul E. McKenney wrote: > On Thu, Nov 03, 2022 at 08:35:15PM +0800, Zhen Lei wrote: >> In some extreme cases, such as the I/O pressure test, the CPU usage may >> be 100%, causing RCU stall. In this case, the printed information about >> current is not useful. Displays the number and usage of hard interrupts, >> soft interrupts, and context switches that are generated within half of >> the CPU stall timeout, can help us make a general judgment. In other >> cases, we can preliminarily determine whether an infinite loop occurs >> when local_irq, local_bh or preempt is disabled. >> >> For example: >> rcu: INFO: rcu_preempt self-detected stall on CPU >> rcu: 0-....: (1250 ticks this GP) <omitted> >> rcu: hardirqs softirqs csw/system >> rcu: number: 624 45 0 >> rcu: cputime: 69 1 2425 ==> 2500(ms) >> >> The example above shows that the number of hard and soft interrupts is >> small, there is zero context switching, and the system takes up a lot of >> time. We can quickly conclude that the current task is infinitely looped >> with preempt_disable(). >> >> The impact on system performance is negligible because snapshot is >> recorded only one time after 1/2 CPU stall timeout. >> >> This enhanced debugging information is suppressed by default and can be >> enabled by CONFIG_RCU_CPU_STALL_DEEP_DEBUG=y or >> rcupdate.rcu_cpu_stall_deep_debug=1. >> >> Signed-off-by: Zhen Lei <thunder.leizhen@xxxxxxxxxx> > > Much better, thank you! > > How about rcu_cpu_stall_cputime instead of rcu_cpu_stall_deep_debug and > RCU_CPU_STALL_CPUTIME instead of RCU_CPU_STALL_DEEP_DEBUG? Very well. I'll update and post v5 today. > > Just to forestall things like rcu_cpu_stall_deeper_debug, to say nothing > of rcu_cpu_stall_deepest_debug or rcu_cpu_stall_abyssal_debug. ;-) Ha ha! Right. > > Thanx, Paul > >> --- >> .../admin-guide/kernel-parameters.txt | 3 ++ >> kernel/rcu/Kconfig.debug | 10 ++++++ >> kernel/rcu/rcu.h | 1 + >> kernel/rcu/tree.c | 16 ++++++++++ >> kernel/rcu/tree.h | 17 ++++++++++ >> kernel/rcu/tree_stall.h | 31 +++++++++++++++++++ >> kernel/rcu/update.c | 2 ++ >> 7 files changed, 80 insertions(+) >> >> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt >> index a465d5242774af8..fc619e9e8d056da 100644 >> --- a/Documentation/admin-guide/kernel-parameters.txt >> +++ b/Documentation/admin-guide/kernel-parameters.txt >> @@ -5082,6 +5082,9 @@ >> rcupdate.rcu_cpu_stall_timeout to be used (after >> conversion from seconds to milliseconds). >> >> + rcupdate.rcu_cpu_stall_deep_debug= [KNL] >> + Provide additional rcu stall debug information. >> + >> rcupdate.rcu_expedited= [KNL] >> Use expedited grace-period primitives, for >> example, synchronize_rcu_expedited() instead >> diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug >> index 1b0c41d490f0588..7b18763c574cb13 100644 >> --- a/kernel/rcu/Kconfig.debug >> +++ b/kernel/rcu/Kconfig.debug >> @@ -95,6 +95,16 @@ config RCU_EXP_CPU_STALL_TIMEOUT >> says to use the RCU_CPU_STALL_TIMEOUT value converted from >> seconds to milliseconds. >> >> +config RCU_CPU_STALL_DEEP_DEBUG >> + bool "Provide additional rcu stall debug information" >> + depends on RCU_STALL_COMMON >> + default n >> + help >> + Statistics during the period from RCU_CPU_STALL_TIMEOUT/2 to >> + RCU_CPU_STALL_TIMEOUT, such as the number of (hard interrupts, soft >> + interrupts, task switches) and the cputime of (hard interrupts, soft >> + interrupts, kerenl tasks) are added to the rcu stall report. >> + >> config RCU_TRACE >> bool "Enable tracing for RCU" >> depends on DEBUG_KERNEL >> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h >> index be5979da07f5985..2445b0c58348400 100644 >> --- a/kernel/rcu/rcu.h >> +++ b/kernel/rcu/rcu.h >> @@ -224,6 +224,7 @@ extern int rcu_cpu_stall_ftrace_dump; >> extern int rcu_cpu_stall_suppress; >> extern int rcu_cpu_stall_timeout; >> extern int rcu_exp_cpu_stall_timeout; >> +extern int rcu_cpu_stall_deep_debug; >> int rcu_jiffies_till_stall_check(void); >> int rcu_exp_jiffies_till_stall_check(void); >> >> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c >> index 93416afebd59c7a..a90d2e2e5bbd99e 100644 >> --- a/kernel/rcu/tree.c >> +++ b/kernel/rcu/tree.c >> @@ -931,6 +931,22 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) >> rdp->rcu_iw_gp_seq = rnp->gp_seq; >> irq_work_queue_on(&rdp->rcu_iw, rdp->cpu); >> } >> + >> + if (rcu_cpu_stall_deep_debug && rdp->snap_record.gp_seq != rdp->gp_seq) { >> + u64 *cpustat; >> + struct rcu_snap_record *rsrp; >> + >> + cpustat = kcpustat_cpu(rdp->cpu).cpustat; >> + >> + rsrp = &rdp->snap_record; >> + rsrp->cputime_irq = cpustat[CPUTIME_IRQ]; >> + rsrp->cputime_softirq = cpustat[CPUTIME_SOFTIRQ]; >> + rsrp->cputime_system = cpustat[CPUTIME_SYSTEM]; >> + rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu); >> + rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu); >> + rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu); >> + rsrp->gp_seq = rdp->gp_seq; >> + } >> } >> >> return 0; >> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h >> index d4a97e40ea9c3e2..44d49819a31f050 100644 >> --- a/kernel/rcu/tree.h >> +++ b/kernel/rcu/tree.h >> @@ -158,6 +158,22 @@ union rcu_noqs { >> u16 s; /* Set of bits, aggregate OR here. */ >> }; >> >> +/* >> + * Record the snapshot of the core stats at 1/2 rcu stall timeout. The member >> + * gp_seq is used to ensure that all members are updated only once during the >> + * second half period. The snapshot is taken only if this gp_seq is not equal >> + * to rdp->gp_seq. >> + */ >> +struct rcu_snap_record { >> + unsigned long gp_seq; /* Track rdp->gp_seq counter */ >> + u64 cputime_irq; /* Accumulated cputime of hard irqs */ >> + u64 cputime_softirq;/* Accumulated cputime of soft irqs */ >> + u64 cputime_system; /* Accumulated cputime of kernel tasks */ >> + unsigned long nr_hardirqs; /* Accumulated number of hard irqs */ >> + unsigned int nr_softirqs; /* Accumulated number of soft irqs */ >> + unsigned long long nr_csw; /* Accumulated number of task switches */ >> +}; >> + >> /* Per-CPU data for read-copy update. */ >> struct rcu_data { >> /* 1) quiescent-state and grace-period handling : */ >> @@ -262,6 +278,7 @@ struct rcu_data { >> short rcu_onl_gp_flags; /* ->gp_flags at last online. */ >> unsigned long last_fqs_resched; /* Time of last rcu_resched(). */ >> unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */ >> + struct rcu_snap_record snap_record; /* Snapshot of core stats at 1/2 rcu stall timeout */ >> >> int cpu; >> }; >> diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h >> index 5653560573e22d6..5b258bcdb25ba04 100644 >> --- a/kernel/rcu/tree_stall.h >> +++ b/kernel/rcu/tree_stall.h >> @@ -428,6 +428,35 @@ static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp >> return j > 2 * HZ; >> } >> >> +static void print_cpu_stat_info(int cpu) >> +{ >> + u64 *cpustat; >> + unsigned long half_timeout; >> + struct rcu_snap_record *rsrp; >> + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); >> + >> + if (!rcu_cpu_stall_deep_debug) >> + return; >> + >> + rsrp = &rdp->snap_record; >> + if (rsrp->gp_seq != rdp->gp_seq) >> + return; >> + >> + cpustat = kcpustat_cpu(cpu).cpustat; >> + half_timeout = rcu_jiffies_till_stall_check() / 2; >> + >> + pr_err(" hardirqs softirqs csw/system\n"); >> + pr_err(" number: %8ld %10d %12lld\n", >> + kstat_cpu_irqs_sum(cpu) - rsrp->nr_hardirqs, >> + kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs, >> + nr_context_switches_cpu(cpu) - rsrp->nr_csw); >> + pr_err("cputime: %8lld %10lld %12lld ==> %lld(ms)\n", >> + div_u64(cpustat[CPUTIME_IRQ] - rsrp->cputime_irq, NSEC_PER_MSEC), >> + div_u64(cpustat[CPUTIME_SOFTIRQ] - rsrp->cputime_softirq, NSEC_PER_MSEC), >> + div_u64(cpustat[CPUTIME_SYSTEM] - rsrp->cputime_system, NSEC_PER_MSEC), >> + jiffies64_to_msecs(half_timeout)); >> +} >> + >> /* >> * Print out diagnostic information for the specified stalled CPU. >> * >> @@ -484,6 +513,8 @@ static void print_cpu_stall_info(int cpu) >> data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart, >> rcuc_starved ? buf : "", >> falsepositive ? " (false positive?)" : ""); >> + >> + print_cpu_stat_info(cpu); >> } >> >> /* Complain about starvation of grace-period kthread. */ >> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c >> index 738842c4886b235..9c68c88a700f185 100644 >> --- a/kernel/rcu/update.c >> +++ b/kernel/rcu/update.c >> @@ -508,6 +508,8 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; >> module_param(rcu_cpu_stall_timeout, int, 0644); >> int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT; >> module_param(rcu_exp_cpu_stall_timeout, int, 0644); >> +int rcu_cpu_stall_deep_debug __read_mostly = IS_ENABLED(CONFIG_RCU_CPU_STALL_DEEP_DEBUG); >> +module_param(rcu_cpu_stall_deep_debug, int, 0644); >> #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ >> >> // Suppress boot-time RCU CPU stall warnings and rcutorture writer stall >> -- >> 2.25.1 >> > . > -- Regards, Zhen Lei