On Wed, Oct 09, 2024 at 11:05:08AM -0700, Paul E. McKenney wrote:
> Currently, once an RCU CPU stall warning decides to dump the stalling
> CPUs' stacks, the rcu_dump_cpu_stacks() function persists until it
> has gone through the full list. Unfortunately, if the stalled grace
> period ends midway through, this function will be dumping stacks of
> innocent-bystander CPUs that happen to be blocking not the old grace
> period, but instead the new one. This can cause serious confusion.
>
> This commit therefore stops dumping stacks if and when the stalled grace
> period ends.
>
> Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxx>
> ---
>  kernel/rcu/tree_stall.h | 17 +++++++++++------
>  1 file changed, 11 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
> index d7cdd535e50b1..9d79133377ff6 100644
> --- a/kernel/rcu/tree_stall.h
> +++ b/kernel/rcu/tree_stall.h
> @@ -335,13 +335,17 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
>   * that don't support NMI-based stack dumps. The NMI-triggered stack
>   * traces are more accurate because they are printed by the target CPU.
>   */
> -static void rcu_dump_cpu_stacks(void)
> +static void rcu_dump_cpu_stacks(unsigned long gp_seq)
>  {
>  	int cpu;
>  	unsigned long flags;
>  	struct rcu_node *rnp;
>
>  	rcu_for_each_leaf_node(rnp) {
> +		if (gp_seq != rcu_state.gp_seq) {
> +			pr_err("INFO: Stall ended during stack backtracing.\n");
> +			return;
> +		}

Small nit: this also needs data_race(), like you did in the next patch?
Although you did delete this code in the next patch.

thanks,

 - Joel

>  		printk_deferred_enter();
>  		raw_spin_lock_irqsave_rcu_node(rnp, flags);
>  		for_each_leaf_node_possible_cpu(rnp, cpu)
> @@ -608,7 +612,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
>  		       (long)rcu_seq_current(&rcu_state.gp_seq), totqlen,
>  		       data_race(rcu_state.n_online_cpus)); // Diagnostic read
>  	if (ndetected) {
> -		rcu_dump_cpu_stacks();
> +		rcu_dump_cpu_stacks(gp_seq);
>
>  		/* Complain about tasks blocking the grace period. */
>  		rcu_for_each_leaf_node(rnp)
> @@ -640,7 +644,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
>  		rcu_force_quiescent_state(); /* Kick them all. */
>  }
>
> -static void print_cpu_stall(unsigned long gps)
> +static void print_cpu_stall(unsigned long gp_seq, unsigned long gps)
>  {
>  	int cpu;
>  	unsigned long flags;
> @@ -677,7 +681,7 @@ static void print_cpu_stall(unsigned long gps)
>  	rcu_check_gp_kthread_expired_fqs_timer();
>  	rcu_check_gp_kthread_starvation();
>
> -	rcu_dump_cpu_stacks();
> +	rcu_dump_cpu_stacks(gp_seq);
>
>  	raw_spin_lock_irqsave_rcu_node(rnp, flags);
>  	/* Rewrite if needed in case of slow consoles. */
> @@ -759,7 +763,8 @@ static void check_cpu_stall(struct rcu_data *rdp)
>  	gs2 = READ_ONCE(rcu_state.gp_seq);
>  	if (gs1 != gs2 ||
>  	    ULONG_CMP_LT(j, js) ||
> -	    ULONG_CMP_GE(gps, js))
> +	    ULONG_CMP_GE(gps, js) ||
> +	    !rcu_seq_state(gs2))
>  		return; /* No stall or GP completed since entering function. */
>  	rnp = rdp->mynode;
>  	jn = jiffies + ULONG_MAX / 2;
> @@ -780,7 +785,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
>  		pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
>  	} else if (self_detected) {
>  		/* We haven't checked in, so go dump stack. */
> -		print_cpu_stall(gps);
> +		print_cpu_stall(gs2, gps);
>  	} else {
>  		/* They had a few time units to dump stack, so complain. */
>  		print_other_cpu_stall(gs2, gps);
> --
> 2.40.1
>
>
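
For illustration, a minimal sketch of the data_race()-annotated check Joel
is suggesting, assuming the check stays at the top of the
rcu_for_each_leaf_node() loop as in this patch (the follow-up patch reworks
this code, so the actual version may differ):

	rcu_for_each_leaf_node(rnp) {
		/*
		 * Annotate the racy read of rcu_state.gp_seq: data_race()
		 * tells KCSAN the race is intentional, and READ_ONCE()
		 * keeps the compiler from tearing or refetching the load.
		 * A stale value is harmless here; it only means a few more
		 * stacks get dumped before the early exit triggers.
		 */
		if (gp_seq != data_race(READ_ONCE(rcu_state.gp_seq))) {
			pr_err("INFO: Stall ended during stack backtracing.\n");
			return;
		}
		printk_deferred_enter();
		/* ... remainder of the loop as in the patch ... */
	}

Whether to combine data_race() with READ_ONCE() or use data_race() alone is
a judgment call; either way marks the access as an intentional diagnostic
race.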