RE: [PATCH] rcu: Fix bind rcu related kthreads to housekeeping CPUs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



>Did the testing and checked the CPU affinities of RCU kthreads by running "ps -eo psr,comm| grep rcu" [1]:
>
>Without Zqiang's patch:
>    ...
>    0 rcu_preempt
>    0 rcuog/0
>    0 rcuop/0
>    0 rcuop/1
>    0 rcuog/2
>    0 rcuop/2
>    2 rcuop/3
>
>With Zqiang's patch:
>    ...
>    3 rcu_preempt   // on housekeeping CPU 3
>    2 rcuog/0            // on housekeeping CPU 2
>    3 rcuop/0            // on housekeeping CPU 3
>    3 rcuop/1            // on housekeeping CPU 3
>    0 rcuog/2            // on non-housekeeping CPU 0.  [2]
>    0 rcuop/2            // on non-housekeeping CPU 0.  [2]
>    2 rcuop/3
>
>[1] The 1st column of the output is for CPU IDs and the 2nd column is for thread names. 
>[2] Added debug messages into the two threads, and found that the two threads didn't run after all CPUs were online.
>     So if they run again after all CPUs are online, they will also be moved to housekeeping CPUs by Zqiang's patch. 
>
> From: Zqiang <qiang1.zhang@xxxxxxxxx>
> Sent: Wednesday, February 8, 2023 11:34 AM
> To: paulmck@xxxxxxxxxx; frederic@xxxxxxxxxx; joel@xxxxxxxxxxxxxxxxx
> Cc: rcu@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx
> Subject: [PATCH] rcu: Fix bind rcu related kthreads to housekeeping CPUs
> 
> For kernels built with CONFIG_NO_HZ_FULL=y and CONFIG_RCU_NOCB_CPU=y,
> run the following tests:
> 
> runqemu kvm slirp nographic qemuparams="-m 1024 -smp 4"
> bootparams="console=ttyS0 isolcpus=0,1 nohz_full=0,1 rcu_nocbs=0,1"
> 
> root@qemux86-64:~# ps -ef | grep "rcu_preempt" | grep -v grep | awk '{print
> $2}'
> 15
> root@qemux86-64:~# ps -ef | grep "rcuop/0" | grep -v grep | awk '{print $2}'
> 17
> root@qemux86-64:~# taskset -p 15
> pid 15's current affinity mask: 1
> root@qemux86-64:~# taskset -p 17
> pid 17's current affinity mask: 1
> 
> The affinity of these rcu related kthreads is not set to housekeeping cpumask,
> even if called rcu_bind_gp_kthread() when the rcu related kthread starts.
> 
> set_cpus_allowed_ptr()
>  ->__set_cpus_allowed_ptr()
>    ->__set_cpus_allowed_ptr_locked
>      {
> 		bool kthread = p->flags & PF_KTHREAD;
> 		....
> 		if (kthread || is_migration_disabled(p))
> 			cpu_valid_mask = cpu_online_mask;
> 		....
> 		dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, ctx-
> >new_mask);
> 		if (dest_cpu >= nr_cpu_ids) {
> 			ret = -EINVAL;
> 			goto out;
> 		}
> 		....
>      }
> 
> Due to these rcu related kthreads be created before bringup other CPUS, so
> when they running and set hosekeeping cpus affinity, found that only CPU0 is
> online at this time and CPU0 is set to no_hz_full CPU, the ctx->new_mask not
> contain CPU0 and this will cause dest_cpu in the above code snippet to be an
> illegal value and return directly, ultimately, these rcu related kthreads failed to
> bind housekeeping CPUS.
>
>s/so//
>s/hosekeeping/housekeeping/
>...
>
> This commit therefore rebind these rcu related kthreads to housekeeping CPUs
>
>s/rebind/rebinds/
>
>Could you tweak all the commit messages to fix the typos and the grammar errors? 😊
>Other than that, 


Thanks for testing,  I ignore the cpu hotplug scenario and exp kworker cpu affinity setting,
I will add Tested-by tags in next version.

Thanks
Zqiang


>
>    Tested-by: Qiuxu Zhuo <qiuxu.zhuo@xxxxxxxxx>
>
> after the kernel boot sequence ends, at this point all CPUs are online.
> 
> Signed-off-by: Zqiang <qiang1.zhang@xxxxxxxxx>
> ---
>  kernel/rcu/tasks.h       |  7 +++++--
>  kernel/rcu/tree.c        |  7 ++++++-
>  kernel/rcu/tree.h        |  1 -
>  kernel/rcu/tree_nocb.h   | 13 ++++++++++++-
>  kernel/rcu/tree_plugin.h |  9 ---------
>  5 files changed, 23 insertions(+), 14 deletions(-)
> 
> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index
> baf7ec178155..8b3530cca291 100644
> --- a/kernel/rcu/tasks.h
> +++ b/kernel/rcu/tasks.h
> @@ -544,9 +544,8 @@ static void rcu_tasks_one_gp(struct rcu_tasks *rtp, bool
> midboot)  static int __noreturn rcu_tasks_kthread(void *arg)  {
>  	struct rcu_tasks *rtp = arg;
> +	bool rcu_setaffinity_setup = false;
> 
> -	/* Run on housekeeping CPUs by default.  Sysadm can move if desired.
> */
> -	housekeeping_affine(current, HK_TYPE_RCU);
>  	WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!
> 
>  	/*
> @@ -556,6 +555,10 @@ static int __noreturn rcu_tasks_kthread(void *arg)
>  	 * This loop is terminated by the system going down.  ;-)
>  	 */
>  	for (;;) {
> +		if (!rcu_setaffinity_setup && rcu_inkernel_boot_has_ended()) {
> +			set_cpus_allowed_ptr(current,
> housekeeping_cpumask(HK_TYPE_RCU));
> +			rcu_setaffinity_setup = true;
> +		}
>  		// Wait for one grace period and invoke any callbacks
>  		// that are ready.
>  		rcu_tasks_one_gp(rtp, false);
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index
> ee27a03d7576..0ac47a773e13 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1781,8 +1781,13 @@ static noinline void rcu_gp_cleanup(void)
>   */
>  static int __noreturn rcu_gp_kthread(void *unused)  {
> -	rcu_bind_gp_kthread();
> +	bool rcu_setaffinity_setup = false;
> +
>  	for (;;) {
> +		if (!rcu_setaffinity_setup && rcu_inkernel_boot_has_ended()) {
> +			set_cpus_allowed_ptr(current,
> housekeeping_cpumask(HK_TYPE_RCU));
> +			rcu_setaffinity_setup = true;
> +		}
> 
>  		/* Handle grace-period start. */
>  		for (;;) {
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index
> 192536916f9a..391e3fae4ff5 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -495,7 +495,6 @@ do {
> 		\
>  #define rcu_nocb_lock_irqsave(rdp, flags) local_irq_save(flags)  #endif /* #else
> #ifdef CONFIG_RCU_NOCB_CPU */
> 
> -static void rcu_bind_gp_kthread(void);
>  static bool rcu_nohz_full_cpu(void);
> 
>  /* Forward declarations for tree_stall.h */ diff --git a/kernel/rcu/tree_nocb.h
> b/kernel/rcu/tree_nocb.h index f2280616f9d5..254d0f631d57 100644
> --- a/kernel/rcu/tree_nocb.h
> +++ b/kernel/rcu/tree_nocb.h
> @@ -894,8 +894,14 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
> static int rcu_nocb_gp_kthread(void *arg)  {
>  	struct rcu_data *rdp = arg;
> +	bool rcu_setaffinity_setup = false;
> 
>  	for (;;) {
> +		if (!rcu_setaffinity_setup && rcu_inkernel_boot_has_ended()) {
> +			set_cpus_allowed_ptr(current,
> housekeeping_cpumask(HK_TYPE_RCU));
> +			rcu_setaffinity_setup = true;
> +		}
> +
>  		WRITE_ONCE(rdp->nocb_gp_loops, rdp->nocb_gp_loops + 1);
>  		nocb_gp_wait(rdp);
>  		cond_resched_tasks_rcu_qs();
> @@ -1002,10 +1008,15 @@ static void nocb_cb_wait(struct rcu_data *rdp)
> static int rcu_nocb_cb_kthread(void *arg)  {
>  	struct rcu_data *rdp = arg;
> -
> +	bool rcu_setaffinity_setup = false;
>  	// Each pass through this loop does one callback batch, and,
>  	// if there are no more ready callbacks, waits for them.
>  	for (;;) {
> +		if (!rcu_setaffinity_setup && rcu_inkernel_boot_has_ended()) {
> +			set_cpus_allowed_ptr(current,
> housekeeping_cpumask(HK_TYPE_RCU));
> +			rcu_setaffinity_setup = true;
> +		}
> +
>  		nocb_cb_wait(rdp);
>  		cond_resched_tasks_rcu_qs();
>  	}
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index
> 7b0fe741a088..fdde71ebb83e 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -1294,12 +1294,3 @@ static bool rcu_nohz_full_cpu(void)
>  	return false;
>  }
> 
> -/*
> - * Bind the RCU grace-period kthreads to the housekeeping CPU.
> - */
> -static void rcu_bind_gp_kthread(void)
> -{
> -	if (!tick_nohz_full_enabled())
> -		return;
> -	housekeeping_affine(current, HK_TYPE_RCU);
> -}
> --
> 2.25.1





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux