Re: get_state_synchronize_rcu() at boot time

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Nov 25, 2022 at 10:45:09AM -0800, Paul E. McKenney wrote:
> On Fri, Nov 25, 2022 at 07:33:46PM +0100, Uladzislau Rezki wrote:
> > > Hello, Uladzislau,
> > > 
> > > Here is where I placed that probe code and the corresponding output.
> > > Where should I be placing it instead?
> > > 
> > > 						Thanx, Paul
> > > 
> > > ------------------------------------------------------------------------
> > > 
> > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > index 219b4b516f387..308cbc5e68a64 100644
> > > --- a/kernel/rcu/tree.c
> > > +++ b/kernel/rcu/tree.c
> > > @@ -4546,8 +4546,17 @@ void rcu_scheduler_starting(void)
> > >  	local_irq_restore(flags);
> > >  
> > >  	// Switch out of early boot mode.
> > > +	pr_err("-> [%d] Invoke synchronize_rcu(): seq number: %lu\n", current->pid, get_state_synchronize_rcu());
> > > +	synchronize_rcu();
> > > +	pr_err("-> [%d] Done. A seq number: %lu\n", current->pid, get_state_synchronize_rcu());
> > >  	rcu_scheduler_active = RCU_SCHEDULER_INIT;
> > > +	pr_err("-> [%d] Invoke synchronize_rcu(): seq number: %lu\n", current->pid, get_state_synchronize_rcu());
> > > +	synchronize_rcu();
> > > +	pr_err("-> [%d] Done. A seq number: %lu\n", current->pid, get_state_synchronize_rcu());
> > >  	rcu_test_sync_prims();
> > > +	pr_err("-> [%d] Invoke synchronize_rcu(): seq number: %lu\n", current->pid, get_state_synchronize_rcu());
> > > +	synchronize_rcu();
> > > +	pr_err("-> [%d] Done. A seq number: %lu\n", current->pid, get_state_synchronize_rcu());
> > >  }
> > >  
> > >  /*
> > > @@ -4817,6 +4826,9 @@ void __init rcu_init(void)
> > >  
> > >  	kfree_rcu_batch_init();
> > >  	rcu_bootup_announce();
> > > +	pr_err("-> [%d] Invoke synchronize_rcu(): seq number: %lu\n", current->pid, get_state_synchronize_rcu());
> > > +	synchronize_rcu();
> > > +	pr_err("-> [%d] Done. A seq number: %lu\n", current->pid, get_state_synchronize_rcu());
> > >  	sanitize_kthread_prio();
> > >  	rcu_init_geometry();
> > >  	rcu_init_one();
> > > diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> > > index 7b1e183b0eb23..2993be46f81d5 100644
> > > --- a/kernel/rcu/update.c
> > > +++ b/kernel/rcu/update.c
> > > @@ -557,9 +557,12 @@ struct early_boot_kfree_rcu {
> > >  static void early_boot_test_call_rcu(void)
> > >  {
> > >  	static struct rcu_head head;
> > > +	int idx;
> > >  	static struct rcu_head shead;
> > >  	struct early_boot_kfree_rcu *rhp;
> > >  
> > > +	idx = srcu_down_read(&early_srcu);
> > > +	srcu_up_read(&early_srcu, idx);
> > >  	call_rcu(&head, test_callback);
> > >  	early_srcu_cookie = start_poll_synchronize_srcu(&early_srcu);
> > >  	call_srcu(&early_srcu, &shead, test_callback);
> > > 
> > > ------------------------------------------------------------------------
> > > 
> > > [    0.016333] rcu: -> [0] Invoke synchronize_rcu(): seq number: 12
> > > [    0.016334] rcu: -> [0] Done. A seq number: 16
> > > [    0.114209] rcu: -> [0] Invoke synchronize_rcu(): seq number: 32
> > > [    0.114662] rcu: -> [0] Done. A seq number: 36
> > > [    0.115191] rcu: -> [0] Invoke synchronize_rcu(): seq number: 36
> > > [    0.115684] rcu: -> [0] Done. A seq number: 40
> > > [    0.116046] rcu: -> [0] Invoke synchronize_rcu(): seq number: 48
> > > [    0.116202] rcu: -> [0] Done. A seq number: 52
> > 
> > Just for a backup:
> > 
> > urezki@pc638:~/data/raid0/coding/linux-rcu.git$ git diff
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 76973d716921..dea7dcc6973b 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -3107,6 +3107,18 @@ static void kfree_rcu_work(struct work_struct *work)
> >                         }
> >                         rcu_lock_release(&rcu_callback_map);
> >  
> > +                       {
> > +                               unsigned long seq_a;
> > +                               unsigned long seq_b;
> > +
> > +                               seq_a = get_state_synchronize_rcu();
> > +                               synchronize_rcu();
> > +                               seq_b = get_state_synchronize_rcu();
> > +
> > +                               if (seq_a == seq_b)
> > +                                       BUG();
> > +                       }
> > +
> >                         raw_spin_lock_irqsave(&krcp->lock, flags);
> >                         if (put_cached_bnode(krcp, bkvhead[i]))
> >                                 bkvhead[i] = NULL;
> > urezki@pc638:~/data/raid0/coding/linux-rcu.git$ head -10 Makefile 
> > # SPDX-License-Identifier: GPL-2.0
> > VERSION = 6
> > PATCHLEVEL = 1
> > SUBLEVEL = 0
> > EXTRAVERSION = -rc1
> > NAME = Hurr durr I'ma ninja sloth
> > 
> > # *DOCUMENTATION*
> > # To see a list of typical targets execute "make help"
> > # More info can be located in ./README
> > urezki@pc638:~/data/raid0/coding/linux-rcu.git
> > 
> > urezki@pc638:~/data/raid0/coding/linux-rcu.git$ ../scripts/rcuscale.sh
> > tools/testing/selftests/rcutorture/initrd/init already exists, no need to create it
> > Results directory: /home/urezki/data/raid0/coding/linux-rcu.git/tools/testing/selftests/rcutorture/res/2022.11.25-17.50.32
> > tools/testing/selftests/rcutorture/bin/kvm.sh --memory 10G --torture rcuscale --allcpus --duration 1 --kconfig CONFIG_NR_CPUS=64 --kconfig CONFIG_RCU_NOCB_CPU=y --kconfig CONFIG_RCU_NOCB_CPU_DEFAULT_ALL=y --kconfig CONFIG_RCU_LAZY=n --bootargs rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot --trust-make
> > ----Start batch 1: Fri 25 Nov 2022 05:50:32 PM CET
> > TREE 64: Starting build. Fri 25 Nov 2022 05:50:32 PM CET
> > TREE 64: Waiting for build to complete. Fri 25 Nov 2022 05:50:32 PM CET
> > TREE 64: Build complete. Fri 25 Nov 2022 05:50:37 PM CET
> > ---- TREE 64: Kernel present. Fri 25 Nov 2022 05:50:37 PM CET
> > ---- Starting kernels. Fri 25 Nov 2022 05:50:37 PM CET
> > 
> > 
> > 
> > ---- All kernel runs complete. Fri 25 Nov 2022 05:54:37 PM CET
> > ---- TREE 64: Build/run results:
> >  --- Fri 25 Nov 2022 05:50:32 PM CET: Starting build, PID 1284995
> >  --- Fri 25 Nov 2022 05:50:37 PM CET: Starting kernel
> >  --- Fri 25 Nov 2022 05:50:37 PM CET: Starting kernel, PID 1291757
> > QEMU 5.2.0 monitor - type 'help' for more information
> > (qemu) Monitoring qemu job at pid 1291776 Fri 25 Nov 2022 05:50:47 PM CET
> > Grace period for qemu job at pid 1291776 Fri 25 Nov 2022 05:51:37 PM CET
> > WARNING: Assertion failure in /home/urezki/data/raid0/coding/linux-rcu.git/tools/testing/selftests/rcutorture/res/2022.11.25-17.50.32/TREE/console.log TREE
> > WARNING: Summary: Bugs: 4 Call Traces: 6
> > 
> > urezki@pc638:~/data/raid0/coding/linux-rcu.git$ cat ../scripts/rcuscale.sh
> > #! /usr/bin/env bash
> > 
> > LOOPS=10
> > 
> > for (( i=0; i<$LOOPS; i++ )); do
> >         tools/testing/selftests/rcutorture/bin/kvm.sh --memory 10G --torture rcuscale --allcpus --duration 1 \
> >         --kconfig CONFIG_NR_CPUS=64 \
> >         --kconfig CONFIG_RCU_NOCB_CPU=y \
> >         --kconfig CONFIG_RCU_NOCB_CPU_DEFAULT_ALL=y \
> >         --kconfig CONFIG_RCU_LAZY=n \
> >         --bootargs "rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot" --trust-make
> >         echo "Done $i"
> > done
> > urezki@pc638:~/data/raid0/coding/linux-rcu.git$
> > 
> > [    0.911180] ------------[ cut here ]------------
> > [    0.911747] kernel BUG at kernel/rcu/tree.c:3119!
> > [    0.912332] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
> > [    0.912984] CPU: 0 PID: 408 Comm: kworker/0:2 Not tainted 6.1.0-rc1-00098-gcda6ad040ade-dirty #1509
> > [    0.913325] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014
> > [    0.913325] Workqueue: events kfree_rcu_work
> > [    0.913325] RIP: 0010:kfree_rcu_work+0x348/0x350
> > [    0.913325] Code: 85 48 ff ff ff 48 89 df e8 65 61 ff ff e9 3b ff ff ff c7 44 24 08 01 00 00 00 49 8b 6d 00 e9 49 fd ff ff 0f 0b e9 e5 fe ff ff <0f> 0b e8 91 5d af 00 90 48 83 ec 40 65 48 8b 04 25 28 00 00 00 48
> > [    0.913325] RSP: 0018:ffff9fc680fc3e08 EFLAGS: 00010246
> > [    0.913325] RAX: 0000000000000068 RBX: 0000000000000068 RCX: 0000000000000000
> > [    0.913325] RDX: 0000000000000000 RSI: 0000000000000287 RDI: ffffffffab32f9f8
> > [    0.913325] RBP: ffff8b2a45525000 R08: ffffffffab32ee30 R09: ffffffffab32ee30
> > [    0.913325] R10: 0000000000000000 R11: 0000000000000001 R12: ffff8b2a410d5580
> > [    0.913325] R13: ffff9fc680fc3e28 R14: ffff8b2a45524000 R15: ffff8b2c34e1bb28
> > [    0.913325] FS:  0000000000000000(0000) GS:ffff8b2c34e00000(0000) knlGS:0000000000000000
> > [    0.913325] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [    0.913325] CR2: ffff8b2c3ffff000 CR3: 000000029aa0c000 CR4: 00000000000006f0
> > [    0.913325] Call Trace:
> > [    0.913325]  <TASK>
> > [    0.913325]  ? finish_task_switch.isra.0+0x89/0x260
> > [    0.913325]  process_one_work+0x1b4/0x310
> > [    0.913325]  worker_thread+0x48/0x3c0
> > [    0.913325]  ? process_one_work+0x310/0x310
> > [    0.913325]  kthread+0xe2/0x110
> > [    0.913325]  ? kthread_complete_and_exit+0x20/0x20
> > [    0.913325]  ret_from_fork+0x22/0x30
> > [    0.913325]  </TASK>
> > [    0.913325] Modules linked in:
> > [    0.930724] calling  init_posix_timers+0x0/0x29 @ 1
> > [    0.931651] initcall init_posix_timers+0x0/0x29 returned 0 after 2 usecs
> > [    0.933036] calling  clockevents_init_sysfs+0x0/0xc7 @ 1
> > [    0.933179] ------------[ cut here ]------------
> > [    0.934353] initcall clockevents_init_sysfs+0x0/0xc7 returned 0 after 406 usecs
> > [    0.934812] kernel BUG at kernel/rcu/tree.c:3119!
> 
> And 110 runs had 46 cases where get_state_synchronize_rcu()
> failed to notice a grace period's passage, and none for
> get_state_synchronize_rcu_full().
> 
> So my advice is to either tolerate the occasional obliviousness of
> get_state_synchronize_rcu() or switch to get_state_synchronize_rcu_full().

Here is the patch I used, just for posterity.

							Thanx, Paul

------------------------------------------------------------------------

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index ae24f6f94f2b7..3de9e95715631 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2905,6 +2905,10 @@ static void kfree_rcu_work(struct work_struct *work)
 	struct kfree_rcu_cpu *krcp;
 	struct kfree_rcu_cpu_work *krwp;
 	int i, j;
+	unsigned long oldstate;
+	struct rcu_gp_oldstate oldfullstate;
+	unsigned long newstate;
+	struct rcu_gp_oldstate newfullstate;
 
 	krwp = container_of(to_rcu_work(work),
 			    struct kfree_rcu_cpu_work, rcu_work);
@@ -2957,6 +2961,15 @@ static void kfree_rcu_work(struct work_struct *work)
 
 			cond_resched_tasks_rcu_qs();
 		}
+		oldstate = get_state_synchronize_rcu();
+		get_state_synchronize_rcu_full(&oldfullstate);
+		pr_err("-> [%d] Invoke synchronize_rcu(): seq number: %lu\n", current->pid, oldstate);
+		synchronize_rcu();
+		get_state_synchronize_rcu_full(&newfullstate);
+		newstate = get_state_synchronize_rcu();
+		WARN_ON_ONCE(same_state_synchronize_rcu(oldstate, newstate));
+		WARN_ON_ONCE(same_state_synchronize_rcu_full(&oldfullstate, &newfullstate));
+		pr_err("-> [%d] Done. A seq number: %lu\n", current->pid, newstate);
 	}
 
 	/*



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux