Re: [PATCH v2 6/7] mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Feb 25, 2025 at 10:05:37AM -0700, Keith Busch wrote:
> On Tue, Feb 25, 2025 at 09:03:38AM -0700, Keith Busch wrote:
> > On Tue, Feb 25, 2025 at 10:57:38AM +0100, Vlastimil Babka wrote:
> > > I tried to create a kunit test for it, but it doesn't trigger anything. Maybe
> > > it's too simple, or racy, and thus we are not flushing any of the queues from
> > > kvfree_rcu_barrier()?
> >
> > Thanks, your test readily triggers it for me, but only if I load
> > rcutorture at the same time.
> 
> Oops, I sent the wrong kernel messages. This is the relevant part:
> 
> [  142.371052] workqueue: WQ_MEM_RECLAIM
> test_kfree_rcu_destroy_wq:cache_destroy_workfn [slub_kunit] is
> flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
> [  142.371072] WARNING: CPU: 11 PID: 186 at kernel/workqueue.c:3715
> check_flush_dependency.part.0+0xad/0x100
> [  142.375748] Modules linked in: slub_kunit(E) rcutorture(E)
> torture(E) kunit(E) iTCO_wdt(E) iTCO_vendor_support(E)
> intel_uncore_frequency_common(E) skx_edac_common(E) nfit(E)
> libnvdimm(E) kvm_intel(E) kvm(E) evdev(E) bochs(E) serio_raw(E)
> drm_kms_helper(E) i2c_i801(E) e1000e(E) i2c_smbus(E) intel_agp(E)
> intel_gtt(E) lpc_ich(E) agpgart(E) mfd_core(E) drm_shm]
> [  142.384553] CPU: 11 UID: 0 PID: 186 Comm: kworker/u64:11 Tainted: G
>            E    N 6.13.0-04839-g5e7b40f0ddce-dirty #831
> [  142.386755] Tainted: [E]=UNSIGNED_MODULE, [N]=TEST
> [  142.387849] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
> BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
> [  142.390236] Workqueue: test_kfree_rcu_destroy_wq
> cache_destroy_workfn [slub_kunit]
> [  142.391863] RIP: 0010:check_flush_dependency.part.0+0xad/0x100
> [  142.393183] Code: 75 dc 48 8b 55 18 49 8d 8d 78 01 00 00 4d 89 f0
> 48 81 c6 78 01 00 00 48 c7 c7 00 e1 9a 82 c6 05 4f 39 c5 02 01 e8 53
> bd fd ff <0f> 0b 5b 5d 41 5c 41 5d 41 5e c3 80 3d 39 39 c5 02 00 75 83
> 41 8b
> [  142.396981] RSP: 0018:ffffc900007cfc90 EFLAGS: 00010092
> [  142.398124] RAX: 000000000000008f RBX: ffff88803e9b10a0 RCX: 0000000000000027
> [  142.399605] RDX: ffff88803eba0d08 RSI: 0000000000000001 RDI: ffff88803eba0d00
> [  142.401092] RBP: ffff888007d9a480 R08: ffffffff83b8c808 R09: 0000000000000003
> [  142.402548] R10: ffffffff8348c820 R11: ffffffff83a11d58 R12: ffff888007150000
> [  142.404098] R13: ffff888005961400 R14: ffffffff813221a0 R15: ffff888005961400
> [  142.405561] FS:  0000000000000000(0000) GS:ffff88803eb80000(0000)
> knlGS:0000000000000000
> [  142.407297] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  142.408658] CR2: 00007f826bd1a000 CR3: 00000000069db002 CR4: 0000000000772ef0
> [  142.410259] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [  142.411871] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [  142.413341] PKRU: 55555554
> [  142.414038] Call Trace:
> [  142.414658]  <TASK>
> [  142.415249]  ? __warn+0x8d/0x180
> [  142.416035]  ? check_flush_dependency.part.0+0xad/0x100
> [  142.417182]  ? report_bug+0x160/0x170
> [  142.418041]  ? handle_bug+0x4f/0x90
> [  142.418861]  ? exc_invalid_op+0x14/0x70
> [  142.419853]  ? asm_exc_invalid_op+0x16/0x20
> [  142.420877]  ? kfree_rcu_shrink_scan+0x120/0x120
> [  142.422029]  ? check_flush_dependency.part.0+0xad/0x100
> [  142.423244]  __flush_work+0x38a/0x4a0
> [  142.424157]  ? find_held_lock+0x2b/0x80
> [  142.425070]  ? flush_rcu_work+0x26/0x40
> [  142.425953]  ? lock_release+0xb3/0x250
> [  142.426785]  ? __mutex_unlock_slowpath+0x2c/0x270
> [  142.427906]  flush_rcu_work+0x30/0x40
> [  142.428756]  kvfree_rcu_barrier+0xe9/0x130
> [  142.429649]  kmem_cache_destroy+0x2b/0x1f0
> [  142.430578]  cache_destroy_workfn+0x20/0x40 [slub_kunit]
> [  142.431729]  process_one_work+0x1cd/0x560
> [  142.432620]  worker_thread+0x183/0x310
> [  142.433487]  ? rescuer_thread+0x330/0x330
> [  142.434428]  kthread+0xd8/0x1d0
> [  142.435248]  ? ret_from_fork+0x17/0x50
> [  142.436165]  ? lock_release+0xb3/0x250
> [  142.437106]  ? kthreads_online_cpu+0xf0/0xf0
> [  142.438133]  ret_from_fork+0x2d/0x50
> [  142.439045]  ? kthreads_online_cpu+0xf0/0xf0
> [  142.440428]  ret_from_fork_asm+0x11/0x20
> [  142.441476]  </TASK>
> [  142.442152] irq event stamp: 22858
> [  142.443002] hardirqs last  enabled at (22857): [<ffffffff82044ef4>]
> _raw_spin_unlock_irq+0x24/0x30
> [  142.445032] hardirqs last disabled at (22858): [<ffffffff82044ce3>]
> _raw_spin_lock_irq+0x43/0x50
> [  142.451450] softirqs last  enabled at (22714): [<ffffffff810bfdbc>]
> __irq_exit_rcu+0xac/0xd0
> [  142.453345] softirqs last disabled at (22709): [<ffffffff810bfdbc>]
> __irq_exit_rcu+0xac/0xd0
> [  142.455305] ---[ end trace 0000000000000000 ]---
Thanks!

I can also trigger this:

<snip>
[   21.712856] KTAP version 1
[   21.712862] 1..1
[   21.714486]     KTAP version 1
[   21.714490]     # Subtest: slub_test
[   21.714492]     # module: slub_kunit
[   21.714495]     1..10
[   21.750359]     ok 1 test_clobber_zone
[   21.750955]     ok 2 test_next_pointer
[   21.751532]     ok 3 test_first_word
[   21.751991]     ok 4 test_clobber_50th_byte
[   21.752493]     ok 5 test_clobber_redzone_free
[   21.753004] stackdepot: allocating hash table of 1048576 entries via kvcalloc
[   21.756176]     ok 6 test_kmalloc_redzone_access
[   21.806549]     ok 7 test_kfree_rcu
[   22.058010] ------------[ cut here ]------------
[   22.058015] workqueue: WQ_MEM_RECLAIM test_kfree_rcu_destroy_wq:cache_destroy_workfn [slub_kunit] is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
[   22.058039] WARNING: CPU: 19 PID: 474 at kernel/workqueue.c:3715 check_flush_dependency.part.0+0xbe/0x130
[   22.058047] Modules linked in: slub_kunit(E) kunit(E) binfmt_misc(E) bochs(E) drm_client_lib(E) drm_shmem_helper(E) ppdev(E) drm_kms_helper(E) snd_pcm(E) sg(E) snd_timer(E) evdev(E) snd(E) joydev(E) parport_pc(E) parport(E) soundcore(E) serio_raw(E) button(E) pcspkr(E) drm(E) fuse(E) dm_mod(E) efi_pstore(E) configfs(E) loop(E) qemu_fw_cfg(E) ip_tables(E) x_tables(E) autofs4(E) ext4(E) crc16(E) mbcache(E) jbd2(E) sr_mod(E) sd_mod(E) cdrom(E) ata_generic(E) ata_piix(E) libata(E) scsi_mod(E) i2c_piix4(E) psmouse(E) e1000(E) i2c_smbus(E) scsi_common(E) floppy(E)
[   22.058091] CPU: 19 UID: 0 PID: 474 Comm: kworker/u257:0 Kdump: loaded Tainted: G            E    N 6.14.0-rc1+ #286
[   22.058096] Tainted: [E]=UNSIGNED_MODULE, [N]=TEST
[   22.058097] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[   22.058099] Workqueue: test_kfree_rcu_destroy_wq cache_destroy_workfn [slub_kunit]
[   22.058103] RIP: 0010:check_flush_dependency.part.0+0xbe/0x130
[   22.058106] Code: 75 d0 48 8b 55 18 49 8d 8d c0 00 00 00 4d 89 f0 48 81 c6 c0 00 00 00 48 c7 c7 b0 7d c8 bd c6 05 6c 78 53 01 01 e8 a2 ae fd ff <0f> 0b 5b 5d 41 5c 41 5d 41 5e c3 cc cc cc cc f6 c4 08 74 94 31 ed
[   22.058108] RSP: 0018:ffff95e5c123fd50 EFLAGS: 00010086
[   22.058111] RAX: 0000000000000000 RBX: ffff89a4ff22d5a0 RCX: 0000000000000000
[   22.058113] RDX: 0000000000000003 RSI: ffffffffbdce1697 RDI: 00000000ffffffff
[   22.058114] RBP: ffff89961043a780 R08: 0000000000000000 R09: 0000000000000003
[   22.058116] R10: ffff95e5c123fbe8 R11: ffff89a53fefefa8 R12: ffff89960cb6b080
[   22.058117] R13: ffff899600051400 R14: ffffffffbcf2ba80 R15: ffff89960005a800
[   22.058120] FS:  0000000000000000(0000) GS:ffff89a4ff2c0000(0000) knlGS:0000000000000000
[   22.058122] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   22.058124] CR2: 000055bf2cbc6038 CR3: 000000010dc1e000 CR4: 00000000000006f0
[   22.058128] Call Trace:
[   22.058130]  <TASK>
[   22.058133]  ? __warn+0x85/0x130
[   22.058137]  ? check_flush_dependency.part.0+0xbe/0x130
[   22.058139]  ? report_bug+0x18d/0x1c0
[   22.058142]  ? prb_read_valid+0x17/0x20
[   22.058147]  ? handle_bug+0x58/0x90
[   22.058151]  ? exc_invalid_op+0x13/0x60
[   22.058154]  ? asm_exc_invalid_op+0x16/0x20
[   22.058158]  ? __pfx_kfree_rcu_work+0x10/0x10
[   22.058162]  ? check_flush_dependency.part.0+0xbe/0x130
[   22.058165]  __flush_work+0xd6/0x320
[   22.058168]  flush_rcu_work+0x39/0x50
[   22.058171]  kvfree_rcu_barrier+0xe9/0x130
[   22.058174]  kmem_cache_destroy+0x18/0x140
[   22.058177]  process_one_work+0x184/0x3a0
[   22.058180]  worker_thread+0x24d/0x360
[   22.058183]  ? __pfx_worker_thread+0x10/0x10
[   22.058185]  kthread+0xfc/0x230
[   22.058189]  ? finish_task_switch.isra.0+0x85/0x2a0
[   22.058192]  ? __pfx_kthread+0x10/0x10
[   22.058195]  ret_from_fork+0x30/0x50
[   22.058199]  ? __pfx_kthread+0x10/0x10
[   22.058202]  ret_from_fork_asm+0x1a/0x30
[   22.058206]  </TASK>
[   22.058207] ---[ end trace 0000000000000000 ]---
[   23.123507]     ok 8 test_kfree_rcu_wq_destroy
[   23.151033]     ok 9 test_leak_destroy
[   23.151612]     ok 10 test_krealloc_redzone_zeroing
[   23.151617] # slub_test: pass:10 fail:0 skip:0 total:10
[   23.151619] # Totals: pass:10 fail:0 skip:0 total:10
[   23.151620] ok 1 slub_test
urezki@pc638:~$
<snip>

but I had to slightly adapt Vlastimil's test:

diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
index f11691315c2f..222f6d204b0d 100644
--- a/lib/slub_kunit.c
+++ b/lib/slub_kunit.c
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/rcupdate.h>
+#include <linux/delay.h>
 #include "../mm/slab.h"

 static struct kunit_resource resource;
@@ -181,6 +182,63 @@ static void test_kfree_rcu(struct kunit *test)
        KUNIT_EXPECT_EQ(test, 0, slab_errors);
 }

+struct cache_destroy_work {
+        struct work_struct work;
+        struct kmem_cache *s;
+};
+
+static void cache_destroy_workfn(struct work_struct *w)
+{
+       struct cache_destroy_work *cdw;
+
+       cdw = container_of(w, struct cache_destroy_work, work);
+       kmem_cache_destroy(cdw->s);
+}
+
+#define KMEM_CACHE_DESTROY_NR 10
+
+static void test_kfree_rcu_wq_destroy(struct kunit *test)
+{
+       struct test_kfree_rcu_struct *p;
+       struct cache_destroy_work cdw;
+       struct workqueue_struct *wq;
+       struct kmem_cache *s;
+       unsigned int rnd;
+       int i;
+
+       if (IS_BUILTIN(CONFIG_SLUB_KUNIT_TEST))
+               kunit_skip(test, "can't do kfree_rcu() when test is built-in");
+
+       INIT_WORK_ONSTACK(&cdw.work, cache_destroy_workfn);
+       wq = alloc_workqueue("test_kfree_rcu_destroy_wq",
+                       WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+
+       if (!wq)
+               kunit_skip(test, "failed to alloc wq");
+
+       for (i = 0; i < KMEM_CACHE_DESTROY_NR; i++) {
+               s = test_kmem_cache_create("TestSlub_kfree_rcu_wq_destroy",
+                               sizeof(struct test_kfree_rcu_struct),
+                               SLAB_NO_MERGE);
+
+               if (!s)
+                       kunit_skip(test, "failed to create cache");
+
+               rnd = get_random_u8() % 255;
+               p = kmem_cache_alloc(s, GFP_KERNEL);
+               kfree_rcu(p, rcu);
+
+               cdw.s = s;
+
+               msleep(rnd);
+               queue_work(wq, &cdw.work);
+               flush_work(&cdw.work);
+       }
+
+       destroy_workqueue(wq);
+       KUNIT_EXPECT_EQ(test, 0, slab_errors);
+}
+
 static void test_leak_destroy(struct kunit *test)
 {
        struct kmem_cache *s = test_kmem_cache_create("TestSlub_leak_destroy",
@@ -254,6 +312,7 @@ static struct kunit_case test_cases[] = {
        KUNIT_CASE(test_clobber_redzone_free),
        KUNIT_CASE(test_kmalloc_redzone_access),
        KUNIT_CASE(test_kfree_rcu),
+       KUNIT_CASE(test_kfree_rcu_wq_destroy),
        KUNIT_CASE(test_leak_destroy),
        KUNIT_CASE(test_krealloc_redzone_zeroing),
        {}

--
Uladzislau Rezki




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux