There is a crash when running the cryptsetup testsuite on Fedora Rawhide. It can be reproduced by installing Rawhide with the 6.0-rc6 kernel, downloading and compiling the cryptsetup repository and running this test in a loop for about 15 minutes: while ./integrity-compat-test; do :; done ------------[ cut here ]------------ WARNING: CPU: 0 PID: 50087 at fs/kernfs/dir.c:504 __kernfs_remove.part.0+0x26f/0x2b0 Modules linked in: crc32_generic loop dm_integrity async_xor async_tx tls isofs uinput snd_seq_dummy snd_hrtimer nft_objref nf_conntrack_netbios_ns nf_conntrack_broadcast nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 rfkill ip_set nf_tables nfnetlink qrtr sunrpc snd_hda_codec_generic ledtrig_audio snd_hda_intel iTCO_wdt snd_intel_dspcfg intel_pmc_bxt snd_intel_sdw_acpi iTCO_vendor_support snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device joydev snd_pcm i2c_i801 snd_timer pcspkr i2c_smbus virtio_balloon snd lpc_ich soundcore zram virtio_net net_failover virtio_blk serio_raw failover qxl virtio_console drm_ttm_helper ttm ip6_tables ip_tables fuse qemu_fw_cfg Unloaded tainted modules: crc32_pclmul():1 pcc_cpufreq():1 pcc_cpufreq():1 acpi_cpufreq():1 edac_mce_amd():1 pcc_cpufreq():1 acpi_cpufreq():1 edac_mce_amd():1 acpi_cpufreq():1 edac_mce_amd():1 pcc_cpufreq():1 acpi_cpufreq():1 pcc_cpufreq():1 edac_mce_amd():1 edac_mce_amd():1 acpi_cpufreq():1 pcc_cpufreq():1 edac_mce_amd():1 acpi_cpufreq():1 pcc_cpufreq():1 edac_mce_amd():1 acpi_cpufreq():1 pcc_cpufreq():1 edac_mce_amd():1 acpi_cpufreq():1 edac_mce_amd():1 pcc_cpufreq():1 edac_mce_amd():1 acpi_cpufreq():1 pcc_cpufreq():1 edac_mce_amd():1 pcc_cpufreq():1 acpi_cpufreq():1 edac_mce_amd():1 pcc_cpufreq():1 acpi_cpufreq():1 acpi_cpufreq():1 CPU: 0 PID: 50087 Comm: integritysetup Not tainted 6.0.0-0.rc6.41.fc38.x86_64 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 
1.14.0-2 04/01/2014 RIP: 0010:__kernfs_remove.part.0+0x26f/0x2b0 Code: 0f 85 24 ff ff ff 4d 85 e4 0f 84 31 ff ff ff 41 0f b7 44 24 70 4c 89 e3 83 e0 0f 66 83 f8 01 0f 84 2c fe ff ff e9 50 fe ff ff <0f> 0b e9 53 fe ff ff 0f 0b e9 6b fe ff ff 48 8b 57 10 48 c7 c6 e8 RSP: 0018:ffffa2e001da7a78 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8e509d111380 RCX: 0000000080200015 RDX: 0000000000000000 RSI: fffff015c5744440 RDI: ffff8e509d1113c8 RBP: ffffa2e001da7ac0 R08: 0000000000000000 R09: 0000000080200015 R10: 0000000000000009 R11: ffff8e514b185488 R12: ffff8e509d111380 R13: ffff8e5114324f98 R14: ffff8e50401e92a0 R15: 0000000000000000 FS: 00007fbbbdc13880(0000) GS:ffff8e53afa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055d1cdc94248 CR3: 0000000219990000 CR4: 00000000000006f0 Call Trace: <TASK> ? _raw_spin_unlock_irqrestore+0x23/0x40 ? kernfs_name_hash+0x12/0x80 kernfs_remove_by_name_ns+0x60/0xa0 sysfs_slab_add+0x166/0x200 __kmem_cache_create+0x3c9/0x4b0 kmem_cache_create_usercopy+0x202/0x340 kmem_cache_create+0x12/0x20 bioset_init+0x1fe/0x280 dm_table_complete+0x3cd/0x6f0 table_load+0x140/0x2c0 ? dev_suspend+0x2e0/0x2e0 ctl_ioctl+0x1f2/0x450 dm_ctl_ioctl+0xa/0x20 __x64_sys_ioctl+0x90/0xd0 do_syscall_64+0x5b/0x80 ? fpregs_restore_userregs+0x12/0xe0 ? exit_to_user_mode_prepare+0x18f/0x1f0 ? syscall_exit_to_user_mode+0x17/0x40 ? do_syscall_64+0x67/0x80 ? 
do_syscall_64+0x67/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fbbbdf27daf Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 18 48 8b 44 24 18 64 48 2b 04 25 28 00 00 RSP: 002b:00007ffc3c305540 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000001f897e0 RCX: 00007fbbbdf27daf RDX: 0000000001f8e000 RSI: 00000000c138fd09 RDI: 0000000000000006 RBP: 0000000000000003 R08: 0000000001f8b150 R09: 0000000000000073 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fbbbe57c6b6 R13: 00007fbbbe57c38c R14: 0000000001f8e030 R15: 00007fbbbe57c78c </TASK> ---[ end trace 0000000000000000 ]--- ------------[ cut here ]------------ kernel BUG at mm/slub.c:381! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 50087 Comm: integritysetup Tainted: G W ------- --- 6.0.0-0.rc6.41.fc38.x86_64 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014 RIP: 0010:__slab_free+0x11c/0x2f0 Code: 39 49 8b 04 24 48 89 4c 24 18 48 c1 e8 36 4c 8b ac c3 d8 00 00 00 4c 89 ef e8 90 a2 a3 00 48 8b 4c 24 18 48 89 44 24 20 eb 8e <0f> 0b f7 43 08 00 0d 21 00 75 cc 4d 85 ff 75 c7 80 4c 24 53 80 e9 RSP: 0018:ffffa2e001da7998 EFLAGS: 00010246 RAX: ffff8e5059948440 RBX: ffff8e5040042200 RCX: 0000000082000127 RDX: fffffffcab948440 RSI: fffff015c4665200 RDI: ffff8e5040042200 RBP: ffff8e5059948440 R08: 0000000000000001 R09: ffffffffae457f18 R10: 0000000000000009 R11: ffff8e514b185488 R12: fffff015c4665200 R13: ffff8e509d111398 R14: ffff8e5059948440 R15: ffff8e5059948440 FS: 00007fbbbdc13880(0000) GS:ffff8e53afa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055d1cdc94248 CR3: 0000000219990000 CR4: 00000000000006f0 Call Trace: <TASK> ? osq_unlock+0xf/0x90 ? rwsem_down_write_slowpath+0x1fc/0x710 kernfs_put.part.0+0x58/0x1a0 __kernfs_remove.part.0+0x17c/0x2b0 ? 
_raw_spin_unlock_irqrestore+0x23/0x40 ? kernfs_name_hash+0x12/0x80 kernfs_remove_by_name_ns+0x60/0xa0 sysfs_slab_add+0x166/0x200 __kmem_cache_create+0x3c9/0x4b0 kmem_cache_create_usercopy+0x202/0x340 kmem_cache_create+0x12/0x20 bioset_init+0x1fe/0x280 dm_table_complete+0x3cd/0x6f0 table_load+0x140/0x2c0 ? dev_suspend+0x2e0/0x2e0 ctl_ioctl+0x1f2/0x450 dm_ctl_ioctl+0xa/0x20 __x64_sys_ioctl+0x90/0xd0 do_syscall_64+0x5b/0x80 ? fpregs_restore_userregs+0x12/0xe0 ? exit_to_user_mode_prepare+0x18f/0x1f0 ? syscall_exit_to_user_mode+0x17/0x40 ? do_syscall_64+0x67/0x80 ? do_syscall_64+0x67/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fbbbdf27daf Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 18 48 8b 44 24 18 64 48 2b 04 25 28 00 00 RSP: 002b:00007ffc3c305540 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000001f897e0 RCX: 00007fbbbdf27daf RDX: 0000000001f8e000 RSI: 00000000c138fd09 RDI: 0000000000000006 RBP: 0000000000000003 R08: 0000000001f8b150 R09: 0000000000000073 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fbbbe57c6b6 R13: 00007fbbbe57c38c R14: 0000000001f8e030 R15: 00007fbbbe57c78c </TASK> Modules linked in: crc32_generic loop dm_integrity async_xor async_tx tls isofs uinput snd_seq_dummy snd_hrtimer nft_objref nf_conntrack_netbios_ns nf_conntrack_broadcast nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 rfkill ip_set nf_tables nfnetlink qrtr sunrpc snd_hda_codec_generic ledtrig_audio snd_hda_intel iTCO_wdt snd_intel_dspcfg intel_pmc_bxt snd_intel_sdw_acpi iTCO_vendor_support snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device joydev snd_pcm i2c_i801 snd_timer pcspkr i2c_smbus virtio_balloon snd lpc_ich soundcore zram virtio_net net_failover virtio_blk serio_raw failover qxl 
virtio_console drm_ttm_helper ttm ip6_tables ip_tables fuse qemu_fw_cfg Unloaded tainted modules: crc32_pclmul():1 pcc_cpufreq():1 pcc_cpufreq():1 acpi_cpufreq():1 edac_mce_amd():1 pcc_cpufreq():1 acpi_cpufreq():1 edac_mce_amd():1 acpi_cpufreq():1 edac_mce_amd():1 pcc_cpufreq():1 acpi_cpufreq():1 pcc_cpufreq():1 edac_mce_amd():1 edac_mce_amd():1 acpi_cpufreq():1 pcc_cpufreq():1 edac_mce_amd():1 acpi_cpufreq():1 pcc_cpufreq():1 edac_mce_amd():1 acpi_cpufreq():1 pcc_cpufreq():1 edac_mce_amd():1 acpi_cpufreq():1 edac_mce_amd():1 pcc_cpufreq():1 edac_mce_amd():1 acpi_cpufreq():1 pcc_cpufreq():1 edac_mce_amd():1 pcc_cpufreq():1 acpi_cpufreq():1 edac_mce_amd():1 pcc_cpufreq():1 acpi_cpufreq():1 acpi_cpufreq():1 ---[ end trace 0000000000000000 ]--- RIP: 0010:__slab_free+0x11c/0x2f0 Code: 39 49 8b 04 24 48 89 4c 24 18 48 c1 e8 36 4c 8b ac c3 d8 00 00 00 4c 89 ef e8 90 a2 a3 00 48 8b 4c 24 18 48 89 44 24 20 eb 8e <0f> 0b f7 43 08 00 0d 21 00 75 cc 4d 85 ff 75 c7 80 4c 24 53 80 e9 RSP: 0018:ffffa2e001da7998 EFLAGS: 00010246 RAX: ffff8e5059948440 RBX: ffff8e5040042200 RCX: 0000000082000127 RDX: fffffffcab948440 RSI: fffff015c4665200 RDI: ffff8e5040042200 RBP: ffff8e5059948440 R08: 0000000000000001 R09: ffffffffae457f18 R10: 0000000000000009 R11: ffff8e514b185488 R12: fffff015c4665200 R13: ffff8e509d111398 R14: ffff8e5059948440 R15: ffff8e5059948440 FS: 00007fbbbdc13880(0000) GS:ffff8e53afa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055d1cdc94248 CR3: 0000000219990000 CR4: 00000000000006f0 The reason for the crash is this: * create_bio_slab creates the bio slab with SLAB_TYPESAFE_BY_RCU - that means that the slab destruction will be delayed by a rcu grace period * when destroying a slab with SLAB_TYPESAFE_BY_RCU, the function shutdown_cache adds the slab to a list and calls slab_caches_to_rcu_destroy_workfn that will execute rcu_barrier() and free slab caches that are on the list * while slab_caches_to_rcu_destroy_workfn attempts 
to free the slab, the function bioset_init creates another slab with the same name * bioset_init goes down to sysfs_slab_add -> sysfs_remove_link -> kernfs_remove_by_name -> kernfs_remove_by_name_ns * simultaneously, slab_caches_to_rcu_destroy_workfn goes down to kmem_cache_release, which tries to unlink and release the slab kobject * now, we have two processes that are simultaneously trying to delete the same kobject * kernfs_remove_by_name_ns wins the race, grabs the lock &root->kernfs_rwsem and executes __kernfs_remove * __kernfs_remove goes down to pos = kernfs_leftmost_descendant(kn), then it calls kernfs_get(pos) and kernfs_drain(pos) * kernfs_drain(pos) drops the &root->kernfs_rwsem lock temporarily * while the lock is dropped, kmem_cache_release calls sysfs_slab_unlink -> kobject_del -> __kobject_del -> sysfs_remove_dir -> kernfs_remove -> __kernfs_remove * kmem_cache_release then calls sysfs_slab_release -> kobject_put -> kref_put -> kobject_release -> kobject_cleanup - this frees the kobject * then we go back to the process that dropped the lock &root->kernfs_rwsem in kernfs_drain(pos) - the process re-acquires the lock and returns to __kernfs_remove. The process still holds a reference to the "pos" kobject; however, it no longer holds a reference to the "kn" kobject (which was freed in the step above). It executes kernfs_put(pos) and then continues the loop with "kn" pointing to freed memory * kernfs_leftmost_descendant(kn) returns "kn" because there are no files under "kn". kernfs_get(pos) triggers a warning WARN_ON(!atomic_read(&kn->count)). kernfs_put(pos) triggers a BUG in set_freepointer because it attempts to free an object that is already free. We fix this bug by grabbing a reference to "kn" in __kernfs_remove and dropping it when we are done. This prevents "kn" from being released when we drop the lock in kernfs_drain. 
Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx> Reported-by: Milan Broz <gmazyland@xxxxxxxxx> Cc: stable@xxxxxxxxxxxxxxx --- fs/kernfs/dir.c | 8 ++++++++ 1 file changed, 8 insertions(+) Index: linux-2.6/fs/kernfs/dir.c =================================================================== --- linux-2.6.orig/fs/kernfs/dir.c +++ linux-2.6/fs/kernfs/dir.c @@ -1364,6 +1364,12 @@ static void __kernfs_remove(struct kernf if (kernfs_active(pos)) atomic_add(KN_DEACTIVATED_BIAS, &pos->active); + /* + * Make sure that kn won't go away while we drop the lock in + * kernfs_drain(). + */ + kernfs_get(kn); + /* deactivate and unlink the subtree node-by-node */ do { pos = kernfs_leftmost_descendant(kn); @@ -1406,6 +1412,8 @@ static void __kernfs_remove(struct kernf kernfs_put(pos); } while (pos != kn); + + kernfs_put(kn); } /** -- dm-devel mailing list dm-devel@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/dm-devel