Hi, This soft lockup occurs on amdgpu vanilla torvalds kernel with KCSAN turned on. The platform is Ubuntu 22.04 LTS. Please find config and lshw here: https://domac.alu.unizg.hr/~mtodorov/linux/bugreports/amdgpu/6.5-rc7/ Here is the log excerpt: [ 69.050693] ================================================================== [ 69.198264] watchdog: BUG: soft lockup - CPU#1 stuck for 26s! [systemd-udevd:635] [ 69.198277] Modules linked in: amdgpu(+) snd_hwdep(+) nls_iso8859_1 amdxcp snd_pcm kvm(+) iommu_v2 drm_buddy gpu_sched irqbypass snd_seq_midi crct10dif_pclmul drm_suballoc_helper snd_seq_midi_event polyval_clmulni drm_ttm_helper snd_rawmidi polyval_generic ttm ghash_clmulni_intel sha512_ssse3 drm_display_helper snd_seq aesni_intel cec snd_seq_device snd_timer crypto_simd rc_core cryptd drm_kms_helper joydev input_leds snd rapl ccp wmi_bmof k10temp i2c_algo_bit soundcore sch_fq_codel mac_hid msr parport_pc drm ppdev lp parport ramoops pstore_blk reed_solomon pstore_zone efi_pstore ip_tables x_tables autofs4 btrfs blake2b_generic xor raid6_pq libcrc32c hid_generic usbhid hid nvme nvme_core r8169 ahci xhci_pci video crc32_pclmul libahci i2c_piix4 nvme_common realtek xhci_pci_renesas wmi gpio_amdpt [ 69.198669] CPU: 1 PID: 635 Comm: systemd-udevd Not tainted 6.5.0-rc7-kcsan-g706a74159504 #3 [ 69.198680] Hardware name: ASRock X670E PG Lightning/X670E PG Lightning, BIOS 1.21 04/26/2023 [ 69.198686] RIP: kcsan_setup_watchpoint+0x274/0x3f0 [ 69.198697] Code: 00 00 48 8b 45 c8 48 c7 00 00 00 00 00 f0 48 ff 0d c1 50 d5 0c 45 84 d2 75 10 48 f7 45 c0 00 02 00 00 74 06 fb 0f 1f 44 00 00 <83> 6b 04 01 41 57 9d 48 8d 65 d8 5b 41 5c 41 5d 41 5e 41 5f 5d 31 All code ======== 0: 00 00 add %al,(%rax) 2: 48 8b 45 c8 mov -0x38(%rbp),%rax 6: 48 c7 00 00 00 00 00 movq $0x0,(%rax) d: f0 48 ff 0d c1 50 d5 lock decq 0xcd550c1(%rip) # 0xcd550d6 14: 0c 15: 45 84 d2 test %r10b,%r10b 18: 75 10 jne 0x2a 1a: 48 f7 45 c0 00 02 00 testq $0x200,-0x40(%rbp) 21: 00 22: 74 06 je 0x2a 24: fb sti 25: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 2a:* 83 6b 04 01 subl $0x1,0x4(%rbx) <-- trapping instruction 2e: 41 57 push %r15 30: 9d popf 31: 48 8d 65 d8 lea -0x28(%rbp),%rsp 35: 5b pop %rbx 36: 41 5c pop %r12 38: 41 5d pop %r13 3a: 41 5e pop %r14 3c: 41 5f pop %r15 3e: 5d pop %rbp 3f: 31 .byte 0x31 Code starting with the faulting instruction =========================================== 0: 83 6b 04 01 subl $0x1,0x4(%rbx) 4: 41 57 push %r15 6: 9d popf 7: 48 8d 65 d8 lea -0x28(%rbp),%rsp b: 5b pop %rbx c: 41 5c pop %r12 e: 41 5d pop %r13 10: 41 5e pop %r14 12: 41 5f pop %r15 14: 5d pop %rbp 15: 31 .byte 0x31 [ 69.198726] RSP: 0018:ffffb4a0411f7638 EFLAGS: 00000206 [ 69.198743] RAX: ffffffffb699a748 RBX: ffff9829c57833f0 RCX: 0000000000000000 [ 69.198751] RDX: ffffffffc2569f90 RSI: ffffffffc2569f90 RDI: ffffffffb699a748 [ 69.198758] RBP: ffffb4a0411f7698 R08: 0000000000000001 R09: 0000000000000000 [ 69.198765] R10: 0000000000000000 R11: 0011ffffc26082c8 R12: 0000000000000008 [ 69.198783] R13: ffffffffc26082c8 R14: 0000000000000000 R15: 0000000000000292 [ 69.198790] FS: 00007f452d2438c0(0000) GS:ffff9838d8240000(0000) knlGS:0000000000000000 [ 69.198799] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 69.198807] CR2: 00007f452cea127f CR3: 000000012540a000 CR4: 0000000000750ee0 [ 69.198830] PKRU: 55555554 [ 69.198835] Call Trace: [ 69.198839] <IRQ> [ 69.198844] ? show_regs+0x72/0x90 [ 69.198856] ? watchdog_timer_fn+0x292/0x320 [ 69.198900] ? __pfx_watchdog_timer_fn+0x10/0x10 [ 69.198909] ? __hrtimer_run_queues+0x224/0x470 [ 69.198926] ? hrtimer_interrupt+0x1cb/0x3f0 [ 69.198937] ? __do_softirq+0xda/0x330 [ 69.198950] ? __sysvec_apic_timer_interrupt+0x86/0x1e0 [ 69.198980] ? sysvec_apic_timer_interrupt+0x8e/0xa0 [ 69.198990] </IRQ> [ 69.198994] <TASK> [ 69.199000] ? asm_sysvec_apic_timer_interrupt+0x1b/0x20 [ 69.199032] ? kcsan_setup_watchpoint+0x274/0x3f0 [ 69.199043] ? kcsan_setup_watchpoint+0x1de/0x3f0 [ 69.199055] ? find_kallsyms_symbol+0x139/0x340 [ 69.199068] __tsan_read8+0x11c/0x180 [ 69.199099] find_kallsyms_symbol+0x139/0x340 [ 69.199110] ? vcn_v1_0_enc_ring_emit_fence (drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c:1647 (discriminator 1)) amdgpu [ 69.204958] ? __pfx_vcn_v1_0_enc_ring_emit_fence (drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c:1646) amdgpu [ 69.210899] ? __pfx_vcn_v1_0_enc_ring_emit_fence (drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c:1646) amdgpu [ 69.216910] ? __pfx_vcn_v1_0_enc_ring_emit_fence (drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c:1646) amdgpu [ 69.222561] module_address_lookup+0x8c/0xe0 [ 69.222573] ? __pfx_vcn_v1_0_enc_ring_emit_fence (drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c:1646) amdgpu [ 69.228237] kallsyms_lookup_buildid+0x107/0x1b0 [ 69.228251] ? __pfx_vcn_v1_0_enc_ring_emit_fence (drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c:1646) amdgpu [ 69.234368] kallsyms_lookup+0x14/0x30 [ 69.234381] test_for_valid_rec+0x38/0x90 [ 69.234411] ? sched_clock_noinstr+0x9/0x10 [ 69.234448] ? srso_alias_return_thunk+0x5/0x7f [ 69.234459] ? __mutex_lock_slowpath+0x13/0x20 [ 69.234470] ? srso_alias_return_thunk+0x5/0x7f [ 69.234481] ? mutex_lock+0xa7/0xb0 [ 69.234492] ftrace_module_enable+0x22e/0x3b0 [ 69.234525] load_module+0x3357/0x3980 [ 69.234533] ? aa_file_perm+0x1fc/0x800 [ 69.234562] ? srso_alias_return_thunk+0x5/0x7f [ 69.234593] ? security_kernel_post_read_file+0x79/0x90 [ 69.234618] init_module_from_file+0xdf/0x130 [ 69.234642] ? srso_alias_return_thunk+0x5/0x7f [ 69.234653] ? init_module_from_file+0xdf/0x130 [ 69.234668] idempotent_init_module+0x241/0x360 [ 69.234683] __x64_sys_finit_module+0x8e/0xf0 [ 69.234693] do_syscall_64+0x58/0x90 [ 69.234705] ? srso_alias_return_thunk+0x5/0x7f [ 69.234716] ? exit_to_user_mode_prepare+0x76/0x230 [ 69.234748] ? srso_alias_return_thunk+0x5/0x7f [ 69.234758] ? syscall_exit_to_user_mode+0x29/0x40 [ 69.234769] ? srso_alias_return_thunk+0x5/0x7f [ 69.234780] ? do_syscall_64+0x68/0x90 [ 69.234803] ? srso_alias_return_thunk+0x5/0x7f [ 69.234830] ? exit_to_user_mode_prepare+0x76/0x230 [ 69.234841] ? srso_alias_return_thunk+0x5/0x7f [ 69.234852] ? syscall_exit_to_user_mode+0x29/0x40 [ 69.234869] ? srso_alias_return_thunk+0x5/0x7f [ 69.234888] ? do_syscall_64+0x68/0x90 [ 69.234897] ? srso_alias_return_thunk+0x5/0x7f [ 69.234922] ? do_syscall_64+0x68/0x90 [ 69.234952] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 69.234978] RIP: 0033:0x7f452d11ea3d [ 69.234996] Code: 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d c3 a3 0f 00 f7 d8 64 89 01 48 All code ======== 0: 5b pop %rbx 1: 41 5c pop %r12 3: c3 ret 4: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) b: 00 00 d: f3 0f 1e fa endbr64 11: 48 89 f8 mov %rdi,%rax 14: 48 89 f7 mov %rsi,%rdi 17: 48 89 d6 mov %rdx,%rsi 1a: 48 89 ca mov %rcx,%rdx 1d: 4d 89 c2 mov %r8,%r10 20: 4d 89 c8 mov %r9,%r8 23: 4c 8b 4c 24 08 mov 0x8(%rsp),%r9 28: 0f 05 syscall 2a:* 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax <-- trapping instruction 30: 73 01 jae 0x33 32: c3 ret 33: 48 8b 0d c3 a3 0f 00 mov 0xfa3c3(%rip),%rcx # 0xfa3fd 3a: f7 d8 neg %eax 3c: 64 89 01 mov %eax,%fs:(%rcx) 3f: 48 rex.W Code starting with the faulting instruction =========================================== 0: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax 6: 73 01 jae 0x9 8: c3 ret 9: 48 8b 0d c3 a3 0f 00 mov 0xfa3c3(%rip),%rcx # 0xfa3d3 10: f7 d8 neg %eax 12: 64 89 01 mov %eax,%fs:(%rcx) 15: 48 rex.W [ 69.235005] RSP: 002b:00007ffda20bffe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 69.235020] RAX: ffffffffffffffda RBX: 00005561184c0f30 RCX: 00007f452d11ea3d [ 69.235028] RDX: 0000000000000000 RSI: 000055611837ad80 RDI: 000000000000001a [ 69.235035] RBP: 0000000000020000 R08: 0000000000000000 R09: 0000000000000002 [ 69.235052] R10: 000000000000001a R11: 0000000000000246 R12: 000055611837ad80 [ 69.235059] R13: 000055611836bc10 R14: 0000000000000000 R15: 00005561184ba330 [ 69.235072] </TASK> [ 69.462372] ================================================================== Best regards, Mirsad Todorovac