Please install the kernel-dbg file, then run "mod -S directory-of-kernel-ko" followed by "dis -lr rxe_elem_release+15", and show us the result. On Wed, Dec 25, 2019 at 2:02 PM Frank Huang <tigerinxm@xxxxxxxxx> wrote: > > yes, > > what information should I post? > > crash> bt > PID: 108 TASK: ffff978e28548000 CPU: 16 COMMAND: "ksoftirqd/16" > #0 [ffffa2f14c9a7b18] machine_kexec at ffffffff8f059992 > #1 [ffffa2f14c9a7b70] __crash_kexec at ffffffff8f13cf7d > #2 [ffffa2f14c9a7c38] crash_kexec at ffffffff8f13e089 > #3 [ffffa2f14c9a7c50] oops_end at ffffffff8f027a77 > #4 [ffffa2f14c9a7c70] general_protection at ffffffff8fa01635 > [exception RIP: rxe_elem_release+15] > RIP: ffffffffc08da38f RSP: ffffa2f14c9a7d28 RFLAGS: 00010246 > RAX: 0000000000000000 RBX: 860e42124013b0aa RCX: 0000000000000000 > RDX: ffff978e03ba8900 RSI: 0000000000000281 RDI: ffff978e02e746e8 > RBP: ffff978e02e746e0 R8: 0000000000000201 R9: ffffa2f14dcb9000 > R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 > R13: 000000000000001d R14: 0000000000000006 R15: ffff978e02e746e0 > ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 > #5 [ffffa2f14c9a7d38] rxe_responder at ffffffffc08d7d10 [rdma_rxe] > #6 [ffffa2f14c9a7e48] rxe_do_task at ffffffffc08e060b [rdma_rxe] > #7 [ffffa2f14c9a7e70] tasklet_action at ffffffff8f0afa1e > #8 [ffffa2f14c9a7e88] __softirqentry_text_start at ffffffff8fc000d9 > #9 [ffffa2f14c9a7ee0] run_ksoftirqd at ffffffff8f0afa4e > #10 [ffffa2f14c9a7ee8] smpboot_thread_fn at ffffffff8f0cca5e > #11 [ffffa2f14c9a7f10] kthread at ffffffff8f0c8c9f > #12 [ffffa2f14c9a7f50] ret_from_fork at ffffffff8fa00205 > crash> dis -l ffffffffc08d7d10 > 0xffffffffc08d7d10 <rxe_responder+3312>: jmpq > 0xffffffffc08d7c6c <rxe_responder+3148> > crash> > > 0xffffffffc08d7c97 <rxe_responder+3191>: mov 0xec(%r15),%eax > 0xffffffffc08d7c9e <rxe_responder+3198>: cmp $0x2,%eax > 0xffffffffc08d7ca1 <rxe_responder+3201>: je > 0xffffffffc08d8213 <rxe_responder+4595> > 0xffffffffc08d7ca7 <rxe_responder+3207>: cmp 
$0x3,%eax > 0xffffffffc08d7caa <rxe_responder+3210>: jne > 0xffffffffc08d7ecc <rxe_responder+3756> > 0xffffffffc08d7cb0 <rxe_responder+3216>: mov 0x450(%r15),%eax > 0xffffffffc08d7cb7 <rxe_responder+3223>: cmp $0x20,%eax > 0xffffffffc08d7cba <rxe_responder+3226>: jl > 0xffffffffc08d873e <rxe_responder+5918> > 0xffffffffc08d7cc0 <rxe_responder+3232>: cmp $0x21,%eax > 0xffffffffc08d7cc3 <rxe_responder+3235>: jle > 0xffffffffc08d8725 <rxe_responder+5893> > 0xffffffffc08d7cc9 <rxe_responder+3241>: sub $0x26,%eax > 0xffffffffc08d7ccc <rxe_responder+3244>: cmp $0x1,%eax > 0xffffffffc08d7ccf <rxe_responder+3247>: ja > 0xffffffffc08d873e <rxe_responder+5918> > 0xffffffffc08d7cd5 <rxe_responder+3253>: movzbl 0x2d(%rbx),%eax > 0xffffffffc08d7cd9 <rxe_responder+3257>: sub $0x27,%eax > 0xffffffffc08d7cdc <rxe_responder+3260>: cmp $0x3,%al > 0xffffffffc08d7cde <rxe_responder+3262>: sbb %r13d,%r13d > 0xffffffffc08d7ce1 <rxe_responder+3265>: and $0xfffffff0,%r13d > 0xffffffffc08d7ce5 <rxe_responder+3269>: add $0x14,%r13d > 0xffffffffc08d7ce9 <rxe_responder+3273>: jmpq > 0xffffffffc08d70a2 <rxe_responder+130> > 0xffffffffc08d7cee <rxe_responder+3278>: mov %rbp,%rdi > 0xffffffffc08d7cf1 <rxe_responder+3281>: callq > 0xffffffffc08da380 <rxe_elem_release> > 0xffffffffc08d7cf6 <rxe_responder+3286>: jmpq > 0xffffffffc08d7b66 <rxe_responder+2886> > 0xffffffffc08d7cfb <rxe_responder+3291>: mov %rbp,%rdi > 0xffffffffc08d7cfe <rxe_responder+3294>: callq > 0xffffffffc08da380 <rxe_elem_release> > 0xffffffffc08d7d03 <rxe_responder+3299>: jmpq > 0xffffffffc08d7b14 <rxe_responder+2804> > 0xffffffffc08d7d08 <rxe_responder+3304>: mov %rbp,%rdi > 0xffffffffc08d7d0b <rxe_responder+3307>: callq > 0xffffffffc08da380 <rxe_elem_release> > 0xffffffffc08d7d10 <rxe_responder+3312>: jmpq > 0xffffffffc08d7c6c <rxe_responder+3148> > 0xffffffffc08d7d15 <rxe_responder+3317>: test $0x10000,%eax > 0xffffffffc08d7d1a <rxe_responder+3322>: je > 0xffffffffc08d804f <rxe_responder+4143> > 0xffffffffc08d7d20 
<rxe_responder+3328>: mov 0x24(%rbx),%r12d > 0xffffffffc08d7d24 <rxe_responder+3332>: movzbl 0x19f(%r15),%edi > 0xffffffffc08d7d2c <rxe_responder+3340>: lea 0x6c0(%r15),%rsi > 0xffffffffc08d7d33 <rxe_responder+3347>: mov %r12d,%edx > 0xffffffffc08d7d36 <rxe_responder+3350>: callq > 0xffffffffc08d6af0 <find_resource> > 0xffffffffc08d7d3b <rxe_responder+3355>: test %rax,%rax > 0xffffffffc08d7d3e <rxe_responder+3358>: je > 0xffffffffc08d8c40 <rxe_responder+7200> > 0xffffffffc08d7d44 <rxe_responder+3364>: movzbl 0x2d(%rbx),%edx > 0xffffffffc08d7d48 <rxe_responder+3368>: movzbl 0x2e(%rbx),%ecx > 0xffffffffc08d7d4c <rxe_responder+3372>: mov $0xc,%r13d > 0xffffffffc08d7d52 <rxe_responder+3378>: mov 0x20(%rax),%rdi > 0xffffffffc08d7d56 <rxe_responder+3382>: shl $0x6,%rdx > 0xffffffffc08d7d5a <rxe_responder+3386>: movslq -0x3f715564(%rdx),%rdx > 0xffffffffc08d7d61 <rxe_responder+3393>: add %rdx,%rcx > 0xffffffffc08d7d64 <rxe_responder+3396>: add 0x18(%rbx),%rcx > 0xffffffffc08d7d68 <rxe_responder+3400>: mov (%rcx),%rdx > 0xffffffffc08d7d6b <rxe_responder+3403>: mov 0xc(%rcx),%esi > 0xffffffffc08d7d6e <rxe_responder+3406>: bswap %rdx > 0xffffffffc08d7d71 <rxe_responder+3409>: bswap %esi > 0xffffffffc08d7d73 <rxe_responder+3411>: cmp %rdi,%rdx > 0xffffffffc08d7d76 <rxe_responder+3414>: jb > 0xffffffffc08d70a2 <rxe_responder+130> > 0xffffffffc08d7d7c <rxe_responder+3420>: mov 0x2c(%rax),%r8d > 0xffffffffc08d7d80 <rxe_responder+3424>: cmp %r8d,%esi > 0xffffffffc08d7d83 <rxe_responder+3427>: ja > 0xffffffffc08d70a2 <rxe_responder+130> > 0xffffffffc08d7d89 <rxe_responder+3433>: mov %esi,%r9d > 0xffffffffc08d7d8c <rxe_responder+3436>: add %r8,%rdi > 0xffffffffc08d7d8f <rxe_responder+3439>: add %rdx,%r9 > 0xffffffffc08d7d92 <rxe_responder+3442>: cmp %rdi,%r9 > 0xffffffffc08d7d95 <rxe_responder+3445>: ja > 0xffffffffc08d70a2 <rxe_responder+130> > 0xffffffffc08d7d9b <rxe_responder+3451>: mov 0x8(%rcx),%ecx > 0xffffffffc08d7d9e <rxe_responder+3454>: bswap %ecx > 0xffffffffc08d7da0 
<rxe_responder+3456>: cmp 0x28(%rax),%ecx > > On Wed, Dec 25, 2019 at 1:28 PM Zhu Yanjun <zyjzyj2000@xxxxxxxxx> wrote: > > > > Is there a vmcore for this problem? > > > > On Wed, Dec 25, 2019 at 1:03 PM Frank Huang <tigerinxm@xxxxxxxxx> wrote: > > > > > > hi, there is a panic in the rdma_rxe module when restarting > > > network.service or shutting down the switch. > > > > > > it looks like a use-after-free error. > > > > > > every time it happens, the log shows "rdma_rxe: Unknown layer 3 protocol: 0" > > > > > > is it a known error? > > > > > > my kernel version is 4.14.97 > > > > > > [448840.314544] rdma_rxe: Unknown layer 3 protocol: 0 > > > [448840.314626] general protection fault: 0000 [#1] SMP PTI > > > [448840.314627] Modules linked in: binfmt_misc ib_isert > > > iscsi_target_mod ib_srpt target_core_mod rpcrdma ib_iser ib_srp > > > scsi_transport_srp rdma_rxe(OE) ib_ipoib ib_umad ip6_udp_tunnel > > > udp_tunnel rdma_ucm rdma_cm iw_cm ib_cm ib_uverbs ib_core > > > ebtable_filter ebtables devlink ip6table_filter ip6_tables > > > ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink iptable_nat > > > xt_addrtype xt_conntrack br_netfilter bridge stp llc overlay > > > ip_set_hash_ip ip_set nfnetlink iscsi_tcp libiscsi_tcp libiscsi > > > scsi_transport_iscsi sch_ingress openvswitch nf_conntrack_ipv6 > > > nf_nat_ipv6 nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 > > > nf_defrag_ipv6 nf_nat nf_conntrack libcrc32c sunrpc intel_rapl > > > x86_pkg_temp_thermal intel_powerclamp coretemp vfat fat kvm_intel kvm > > > irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel > > > intel_cstate > > > [448840.314677] intel_uncore intel_rapl_perf mxm_wmi iTCO_wdt > > > iTCO_vendor_support ipmi_ssif pcspkr i2c_i801 lpc_ich ipmi_si > > > ipmi_devintf ipmi_msghandler pcc_cpufreq shpchp wmi ast drm_kms_helper > > > ttm crc32c_intel drm ixgbe igb mdio ptp pps_core dca i2c_algo_bit > > > [448840.314700] CPU: 1 PID: 17 Comm: ksoftirqd/1 Tainted: G > > > OE 
4.14.97-el7.centos.x86_64 #1 > > > [448840.314701] Hardware name: /80010211 , BIOS 3.12 11/27/2018 > > > [448840.314703] task: ffff9ce768af8000 task.stack: ffffbd7c4c6c4000 > > > [448840.314710] RIP: 0010:rxe_elem_release+0xf/0x60 [rdma_rxe] > > > [448840.314711] RSP: 0018:ffffbd7c4c6c7d28 EFLAGS: 00010246 > > > [448840.314713] RAX: 0000000000000000 RBX: 2917351aae258b92 RCX: > > > 0000000000000000 > > > [448840.314714] RDX: ffff9cfb3f64ba40 RSI: 000000000000026c RDI: > > > ffff9cfb3f678008 > > > [448840.314715] RBP: ffff9cfb3f678000 R08: 0000000000000201 R09: > > > ffffbd7c4df35000 > > > [448840.314716] R10: 0000000000000000 R11: 0000000000000001 R12: > > > 0000000000000000 > > > [448840.314717] R13: 000000000000001d R14: 0000000000000006 R15: > > > ffff9cfb3f678000 > > > [448840.314719] FS: 0000000000000000(0000) GS:ffff9ce76f840000(0000) > > > knlGS:0000000000000000 > > > [448840.314720] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > > > [448840.314721] CR2: 00007f4fc400f000 CR3: 000000260420a005 CR4: > > > 00000000001626e0 > > > [448840.314723] Call Trace: > > > [448840.314730] rxe_responder+0xcf0/0x1fe0 [rdma_rxe] > > > [448840.314738] ? check_preempt_wakeup+0x125/0x240 > > > [448840.314742] ? check_preempt_curr+0x84/0x90 > > > [448840.314745] ? ttwu_do_wakeup+0x19/0x140 > > > [448840.314747] ? try_to_wake_up+0x54/0x450 > > > [448840.314751] rxe_do_task+0x8b/0x100 [rdma_rxe] > > > [448840.314754] tasklet_action+0xfe/0x110 > > > [448840.314758] __do_softirq+0xd9/0x2a2 > > > [448840.314761] run_ksoftirqd+0x1e/0x70 > > > [448840.314763] smpboot_thread_fn+0x10e/0x160 > > > [448840.314766] kthread+0xff/0x140 > > > [448840.314768] ? sort_range+0x20/0x20 > > > [448840.314770] ? 
__kthread_parkme+0x90/0x90 > > > [448840.314771] ret_from_fork+0x35/0x40 > > > [448840.314773] Code: 7a 00 00 74 04 31 c0 eb c3 4c 89 e7 e8 bb f9 ff > > > ff 31 c0 eb b7 0f 1f 80 00 00 00 00 0f 1f 44 00 00 55 48 8d 6f f8 53 > > > 48 8b 5f f8 <48> 8b 43 20 48 85 c0 74 08 48 89 ef e8 60 1c 53 fb 8b 43 > > > 30 48 > > > [448840.314817] RIP: rxe_elem_release+0xf/0x60 [rdma_rxe] RSP: ffffbd7c4c6c7d28