Re: rxe panic

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Please install the kernel-dbg package, then run "mod -S
directory-of-kernel-ko" followed by "dis -lr rxe_elem_release+15",
and show us the result.

On Wed, Dec 25, 2019 at 2:02 PM Frank Huang <tigerinxm@xxxxxxxxx> wrote:
>
> yes,
>
> What information should I post?
>
> crash> bt
> PID: 108    TASK: ffff978e28548000  CPU: 16  COMMAND: "ksoftirqd/16"
>  #0 [ffffa2f14c9a7b18] machine_kexec at ffffffff8f059992
>  #1 [ffffa2f14c9a7b70] __crash_kexec at ffffffff8f13cf7d
>  #2 [ffffa2f14c9a7c38] crash_kexec at ffffffff8f13e089
>  #3 [ffffa2f14c9a7c50] oops_end at ffffffff8f027a77
>  #4 [ffffa2f14c9a7c70] general_protection at ffffffff8fa01635
>     [exception RIP: rxe_elem_release+15]
>     RIP: ffffffffc08da38f  RSP: ffffa2f14c9a7d28  RFLAGS: 00010246
>     RAX: 0000000000000000  RBX: 860e42124013b0aa  RCX: 0000000000000000
>     RDX: ffff978e03ba8900  RSI: 0000000000000281  RDI: ffff978e02e746e8
>     RBP: ffff978e02e746e0   R8: 0000000000000201   R9: ffffa2f14dcb9000
>     R10: 0000000000000000  R11: 0000000000000001  R12: 0000000000000000
>     R13: 000000000000001d  R14: 0000000000000006  R15: ffff978e02e746e0
>     ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
>  #5 [ffffa2f14c9a7d38] rxe_responder at ffffffffc08d7d10 [rdma_rxe]
>  #6 [ffffa2f14c9a7e48] rxe_do_task at ffffffffc08e060b [rdma_rxe]
>  #7 [ffffa2f14c9a7e70] tasklet_action at ffffffff8f0afa1e
>  #8 [ffffa2f14c9a7e88] __softirqentry_text_start at ffffffff8fc000d9
>  #9 [ffffa2f14c9a7ee0] run_ksoftirqd at ffffffff8f0afa4e
> #10 [ffffa2f14c9a7ee8] smpboot_thread_fn at ffffffff8f0cca5e
> #11 [ffffa2f14c9a7f10] kthread at ffffffff8f0c8c9f
> #12 [ffffa2f14c9a7f50] ret_from_fork at ffffffff8fa00205
> crash> dis -l ffffffffc08d7d10
> 0xffffffffc08d7d10 <rxe_responder+3312>:        jmpq
> 0xffffffffc08d7c6c <rxe_responder+3148>
> crash>
>
> 0xffffffffc08d7c97 <rxe_responder+3191>:        mov    0xec(%r15),%eax
> 0xffffffffc08d7c9e <rxe_responder+3198>:        cmp    $0x2,%eax
> 0xffffffffc08d7ca1 <rxe_responder+3201>:        je
> 0xffffffffc08d8213 <rxe_responder+4595>
> 0xffffffffc08d7ca7 <rxe_responder+3207>:        cmp    $0x3,%eax
> 0xffffffffc08d7caa <rxe_responder+3210>:        jne
> 0xffffffffc08d7ecc <rxe_responder+3756>
> 0xffffffffc08d7cb0 <rxe_responder+3216>:        mov    0x450(%r15),%eax
> 0xffffffffc08d7cb7 <rxe_responder+3223>:        cmp    $0x20,%eax
> 0xffffffffc08d7cba <rxe_responder+3226>:        jl
> 0xffffffffc08d873e <rxe_responder+5918>
> 0xffffffffc08d7cc0 <rxe_responder+3232>:        cmp    $0x21,%eax
> 0xffffffffc08d7cc3 <rxe_responder+3235>:        jle
> 0xffffffffc08d8725 <rxe_responder+5893>
> 0xffffffffc08d7cc9 <rxe_responder+3241>:        sub    $0x26,%eax
> 0xffffffffc08d7ccc <rxe_responder+3244>:        cmp    $0x1,%eax
> 0xffffffffc08d7ccf <rxe_responder+3247>:        ja
> 0xffffffffc08d873e <rxe_responder+5918>
> 0xffffffffc08d7cd5 <rxe_responder+3253>:        movzbl 0x2d(%rbx),%eax
> 0xffffffffc08d7cd9 <rxe_responder+3257>:        sub    $0x27,%eax
> 0xffffffffc08d7cdc <rxe_responder+3260>:        cmp    $0x3,%al
> 0xffffffffc08d7cde <rxe_responder+3262>:        sbb    %r13d,%r13d
> 0xffffffffc08d7ce1 <rxe_responder+3265>:        and    $0xfffffff0,%r13d
> 0xffffffffc08d7ce5 <rxe_responder+3269>:        add    $0x14,%r13d
> 0xffffffffc08d7ce9 <rxe_responder+3273>:        jmpq
> 0xffffffffc08d70a2 <rxe_responder+130>
> 0xffffffffc08d7cee <rxe_responder+3278>:        mov    %rbp,%rdi
> 0xffffffffc08d7cf1 <rxe_responder+3281>:        callq
> 0xffffffffc08da380 <rxe_elem_release>
> 0xffffffffc08d7cf6 <rxe_responder+3286>:        jmpq
> 0xffffffffc08d7b66 <rxe_responder+2886>
> 0xffffffffc08d7cfb <rxe_responder+3291>:        mov    %rbp,%rdi
> 0xffffffffc08d7cfe <rxe_responder+3294>:        callq
> 0xffffffffc08da380 <rxe_elem_release>
> 0xffffffffc08d7d03 <rxe_responder+3299>:        jmpq
> 0xffffffffc08d7b14 <rxe_responder+2804>
> 0xffffffffc08d7d08 <rxe_responder+3304>:        mov    %rbp,%rdi
> 0xffffffffc08d7d0b <rxe_responder+3307>:        callq
> 0xffffffffc08da380 <rxe_elem_release>
> 0xffffffffc08d7d10 <rxe_responder+3312>:        jmpq
> 0xffffffffc08d7c6c <rxe_responder+3148>
> 0xffffffffc08d7d15 <rxe_responder+3317>:        test   $0x10000,%eax
> 0xffffffffc08d7d1a <rxe_responder+3322>:        je
> 0xffffffffc08d804f <rxe_responder+4143>
> 0xffffffffc08d7d20 <rxe_responder+3328>:        mov    0x24(%rbx),%r12d
> 0xffffffffc08d7d24 <rxe_responder+3332>:        movzbl 0x19f(%r15),%edi
> 0xffffffffc08d7d2c <rxe_responder+3340>:        lea    0x6c0(%r15),%rsi
> 0xffffffffc08d7d33 <rxe_responder+3347>:        mov    %r12d,%edx
> 0xffffffffc08d7d36 <rxe_responder+3350>:        callq
> 0xffffffffc08d6af0 <find_resource>
> 0xffffffffc08d7d3b <rxe_responder+3355>:        test   %rax,%rax
> 0xffffffffc08d7d3e <rxe_responder+3358>:        je
> 0xffffffffc08d8c40 <rxe_responder+7200>
> 0xffffffffc08d7d44 <rxe_responder+3364>:        movzbl 0x2d(%rbx),%edx
> 0xffffffffc08d7d48 <rxe_responder+3368>:        movzbl 0x2e(%rbx),%ecx
> 0xffffffffc08d7d4c <rxe_responder+3372>:        mov    $0xc,%r13d
> 0xffffffffc08d7d52 <rxe_responder+3378>:        mov    0x20(%rax),%rdi
> 0xffffffffc08d7d56 <rxe_responder+3382>:        shl    $0x6,%rdx
> 0xffffffffc08d7d5a <rxe_responder+3386>:        movslq -0x3f715564(%rdx),%rdx
> 0xffffffffc08d7d61 <rxe_responder+3393>:        add    %rdx,%rcx
> 0xffffffffc08d7d64 <rxe_responder+3396>:        add    0x18(%rbx),%rcx
> 0xffffffffc08d7d68 <rxe_responder+3400>:        mov    (%rcx),%rdx
> 0xffffffffc08d7d6b <rxe_responder+3403>:        mov    0xc(%rcx),%esi
> 0xffffffffc08d7d6e <rxe_responder+3406>:        bswap  %rdx
> 0xffffffffc08d7d71 <rxe_responder+3409>:        bswap  %esi
> 0xffffffffc08d7d73 <rxe_responder+3411>:        cmp    %rdi,%rdx
> 0xffffffffc08d7d76 <rxe_responder+3414>:        jb
> 0xffffffffc08d70a2 <rxe_responder+130>
> 0xffffffffc08d7d7c <rxe_responder+3420>:        mov    0x2c(%rax),%r8d
> 0xffffffffc08d7d80 <rxe_responder+3424>:        cmp    %r8d,%esi
> 0xffffffffc08d7d83 <rxe_responder+3427>:        ja
> 0xffffffffc08d70a2 <rxe_responder+130>
> 0xffffffffc08d7d89 <rxe_responder+3433>:        mov    %esi,%r9d
> 0xffffffffc08d7d8c <rxe_responder+3436>:        add    %r8,%rdi
> 0xffffffffc08d7d8f <rxe_responder+3439>:        add    %rdx,%r9
> 0xffffffffc08d7d92 <rxe_responder+3442>:        cmp    %rdi,%r9
> 0xffffffffc08d7d95 <rxe_responder+3445>:        ja
> 0xffffffffc08d70a2 <rxe_responder+130>
> 0xffffffffc08d7d9b <rxe_responder+3451>:        mov    0x8(%rcx),%ecx
> 0xffffffffc08d7d9e <rxe_responder+3454>:        bswap  %ecx
> 0xffffffffc08d7da0 <rxe_responder+3456>:        cmp    0x28(%rax),%ecx
>
> On Wed, Dec 25, 2019 at 1:28 PM Zhu Yanjun <zyjzyj2000@xxxxxxxxx> wrote:
> >
> > Is there a vmcore available for this problem?
> >
> > On Wed, Dec 25, 2019 at 1:03 PM Frank Huang <tigerinxm@xxxxxxxxx> wrote:
> > >
> > > Hi, there is a panic in the rdma_rxe module when I restart
> > > network.service or shut down the switch.
> > >
> > > It looks like a use-after-free error.
> > >
> > > Every time it happens, the log shows "rdma_rxe: Unknown layer 3 protocol: 0".
> > >
> > > Is it a known error?
> > >
> > > My kernel version is 4.14.97.
> > >
> > > [448840.314544] rdma_rxe: Unknown layer 3 protocol: 0
> > > [448840.314626] general protection fault: 0000 [#1] SMP PTI
> > > [448840.314627] Modules linked in: binfmt_misc ib_isert
> > > iscsi_target_mod ib_srpt target_core_mod rpcrdma ib_iser ib_srp
> > > scsi_transport_srp rdma_rxe(OE) ib_ipoib ib_umad ip6_udp_tunnel
> > > udp_tunnel rdma_ucm rdma_cm iw_cm ib_cm ib_uverbs ib_core
> > > ebtable_filter ebtables devlink ip6table_filter ip6_tables
> > > ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink iptable_nat
> > > xt_addrtype xt_conntrack br_netfilter bridge stp llc overlay
> > > ip_set_hash_ip ip_set nfnetlink iscsi_tcp libiscsi_tcp libiscsi
> > > scsi_transport_iscsi sch_ingress openvswitch nf_conntrack_ipv6
> > > nf_nat_ipv6 nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4
> > > nf_defrag_ipv6 nf_nat nf_conntrack libcrc32c sunrpc intel_rapl
> > > x86_pkg_temp_thermal intel_powerclamp coretemp vfat fat kvm_intel kvm
> > > irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel
> > > intel_cstate
> > > [448840.314677]  intel_uncore intel_rapl_perf mxm_wmi iTCO_wdt
> > > iTCO_vendor_support ipmi_ssif pcspkr i2c_i801 lpc_ich ipmi_si
> > > ipmi_devintf ipmi_msghandler pcc_cpufreq shpchp wmi ast drm_kms_helper
> > > ttm crc32c_intel drm ixgbe igb mdio ptp pps_core dca i2c_algo_bit
> > > [448840.314700] CPU: 1 PID: 17 Comm: ksoftirqd/1 Tainted: G
> > > OE   4.14.97-el7.centos.x86_64 #1
> > > [448840.314701] Hardware name:  /80010211        , BIOS 3.12 11/27/2018
> > > [448840.314703] task: ffff9ce768af8000 task.stack: ffffbd7c4c6c4000
> > > [448840.314710] RIP: 0010:rxe_elem_release+0xf/0x60 [rdma_rxe]
> > > [448840.314711] RSP: 0018:ffffbd7c4c6c7d28 EFLAGS: 00010246
> > > [448840.314713] RAX: 0000000000000000 RBX: 2917351aae258b92 RCX:
> > > 0000000000000000
> > > [448840.314714] RDX: ffff9cfb3f64ba40 RSI: 000000000000026c RDI:
> > > ffff9cfb3f678008
> > > [448840.314715] RBP: ffff9cfb3f678000 R08: 0000000000000201 R09:
> > > ffffbd7c4df35000
> > > [448840.314716] R10: 0000000000000000 R11: 0000000000000001 R12:
> > > 0000000000000000
> > > [448840.314717] R13: 000000000000001d R14: 0000000000000006 R15:
> > > ffff9cfb3f678000
> > > [448840.314719] FS:  0000000000000000(0000) GS:ffff9ce76f840000(0000)
> > > knlGS:0000000000000000
> > > [448840.314720] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > [448840.314721] CR2: 00007f4fc400f000 CR3: 000000260420a005 CR4:
> > > 00000000001626e0
> > > [448840.314723] Call Trace:
> > > [448840.314730]  rxe_responder+0xcf0/0x1fe0 [rdma_rxe]
> > > [448840.314738]  ? check_preempt_wakeup+0x125/0x240
> > > [448840.314742]  ? check_preempt_curr+0x84/0x90
> > > [448840.314745]  ? ttwu_do_wakeup+0x19/0x140
> > > [448840.314747]  ? try_to_wake_up+0x54/0x450
> > > [448840.314751]  rxe_do_task+0x8b/0x100 [rdma_rxe]
> > > [448840.314754]  tasklet_action+0xfe/0x110
> > > [448840.314758]  __do_softirq+0xd9/0x2a2
> > > [448840.314761]  run_ksoftirqd+0x1e/0x70
> > > [448840.314763]  smpboot_thread_fn+0x10e/0x160
> > > [448840.314766]  kthread+0xff/0x140
> > > [448840.314768]  ? sort_range+0x20/0x20
> > > [448840.314770]  ? __kthread_parkme+0x90/0x90
> > > [448840.314771]  ret_from_fork+0x35/0x40
> > > [448840.314773] Code: 7a 00 00 74 04 31 c0 eb c3 4c 89 e7 e8 bb f9 ff
> > > ff 31 c0 eb b7 0f 1f 80 00 00 00 00 0f 1f 44 00 00 55 48 8d 6f f8 53
> > > 48 8b 5f f8 <48> 8b 43 20 48 85 c0 74 08 48 89 ef e8 60 1c 53 fb 8b 43
> > > 30 48
> > > [448840.314817] RIP: rxe_elem_release+0xf/0x60 [rdma_rxe] RSP: ffffbd7c4c6c7d28



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux