[resend] Crash in nvmet_req_init() - null req->rsp pointer

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Resending in plain text...

----

Hey guys,

I'm debugging a nvmet_rdma crash on the linux-4.14.52 stable kernel
code.  Under heavy load, including 80 nvmf devices, after 13 hours of
running, I see an Oops [1] when the target is processing a new ingress
nvme command.  It crashes in nvmet_req_init() because req->rsp is NULL:

  493   bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
  494                   struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops)
  495   {
  496           u8 flags = req->cmd->common.flags;
  497           u16 status;
  498
  499           req->cq = cq;
  500           req->sq = sq;
  501           req->ops = ops;
  502           req->sg = NULL;
  503           req->sg_cnt = 0;
  504           req->rsp->status = 0; <-- HERE

The  nvme command opcode is nvme_cmd_write.  The nvmet_rdma_queue state
is NVMET_RDMA_Q_LIVE.  The nvmet_req looks valid [2].  IE not garbage. 
But it seems very bad that req->rsp is NULL! :)

Any thoughts?  I didn't see anything like this in recent nvmf fixes...

Thanks,

Steve.

[1]  Oops stack trace:

[45259.588347] BUG: unable to handle kernel NULL pointer dereference at
000000000000000e
[45259.597112] IP: nvmet_req_init+0x3d/0x120 [nvmet]
[45259.602687] PGD 0 P4D 0
[45259.606083] Oops: 0002 [#1] SMP PTI
[45259.610421] Modules linked in: tcp_lp t4_tom(OE) toecore(OE)
nvmet_rdma nvmet rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi
scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp
ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm
iw_cxgb4(OE) ib_core cxgb4(OE) dm_mirror dm_region_hash dm_log
intel_rapl vfat nls_utf8 fat isofs x86_pkg_temp_thermal intel_powerclamp
coretemp kvm_intel kvm loop irqbypass crct10dif_pclmul crc32_pclmul
ghash_clmulni_intel pcbc aesni_intel crypto_simd glue_helper dm_mod
iTCO_wdt iTCO_vendor_support pcspkr lpc_ich joydev shpchp i2c_i801 sg
ipmi_si wmi ipmi_devintf ipmi_msghandler nfsd auth_rpcgss nfs_acl lockd
grace sunrpc ip_tables ext4 mbcache jbd2 ast drm_kms_helper syscopyarea
sysfillrect sysimgblt fb_sys_fops sd_mod ttm igb drm ptp ahci
[45259.687761]  libahci pps_core libata crc32c_intel dca i2c_algo_bit
i2c_core nvme nvme_core
[45259.697067] CPU: 15 PID: 4259 Comm: kworker/15:1H Tainted:
G           OE   4.14.52 #1
[45259.706042] Hardware name: Supermicro X10SRA-F/X10SRA-F, BIOS 2.0b
06/12/2017
[45259.714204] Workqueue: ib-comp-wq ib_cq_poll_work [ib_core]
[45259.720873] task: ffff8bb54c2c0000 task.stack: ffffadd60a334000
[45259.727802] RIP: 0010:nvmet_req_init+0x3d/0x120 [nvmet]
[45259.734140] RSP: 0000:ffffadd60a337dc8 EFLAGS: 00010246
[45259.740373] RAX: 0000000000000040 RBX: ffff8bb3ecf4f748 RCX:
0000000000000000
[45259.748508] RDX: 0000000000000000 RSI: ffff8bb3ecf4f830 RDI:
ffff8bb3ecf4f748
[45259.756662] RBP: ffff8bb3ecf4f838 R08: 0000000000000000 R09:
ffffadd60a337d4c
[45259.764818] R10: 0000000000000000 R11: 0000000000001000 R12:
ffff8bb3ecf4f748
[45259.773006] R13: 0000000000000000 R14: 0000000000000010 R15:
ffff8bb498ec7400
[45259.781187] FS:  0000000000000000(0000) GS:ffff8bb5575c0000(0000)
knlGS:0000000000000000
[45259.790256] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[45259.796985] CR2: 000000000000000e CR3: 000000070980a003 CR4:
00000000003606e0
[45259.805113] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[45259.813256] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400
[45259.821382] Call Trace:
[45259.824794]  nvmet_rdma_handle_command+0xab/0x2a0 [nvmet_rdma]
[45259.831603]  __ib_process_cq+0x71/0xc0 [ib_core]
[45259.837219]  ib_cq_poll_work+0x24/0x60 [ib_core]
[45259.842829]  process_one_work+0x14f/0x360
[45259.847800]  worker_thread+0x47/0x3e0
[45259.852470]  kthread+0xfc/0x130
[45259.856538]  ? max_active_store+0x80/0x80
[45259.861476]  ? __kthread_parkme+0x90/0x90
[45259.866427]  ret_from_fork+0x35/0x40
[45259.870910] Code: 40 01 48 89 77 18 48 89 53 10 48 8b 57 08 48 89 8f
58 01 00 00 31 c9 48 c7 47 28 00 00 00 00 c7 87 38 01 00 00 00 00 00 00
a8 03 <66> 89 4a 0e 75 51 83 e0 c0 3c 40 75 4a 48 8b 47 10 48 8b 10 48
[45259.891681] RIP: nvmet_req_init+0x3d/0x120 [nvmet] RSP: ffffadd60a337dc8
[45259.899369] CR2: 000000000000000e
[45259.903653] ---[ end trace 3bf559cbf4cd1ab5 ]---

[2]  various structs:

crash> nvmet_req ffff8bb3ecf4f748
struct nvmet_req {
  cmd = 0xffff8bb4cfd64b40,
  rsp = 0x0,
  sq = 0xffff8bb3ecf4f838,
  cq = 0xffff8bb3ecf4f830,
  ns = 0xb5000000b92,
  sg = 0x0,
  inline_bio = {
    bi_next = 0x3c0,
    bi_disk = 0x1000f0,
    bi_opf = 0,
    bi_flags = 24216,
    bi_ioprio = 41863,
    bi_write_hint = 0,
    bi_status = 0 '\000',
    bi_partno = 0 '\000',
    bi_phys_segments = 9,
    bi_seg_front_size = 0,
    bi_seg_back_size = 0,
    bi_iter = {
      bi_sector = 0,
      bi_size = 15728648,
      bi_idx = 13500636,
      bi_done = 272,
      bi_bvec_done = 704
    },
    __bi_remaining = {
      counter = 1426761728
    },
    bi_end_io = 0xffff8bb4550aa8ce,
    bi_private = 0x100001050,
    bi_ioc = 0x1,
    bi_css = 0x0,
    {
      bi_integrity = 0x0
    },
    bi_vcnt = 0,
    bi_max_vecs = 0,
    __bi_cnt = {
      counter = 0
    },
    bi_io_vec = 0x0,
    bi_pool = 0x0,
    bi_inline_vecs = 0xffff8bb3ecf4f800
  },
  inline_bvec = {{
      bv_page = 0xffff8bb498ec4c00,
      bv_len = 2832033792,
      bv_offset = 4294937516
    }, {
      bv_page = 0xffff8bb46af86800,
      bv_len = 7,
      bv_offset = 0
    }, {
      bv_page = 0xffff8bb4039c9900,
      bv_len = 0,
      bv_offset = 1
    }, {
      bv_page = 0x200003,
      bv_len = 3463016448,
      bv_offset = 4294937524
    }, {
      bv_page = 0x8000000000000001,
      bv_len = 2822996336,
      bv_offset = 16928
    }, {
      bv_page = 0xffffffffc0c530f0 <nvmet_sq_free>,
      bv_len = 0,
      bv_offset = 0
    }, {
      bv_page = 0x0,
      bv_len = 0,
      bv_offset = 0
    }, {
      bv_page = 0x0,
      bv_len = 2097155,
      bv_offset = 19
    }},
  sg_cnt = 0,
  data_len = 0,
  port = 0xffff8baca8cd6800,
  execute = 0xffff8bb3ecf4f890,
  ops = 0xffffffffc08a3100 <nvmet_rdma_ops>
}
crash> nvmet_rdma_cmd 0xffff8bb50b83d200
struct nvmet_rdma_cmd {
  sge = {{
      addr = 42141633344,
      length = 64,
      lkey = 0
    }, {
      addr = 40797528064,
      length = 4096,
      lkey = 0
    }},
  cqe = {
    done = 0xffffffffc08a05f0 <nvmet_rdma_recv_done>
  },
  wr = {
    next = 0x0,
    {
      wr_id = 18446616208431370784,
      wr_cqe = 0xffff8bb50b83d220
    },
    sg_list = 0xffff8bb50b83d200,
    num_sge = 2
  },
  inline_sg = {
    page_link = 18446694492170806146,
    offset = 0,
    length = 4096,
    dma_address = 0,
    dma_length = 0
  },
  inline_page = 0xffffd2e7e5fee380,
  nvme_cmd = 0xffff8bb4cfd64b40,
  queue = 0xffff8bb3ecf4f800
}
crash> nvme_command.rw 0xffff8bb4cfd64b40
    rw = {
      opcode = 1 '\001',
      flags = 64 '@',
      command_id = 27,
      nsid = 38,
      rsvd2 = 0,
      metadata = 0,
      dptr = {
        {
          prp1 = 30590726144,
          prp2 = 5692787825389355008
        },
        sgl = {
          addr = 30590726144,
          length = 2449489920,
          rsvd = <incomplete sequence \330>,
          type = 79 'O'
        },
        ksgl = {
          addr = 30590726144,
          length = "\000@",
          key = "\222", <incomplete sequence \330>,
          type = 79 'O'
        }
      },
      slba = 1001032,
      length = 31,
      control = 0,
      dsmgmt = 0,
      reftag = 0,
      apptag = 0,
      appmask = 0
    }
crash> nvmet_rdma_queue 0xffff8bb3ecf4f800
struct nvmet_rdma_queue {
  cm_id = 0xffff8bb498ec4c00,
  port = 0xffff8baca8cd6800,
  cq = 0xffff8bb46af86800,
  sq_wr_avail = {
    counter = 7
  },
  dev = 0xffff8bb4039c9900,
  state_lock = {
    {
      rlock = {
        raw_lock = {
          val = {
            counter = 0
          }
        }
      }
    }
  },
  state = NVMET_RDMA_Q_LIVE,
  nvme_cq = {
    qid = 3,
    size = 32
  },
  nvme_sq = {
    ctrl = 0xffff8bb4ce697000,
    ref = {
      count = {
        counter = -9223372036854775807
      },
      percpu_count_ptr = 72708029383024,
      release = 0xffffffffc0c530f0 <nvmet_sq_free>,
      confirm_switch = 0x0,
      force_atomic = false,
      rcu = {
        next = 0x0,
        func = 0x0
      }
    },
    qid = 3,
    size = 32,
    sqhd = 19,
    free_done = {
      done = 0,
      wait = {
        lock = {
          {
            rlock = {
              raw_lock = {
                val = {
                  counter = 0
                }
              }
            }
          }
        },
        head = {
          next = 0xffff8baca8cd6800,
          prev = 0xffff8bb3ecf4f890
        }
      }
    },
    confirm_done = {
      done = 3230281984,
      wait = {
        lock = {
          {
            rlock = {
              raw_lock = {
                val = {
                  counter = 0
                }
              }
            }
          }
        },
        head = {
          next = 0xffff8bb3ecf4f8b0,
          prev = 0xffff8bb3ecf4f8b0
        }
      }
    }
  },
  rsps = 0xffff8bb4cd9c0000,
  free_rsps = {
    next = 0xdead000000000100,
    prev = 0xdead000000000200
  },
  rsps_lock = {
    {
      rlock = {
        raw_lock = {
          val = {
            counter = 0
          }
        }
      }
    }
  },
  cmds = 0xffff8bb50b83d000,
  release_work = {
    data = {
      counter = 68719476704
    },
    entry = {
      next = 0xffff8bb3ecf4f8f0,
      prev = 0xffff8bb3ecf4f8f0
    },
    func = 0xffffffffc089f0d0 <nvmet_rdma_release_queue_work>
  },
  rsp_wait_list = {
    next = 0xffff8bb3ecf4f908,
    prev = 0xffff8bb3ecf4f908
  },
  rsp_wr_wait_list = {
    next = 0xffff8bb3ecf4f918,
    prev = 0xffff8bb3ecf4f918
  },
  rsp_wr_wait_lock = {
    {
      rlock = {
        raw_lock = {
          val = {
            counter = 0
          }
        }
      }
    }
  },
  idx = 151,
  host_qid = 3,
  recv_queue_size = 32,
  send_queue_size = 32,
  queue_list = {
    next = 0xffff8bb464686540,
    prev = 0xffff8bb3ecf4d140
  }
}
crash> ib_wc ffff8bb498ec7400
struct ib_wc {
  {
    wr_id = 18446616208431370784,
    wr_cqe = 0xffff8bb50b83d220
  },
  status = IB_WC_SUCCESS,
  opcode = IB_WC_RECV,
  vendor_err = 0,
  byte_len = 64,
  qp = 0xffff8bb4d9759c00,
  ex = {
    imm_data = 282903233,
    invalidate_rkey = 282903233
  },
  src_qp = 283165377,
  slid = 283427521,
  wc_flags = 0,
  pkey_index = 49857,
  sl = 236 '\354',
  dlid_path_bits = 16 '\020',
  port_num = 193 '\301',
  smac = "\302\360\020\301\302", <incomplete sequence \364>,
  vlan_id = 49857,
  network_hdr_type = 248 '\370'
}






[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux