On Fri, Apr 17, 2020 at 10:12:44AM -0700, Jeff Kirsher wrote:
> From: Mustafa Ismail <mustafa.ismail@xxxxxxxxx>
>
> Implement device supported verb APIs. The supported APIs
> vary based on the underlying transport the ibdev is
> registered as (i.e. iWARP or RoCEv2).
>
> Signed-off-by: Mustafa Ismail <mustafa.ismail@xxxxxxxxx>
> Signed-off-by: Shiraz Saleem <shiraz.saleem@xxxxxxxxx>
> ---
>  drivers/infiniband/hw/irdma/verbs.c     | 4555 +++++++++++++++++++++++
>  drivers/infiniband/hw/irdma/verbs.h     |  213 ++
>  include/uapi/rdma/ib_user_ioctl_verbs.h |    1 +
>  3 files changed, 4769 insertions(+)
>  create mode 100644 drivers/infiniband/hw/irdma/verbs.c
>  create mode 100644 drivers/infiniband/hw/irdma/verbs.h

<...>

> +static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
> +{
> +        struct irdma_qp *iwqp = to_iwqp(ibqp);
> +
> +        iwqp->destroyed = 1;
> +        if (iwqp->ibqp_state >= IB_QPS_INIT && iwqp->ibqp_state < IB_QPS_RTS)
> +                irdma_next_iw_state(iwqp, IRDMA_QP_STATE_ERROR, 0, 0, 0);
> +
> +        if (!iwqp->user_mode) {
> +                if (iwqp->iwscq) {
> +                        irdma_clean_cqes(iwqp, iwqp->iwscq);
> +                        if (iwqp->iwrcq != iwqp->iwscq)
> +                                irdma_clean_cqes(iwqp, iwqp->iwrcq);
> +                }
> +        }
> +
> +        irdma_remove_push_mmap_entries(iwqp);
> +        irdma_free_lsmm_rsrc(iwqp);
> +        irdma_rem_ref(&iwqp->ibqp);

No, please ensure that the call to destroy_qp simply kfrees the QP,
without any need for reference counting. We need this in order to move
QP allocation to be the IB/core's responsibility. I hope that all other
verbs objects (with MR as the exception) follow the same pattern:
create->kzalloc, destroy->kfree. A rough sketch of what I mean is at
the end of this mail.

> +
> +        return 0;
> +}

<...>

> +
> +/**
> + * irdma_create_qp - create qp
> + * @ibpd: ptr of pd
> + * @init_attr: attributes for qp
> + * @udata: user data for create qp
> + */
> +static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd,
> +                                     struct ib_qp_init_attr *init_attr,
> +                                     struct ib_udata *udata)
> +{
> +        struct irdma_pd *iwpd = to_iwpd(ibpd);
> +        struct irdma_device *iwdev = to_iwdev(ibpd->device);
> +        struct irdma_pci_f *rf = iwdev->rf;
> +        struct irdma_cqp *iwcqp = &rf->cqp;
> +        struct irdma_qp *iwqp;
> +        struct irdma_create_qp_req req;
> +        struct irdma_create_qp_resp uresp = {};
> +        struct i40iw_create_qp_resp uresp_gen1 = {};
> +        u32 qp_num = 0;
> +        void *mem;
> +        enum irdma_status_code ret;
> +        int err_code = 0;
> +        int sq_size;
> +        int rq_size;
> +        struct irdma_sc_qp *qp;
> +        struct irdma_sc_dev *dev = &rf->sc_dev;
> +        struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs;
> +        struct irdma_qp_init_info init_info = {};
> +        struct irdma_create_qp_info *qp_info;
> +        struct irdma_cqp_request *cqp_request;
> +        struct cqp_cmds_info *cqp_info;
> +        struct irdma_qp_host_ctx_info *ctx_info;
> +        struct irdma_iwarp_offload_info *iwarp_info;
> +        struct irdma_roce_offload_info *roce_info;
> +        struct irdma_udp_offload_info *udp_info;
> +        unsigned long flags;
> +
> +        if (init_attr->create_flags ||
> +            init_attr->cap.max_inline_data > uk_attrs->max_hw_inline ||
> +            init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags ||
> +            init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags)
> +                return ERR_PTR(-EINVAL);
> +
> +        sq_size = init_attr->cap.max_send_wr;
> +        rq_size = init_attr->cap.max_recv_wr;
> +
> +        init_info.vsi = &iwdev->vsi;
> +        init_info.qp_uk_init_info.uk_attrs = uk_attrs;
> +        init_info.qp_uk_init_info.sq_size = sq_size;
> +        init_info.qp_uk_init_info.rq_size = rq_size;
> +        init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
> +        init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
> +        init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
> +
> +        mem = kzalloc(sizeof(*iwqp), GFP_KERNEL);
> +        if (!mem)
> +                return ERR_PTR(-ENOMEM);
> +
> +        iwqp = mem;

I'm confused: why do you need "mem" in the first place?

> +        qp = &iwqp->sc_qp;
> +        qp->qp_uk.back_qp = (void *)iwqp;
> +        qp->qp_uk.lock = &iwqp->lock;
> +        qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX;
> +
> +        iwqp->q2_ctx_mem.size = ALIGN(IRDMA_Q2_BUF_SIZE + IRDMA_QP_CTX_SIZE,
> +                                      256);
> +        iwqp->q2_ctx_mem.va = dma_alloc_coherent(hw_to_dev(dev->hw),
> +                                                 iwqp->q2_ctx_mem.size,
> +                                                 &iwqp->q2_ctx_mem.pa,
> +                                                 GFP_KERNEL);
> +        if (!iwqp->q2_ctx_mem.va) {
> +                err_code = -ENOMEM;
> +                goto error;
> +        }
> +
> +        init_info.q2 = iwqp->q2_ctx_mem.va;
> +        init_info.q2_pa = iwqp->q2_ctx_mem.pa;
> +        init_info.host_ctx = (void *)init_info.q2 + IRDMA_Q2_BUF_SIZE;
> +        init_info.host_ctx_pa = init_info.q2_pa + IRDMA_Q2_BUF_SIZE;
> +
> +        if (init_attr->qp_type == IB_QPT_GSI && !rf->ldev.ftype)
> +                qp_num = 1;
> +        else
> +                err_code = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp,
> +                                            &qp_num, &rf->next_qp);
> +        if (err_code)
> +                goto error;
> +
> +        iwqp->iwdev = iwdev;
> +        iwqp->iwpd = iwpd;
> +        if (init_attr->qp_type == IB_QPT_GSI && !rf->ldev.ftype)
> +                iwqp->ibqp.qp_num = 1;
> +        else
> +                iwqp->ibqp.qp_num = qp_num;
> +
> +        qp = &iwqp->sc_qp;
> +        iwqp->iwscq = to_iwcq(init_attr->send_cq);
> +        iwqp->iwrcq = to_iwcq(init_attr->recv_cq);
> +        iwqp->host_ctx.va = init_info.host_ctx;
> +        iwqp->host_ctx.pa = init_info.host_ctx_pa;
> +        iwqp->host_ctx.size = IRDMA_QP_CTX_SIZE;
> +
> +        init_info.pd = &iwpd->sc_pd;
> +        init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num;
> +        if (!rdma_protocol_roce(&iwdev->ibdev, 1))
> +                init_info.qp_uk_init_info.first_sq_wq = 1;
> +        iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
> +        init_waitqueue_head(&iwqp->waitq);
> +        init_waitqueue_head(&iwqp->mod_qp_waitq);
> +
> +        if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
> +                if (init_attr->qp_type != IB_QPT_RC &&
> +                    init_attr->qp_type != IB_QPT_UD &&
> +                    init_attr->qp_type != IB_QPT_GSI) {
> +                        err_code = -EINVAL;
> +                        goto error;
> +                }
> +        } else {
> +                if (init_attr->qp_type != IB_QPT_RC) {
> +                        err_code = -EINVAL;
> +                        goto error;
> +                }
> +        }
> +        if (udata) {
> +                err_code = ib_copy_from_udata(&req, udata,
> +                                              min(sizeof(req), udata->inlen));
> +                if (err_code) {
> +                        ibdev_dbg(to_ibdev(iwdev),
> +                                  "VERBS: ib_copy_from_data fail\n");
> +                        goto error;
> +                }
> +
> +                iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
> +                iwqp->user_mode = 1;
> +                if (req.user_wqe_bufs) {
> +                        struct irdma_ucontext *ucontext =
> +                                rdma_udata_to_drv_context(udata,
> +                                                          struct irdma_ucontext,
> +                                                          ibucontext);
> +                        spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
> +                        iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs,
> +                                                    &ucontext->qp_reg_mem_list);
> +                        spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
> +
> +                        if (!iwqp->iwpbl) {
> +                                err_code = -ENODATA;
> +                                ibdev_dbg(to_ibdev(iwdev),
> +                                          "VERBS: no pbl info\n");
> +                                goto error;
> +                        }
> +                }
> +                init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
> +                err_code = irdma_setup_virt_qp(iwdev, iwqp, &init_info);
> +        } else {
> +                init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
> +                err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr);
> +        }
> +
> +        if (err_code) {
> +                ibdev_dbg(to_ibdev(iwdev), "VERBS: setup qp failed\n");
> +                goto error;
> +        }
> +
> +        if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
> +                if (init_attr->qp_type == IB_QPT_RC) {
> +                        init_info.type = IRDMA_QP_TYPE_ROCE_RC;
> +                        init_info.qp_uk_init_info.qp_caps = IRDMA_SEND_WITH_IMM |
> +                                                            IRDMA_WRITE_WITH_IMM |
> +                                                            IRDMA_ROCE;
> +                } else {
> +                        init_info.type = IRDMA_QP_TYPE_ROCE_UD;
> +                        init_info.qp_uk_init_info.qp_caps = IRDMA_SEND_WITH_IMM |
> +                                                            IRDMA_ROCE;
> +                }
> +        } else {
> +                init_info.type = IRDMA_QP_TYPE_IWARP;
> +                init_info.qp_uk_init_info.qp_caps = IRDMA_WRITE_WITH_IMM;
> +        }
> +
> +        ret = dev->iw_priv_qp_ops->qp_init(qp, &init_info);
> +        if (ret) {
> +                err_code = -EPROTO;
> +                ibdev_dbg(to_ibdev(iwdev), "VERBS: qp_init fail\n");
> +                goto error;
> +        }
> +
> +        ctx_info = &iwqp->ctx_info;
> +        if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
> +                iwqp->ctx_info.roce_info = &iwqp->roce_info;
> +                iwqp->ctx_info.udp_info = &iwqp->udp_info;
> +                udp_info = &iwqp->udp_info;
> +                udp_info->snd_mss = irdma_roce_mtu(iwdev->vsi.mtu);
> +                udp_info->cwnd = 0x400;
> +                udp_info->src_port = 0xc000;
> +                udp_info->dst_port = ROCE_V2_UDP_DPORT;
> +                roce_info = &iwqp->roce_info;
> +                ether_addr_copy(roce_info->mac_addr, iwdev->netdev->dev_addr);
> +
> +                if (init_attr->qp_type == IB_QPT_GSI && !rf->sc_dev.privileged)
> +                        roce_info->is_qp1 = true;
> +                roce_info->rd_en = true;
> +                roce_info->wr_rdresp_en = true;
> +                roce_info->dcqcn_en = true;
> +
> +                roce_info->ack_credits = 0x1E;
> +                roce_info->ird_size = IRDMA_MAX_ENCODED_IRD_SIZE;
> +                roce_info->ord_size = dev->hw_attrs.max_hw_ord;
> +
> +                if (!iwqp->user_mode) {
> +                        roce_info->priv_mode_en = true;
> +                        roce_info->fast_reg_en = true;
> +                        roce_info->udprivcq_en = true;
> +                }
> +                roce_info->roce_tver = 0;
> +        } else {
> +                iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info;
> +                iwarp_info = &iwqp->iwarp_info;
> +                ether_addr_copy(iwarp_info->mac_addr, iwdev->netdev->dev_addr);
> +                iwarp_info->rd_en = true;
> +                iwarp_info->wr_rdresp_en = true;
> +                iwarp_info->ecn_en = true;
> +
> +                if (dev->hw_attrs.uk_attrs.hw_rev > IRDMA_GEN_1)
> +                        iwarp_info->ib_rd_en = true;
> +                if (!iwqp->user_mode) {
> +                        iwarp_info->priv_mode_en = true;
> +                        iwarp_info->fast_reg_en = true;
> +                }
> +                iwarp_info->ddp_ver = 1;
> +                iwarp_info->rdmap_ver = 1;
> +                ctx_info->iwarp_info_valid = true;
> +        }
> +        ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
> +        ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
> +        if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
> +                ret = dev->iw_priv_qp_ops->qp_setctx_roce(&iwqp->sc_qp,
> +                                                          iwqp->host_ctx.va,
> +                                                          ctx_info);
> +        } else {
> +                ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
> +                                                     iwqp->host_ctx.va,
> +                                                     ctx_info);
> +                ctx_info->iwarp_info_valid = false;
> +        }
> +
> +        cqp_request = irdma_get_cqp_request(iwcqp, true);
> +        if (!cqp_request) {
> +                err_code = -ENOMEM;
> +                goto error;
> +        }
> +
> +        cqp_info = &cqp_request->info;
> +        qp_info = &cqp_request->info.in.u.qp_create.info;
> +        memset(qp_info, 0, sizeof(*qp_info));
> +        qp_info->mac_valid = true;
> +        qp_info->cq_num_valid = true;
> +        qp_info->next_iwarp_state = IRDMA_QP_STATE_IDLE;
> +
> +        cqp_info->cqp_cmd = IRDMA_OP_QP_CREATE;
> +        cqp_info->post_sq = 1;
> +        cqp_info->in.u.qp_create.qp = qp;
> +        cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
> +        ret = irdma_handle_cqp_op(rf, cqp_request);
> +        if (ret) {
> +                ibdev_dbg(to_ibdev(iwdev), "VERBS: CQP-OP QP create fail");
> +                err_code = -ENOMEM;
> +                goto error;
> +        }
> +
> +        refcount_set(&iwqp->refcnt, 1);
> +        spin_lock_init(&iwqp->lock);
> +        spin_lock_init(&iwqp->sc_qp.pfpdu.lock);
> +        iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
> +        rf->qp_table[qp_num] = iwqp;
> +        iwqp->max_send_wr = sq_size;
> +        iwqp->max_recv_wr = rq_size;
> +        if (udata) {
> +                /* GEN_1 legacy support with libi40iw */
> +                if (iwpd->sc_pd.abi_ver <= 5) {
> +                        uresp_gen1.lsmm = 1;
> +                        uresp_gen1.actual_sq_size = sq_size;
> +                        uresp_gen1.actual_rq_size = rq_size;
> +                        uresp_gen1.qp_id = qp_num;
> +                        uresp_gen1.push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX;
> +                        uresp_gen1.lsmm = 1;
> +                        err_code = ib_copy_to_udata(udata, &uresp_gen1,
> +                                                    min(sizeof(uresp_gen1), udata->outlen));
> +                } else {
> +                        if (rdma_protocol_iwarp(&iwdev->ibdev, 1))
> +                                uresp.lsmm = 1;
> +                        uresp.actual_sq_size = sq_size;
> +                        uresp.actual_rq_size = rq_size;
> +                        uresp.qp_id = qp_num;
> +                        uresp.qp_caps = qp->qp_uk.qp_caps;
> +
> +                        err_code = ib_copy_to_udata(udata, &uresp,
> +                                                    min(sizeof(uresp), udata->outlen));
> +                }
> +                if (err_code) {
> +                        ibdev_dbg(to_ibdev(iwdev),
> +                                  "VERBS: copy_to_udata failed\n");
> +                        irdma_destroy_qp(&iwqp->ibqp, udata);
> +                        return ERR_PTR(err_code);
> +                }
> +        }
> +        init_completion(&iwqp->sq_drained);
> +        init_completion(&iwqp->rq_drained);
> +        return &iwqp->ibqp;
> +
> +error:
> +        irdma_free_qp_rsrc(iwdev, iwqp, qp_num);
> +
> +        return ERR_PTR(err_code);
> +}
> +

This function is too long, please split it up; one possible split is
sketched at the end of this mail.

Thanks
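
To make the first point concrete, here is a rough sketch of the
create/destroy pairing I am asking for, with no driver-side reference
counting and no intermediate "mem" pointer. It is only an illustration:
the reduced bodies and the irdma_setup_qp_hw()/irdma_teardown_qp_hw()
helpers are made up for the example, everything else reuses names from
the patch.

static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd,
                                     struct ib_qp_init_attr *init_attr,
                                     struct ib_udata *udata)
{
        struct irdma_qp *iwqp;
        int err;

        /* QP memory follows plain create->kzalloc ... */
        iwqp = kzalloc(sizeof(*iwqp), GFP_KERNEL);
        if (!iwqp)
                return ERR_PTR(-ENOMEM);

        /* hypothetical helper: rings, contexts, CQP create command */
        err = irdma_setup_qp_hw(iwqp, init_attr, udata);
        if (err) {
                kfree(iwqp);
                return ERR_PTR(err);
        }

        return &iwqp->ibqp;
}

static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
        struct irdma_qp *iwqp = to_iwqp(ibqp);

        /* hypothetical helper: quiesce HW, free rings and contexts */
        irdma_teardown_qp_hw(iwqp);

        /* ... and destroy->kfree, nothing is left for a later deref */
        kfree(iwqp);
        return 0;
}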
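
And on the length of irdma_create_qp(), one possible shape after
splitting it up. The irdma_validate_qp_attrs(), irdma_setup_qp_rsrc(),
irdma_setup_qp_queues(), irdma_cqp_create_qp_cmd() and
irdma_free_qp_hw_rsrc() helpers below are hypothetical names, not part
of the patch; the sketch only shows how the flow could be carved into
steps with a single unwind path.

static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd,
                                     struct ib_qp_init_attr *init_attr,
                                     struct ib_udata *udata)
{
        struct irdma_device *iwdev = to_iwdev(ibpd->device);
        struct irdma_qp *iwqp;
        int err;

        /* attribute/caps checks against uk_attrs */
        err = irdma_validate_qp_attrs(iwdev, init_attr);
        if (err)
                return ERR_PTR(err);

        iwqp = kzalloc(sizeof(*iwqp), GFP_KERNEL);
        if (!iwqp)
                return ERR_PTR(-ENOMEM);

        /* q2/ctx DMA buffers and QP id allocation */
        err = irdma_setup_qp_rsrc(iwdev, iwqp, init_attr);
        if (err)
                goto free_qp;

        /* user-mode (udata) or kernel-mode SQ/RQ setup */
        err = irdma_setup_qp_queues(iwdev, iwqp, init_attr, udata);
        if (err)
                goto free_rsrc;

        /* host context programming and the CQP create command */
        err = irdma_cqp_create_qp_cmd(iwdev, iwqp, init_attr);
        if (err)
                goto free_rsrc;

        /* uresp copy to user space, completions init, etc. would follow */
        return &iwqp->ibqp;

free_rsrc:
        irdma_free_qp_hw_rsrc(iwdev, iwqp);
free_qp:
        kfree(iwqp);
        return ERR_PTR(err);
}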