On Tue, Jul 17, 2018 at 01:08:17PM +0530, Potnuri Bharat Teja wrote: > Adds iw_cxgb4 functionality to support RDMA_WRITE_WITH_IMMEDATE opcode. > > Signed-off-by: Potnuri Bharat Teja <bharat@xxxxxxxxxxx> > Signed-off-by: Steve Wise <swise@xxxxxxxxxxxxxxxxxxxxx> > drivers/infiniband/hw/cxgb4/cq.c | 23 ++++++++++++++++--- > drivers/infiniband/hw/cxgb4/qp.c | 37 ++++++++++++++++++++++++------- > drivers/infiniband/hw/cxgb4/t4.h | 16 ++++++++++++- > drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 18 ++++++++++++--- > include/uapi/rdma/cxgb4-abi.h | 3 ++- > 5 files changed, 81 insertions(+), 16 deletions(-) > > diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c > index 64f70800945b..5792675150e0 100644 > +++ b/drivers/infiniband/hw/cxgb4/cq.c > @@ -816,15 +816,32 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) > wc->byte_len = CQE_LEN(&cqe); > else > wc->byte_len = 0; > - wc->opcode = IB_WC_RECV; > - if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV || > - CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { > + > + switch (CQE_OPCODE(&cqe)) { > + case FW_RI_SEND: > + wc->opcode = IB_WC_RECV; > + break; > + case FW_RI_SEND_WITH_INV: > + case FW_RI_SEND_WITH_SE_INV: > + wc->opcode = IB_WC_RECV; > wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); > wc->wc_flags |= IB_WC_WITH_INVALIDATE; > c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); > + break; > + case FW_RI_WRITE_IMMEDIATE: > + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; > + wc->ex.imm_data = CQE_IMM_DATA(&cqe); > + wc->wc_flags |= IB_WC_WITH_IMM; > + break; > + default: > + pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", > + CQE_OPCODE(&cqe), CQE_QPID(&cqe)); > + ret = -EINVAL; > + goto out; > } > } else { > switch (CQE_OPCODE(&cqe)) { > + case FW_RI_WRITE_IMMEDIATE: > case FW_RI_RDMA_WRITE: > wc->opcode = IB_WC_RDMA_WRITE; > break; > diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c > index 08dc555942af..88bd67d7dc77 100644 > +++ 
b/drivers/infiniband/hw/cxgb4/qp.c > @@ -555,7 +555,15 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, > > if (wr->num_sge > T4_MAX_SEND_SGE) > return -EINVAL; > - wqe->write.r2 = 0; > + > + /* > + * iWARP protocol supports 64 bit immediate data but rdma api > + * limits it to 32bit. > + */ > + if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) > + wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data; > + else > + wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0; > wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); > wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); > if (wr->num_sge) { > @@ -846,6 +854,9 @@ static int ib_to_fw_opcode(int ib_opcode) > case IB_WR_RDMA_WRITE: > opcode = FW_RI_RDMA_WRITE; > break; > + case IB_WR_RDMA_WRITE_WITH_IMM: > + opcode = FW_RI_WRITE_IMMEDIATE; > + break; > case IB_WR_RDMA_READ: > case IB_WR_RDMA_READ_WITH_INV: > opcode = FW_RI_READ_REQ; > @@ -964,6 +975,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, > enum fw_wr_opcodes fw_opcode = 0; > enum fw_ri_wr_flags fw_flags; > struct c4iw_qp *qhp; > + struct c4iw_dev *rhp; > union t4_wr *wqe = NULL; > u32 num_wrs; > struct t4_swsqe *swsqe; > @@ -971,6 +983,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, > u16 idx = 0; > > qhp = to_c4iw_qp(ibqp); > + rhp = qhp->rhp; > spin_lock_irqsave(&qhp->lock, flag); > > /* > @@ -1015,6 +1028,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, > swsqe->opcode = FW_RI_SEND_WITH_INV; > err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); > break; > + case IB_WR_RDMA_WRITE_WITH_IMM: > + if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) { > + err = -EINVAL; > + break; > + } > + fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE; > + /*FALLTHROUGH*/ > case IB_WR_RDMA_WRITE: > fw_opcode = FW_RI_RDMA_WRITE_WR; > swsqe->opcode = FW_RI_RDMA_WRITE; > @@ -1025,8 +1045,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, > fw_opcode = FW_RI_RDMA_READ_WR; > 
swsqe->opcode = FW_RI_READ_REQ; > if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) { > - c4iw_invalidate_mr(qhp->rhp, > - wr->sg_list[0].lkey); > + c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey); > fw_flags = FW_RI_RDMA_READ_INVALIDATE; > } else { > fw_flags = 0; > @@ -1042,7 +1061,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, > struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr); > > swsqe->opcode = FW_RI_FAST_REGISTER; > - if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support && > + if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support && > !mhp->attr.state && mhp->mpl_len <= 2) { > fw_opcode = FW_RI_FR_NSMR_TPTE_WR; > build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr), > @@ -1051,7 +1070,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, > fw_opcode = FW_RI_FR_NSMR_WR; > err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), > mhp, &len16, > - qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); > + rhp->rdev.lldi.ulptx_memwrite_dsgl); > if (err) > break; > } > @@ -1064,7 +1083,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, > fw_opcode = FW_RI_INV_LSTAG_WR; > swsqe->opcode = FW_RI_LOCAL_INV; > err = build_inv_stag(wqe, wr, &len16); > - c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey); > + c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey); > break; > default: > pr_warn("%s post of type=%d TBD!\n", __func__, > @@ -1083,7 +1102,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, > swsqe->wr_id = wr->wr_id; > if (c4iw_wr_log) { > swsqe->sge_ts = cxgb4_read_sge_timestamp( > - qhp->rhp->rdev.lldi.ports[0]); > + rhp->rdev.lldi.ports[0]); > swsqe->host_time = ktime_get(); > } > > @@ -1097,7 +1116,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, > t4_sq_produce(&qhp->wq, len16); > idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); > } > - if (!qhp->rhp->rdev.status_page->db_off) { > + if (!rhp->rdev.status_page->db_off) { > t4_ring_sq_db(&qhp->wq, idx, wqe); > spin_unlock_irqrestore(&qhp->lock, flag); > } else { > @@ -2092,6 +2111,8 
@@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, > uresp.flags = C4IW_QPF_ONCHIP; > } else > uresp.flags = 0; > + if (rhp->rdev.lldi.write_w_imm_support) > + uresp.flags |= C4IW_QPF_WRITE_W_IMM; > uresp.qid_mask = rhp->rdev.qpmask; > uresp.sqid = qhp->wq.sq.qid; > uresp.sq_size = qhp->wq.sq.size; > diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h > index 11d55fc2ded7..3fdcc445ff16 100644 > +++ b/drivers/infiniband/hw/cxgb4/t4.h > @@ -190,7 +190,19 @@ struct t4_cqe { > __be32 abs_rqe_idx; > } srcqe; > struct { > - __be64 imm_data; > + __be32 mo; > + __be32 msn; > + /* > + * Use union for immediate data to be consistent with > + * stack's 32 bit data and iWARP spec's 64 bit data. > + */ > + union { > + struct { > + u32 reserved; > + __be32 imm_data32; > + } ib_imm_data; > + __be64 imm_data64; You guys should think carefully about this as this choice to put the 32 bit version at the end becomes a permanent wire protocol ABI for iWarp. The definition of 'imm data' is a memory image of what is put into the packet, so placing the 32 bit version of this at the end of the array on the wire seems like a strange choice. If we extend verbs, it will continue to be a memory image semantic, and I would expect the 32 bit version to be the upper chunk of the array, not the lower chunk. Jason -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html