RE: [PATCH for-next v2 1/2] iw_cxgb4: RDMA write with immediate support

Hey Jason,

> -----Original Message-----
> From: linux-rdma-owner@xxxxxxxxxxxxxxx <linux-rdma-
> owner@xxxxxxxxxxxxxxx> On Behalf Of Jason Gunthorpe
> Sent: Thursday, July 26, 2018 12:21 PM
> To: Potnuri Bharat Teja <bharat@xxxxxxxxxxx>
> Cc: dledford@xxxxxxxxxx; swise@xxxxxxxxxxxxxxxxxxxxx;
> rajur@xxxxxxxxxxx; linux-rdma@xxxxxxxxxxxxxxx
> Subject: Re: [PATCH for-next v2 1/2] iw_cxgb4: RDMA write with immediate
> support
> 
> On Tue, Jul 17, 2018 at 01:08:17PM +0530, Potnuri Bharat Teja wrote:
> > Adds iw_cxgb4 functionality to support RDMA_WRITE_WITH_IMMEDATE opcode.
> >
> > Signed-off-by: Potnuri Bharat Teja <bharat@xxxxxxxxxxx>
> > Signed-off-by: Steve Wise <swise@xxxxxxxxxxxxxxxxxxxxx>
> >  drivers/infiniband/hw/cxgb4/cq.c          | 23 ++++++++++++++++---
> >  drivers/infiniband/hw/cxgb4/qp.c          | 37 ++++++++++++++++++++++++-------
> >  drivers/infiniband/hw/cxgb4/t4.h          | 16 ++++++++++++-
> >  drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 18 ++++++++++++---
> >  include/uapi/rdma/cxgb4-abi.h             |  3 ++-
> >  5 files changed, 81 insertions(+), 16 deletions(-)
> >
> > diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
> > index 64f70800945b..5792675150e0 100644
> > --- a/drivers/infiniband/hw/cxgb4/cq.c
> > +++ b/drivers/infiniband/hw/cxgb4/cq.c
> > @@ -816,15 +816,32 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
> >  			wc->byte_len = CQE_LEN(&cqe);
> >  		else
> >  			wc->byte_len = 0;
> > -		wc->opcode = IB_WC_RECV;
> > -		if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
> > -		    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
> > +
> > +		switch (CQE_OPCODE(&cqe)) {
> > +		case FW_RI_SEND:
> > +			wc->opcode = IB_WC_RECV;
> > +			break;
> > +		case FW_RI_SEND_WITH_INV:
> > +		case FW_RI_SEND_WITH_SE_INV:
> > +			wc->opcode = IB_WC_RECV;
> >  			wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
> >  			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
> >  			c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
> > +			break;
> > +		case FW_RI_WRITE_IMMEDIATE:
> > +			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
> > +			wc->ex.imm_data = CQE_IMM_DATA(&cqe);
> > +			wc->wc_flags |= IB_WC_WITH_IMM;
> > +			break;
> > +		default:
> > +			pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
> > +			       CQE_OPCODE(&cqe), CQE_QPID(&cqe));
> > +			ret = -EINVAL;
> > +			goto out;
> >  		}
> >  	} else {
> >  		switch (CQE_OPCODE(&cqe)) {
> > +		case FW_RI_WRITE_IMMEDIATE:
> >  		case FW_RI_RDMA_WRITE:
> >  			wc->opcode = IB_WC_RDMA_WRITE;
> >  			break;
> > diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
> > index 08dc555942af..88bd67d7dc77 100644
> > --- a/drivers/infiniband/hw/cxgb4/qp.c
> > +++ b/drivers/infiniband/hw/cxgb4/qp.c
> > @@ -555,7 +555,15 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
> >
> >  	if (wr->num_sge > T4_MAX_SEND_SGE)
> >  		return -EINVAL;
> > -	wqe->write.r2 = 0;
> > +
> > +	/*
> > +	 * iWARP protocol supports 64 bit immediate data but rdma api
> > +	 * limits it to 32bit.
> > +	 */
> > +	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
> > +		wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data;
> > +	else
> > +		wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0;
> >  	wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
> >  	wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
> >  	if (wr->num_sge) {
> > @@ -846,6 +854,9 @@ static int ib_to_fw_opcode(int ib_opcode)
> >  	case IB_WR_RDMA_WRITE:
> >  		opcode = FW_RI_RDMA_WRITE;
> >  		break;
> > +	case IB_WR_RDMA_WRITE_WITH_IMM:
> > +		opcode = FW_RI_WRITE_IMMEDIATE;
> > +		break;
> >  	case IB_WR_RDMA_READ:
> >  	case IB_WR_RDMA_READ_WITH_INV:
> >  		opcode = FW_RI_READ_REQ;
> > @@ -964,6 +975,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
> >  	enum fw_wr_opcodes fw_opcode = 0;
> >  	enum fw_ri_wr_flags fw_flags;
> >  	struct c4iw_qp *qhp;
> > +	struct c4iw_dev *rhp;
> >  	union t4_wr *wqe = NULL;
> >  	u32 num_wrs;
> >  	struct t4_swsqe *swsqe;
> > @@ -971,6 +983,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
> >  	u16 idx = 0;
> >
> >  	qhp = to_c4iw_qp(ibqp);
> > +	rhp = qhp->rhp;
> >  	spin_lock_irqsave(&qhp->lock, flag);
> >
> >  	/*
> > @@ -1015,6 +1028,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
> >  				swsqe->opcode = FW_RI_SEND_WITH_INV;
> >  			err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
> >  			break;
> > +		case IB_WR_RDMA_WRITE_WITH_IMM:
> > +			if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) {
> > +				err = -EINVAL;
> > +				break;
> > +			}
> > +			fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE;
> > +			/*FALLTHROUGH*/
> >  		case IB_WR_RDMA_WRITE:
> >  			fw_opcode = FW_RI_RDMA_WRITE_WR;
> >  			swsqe->opcode = FW_RI_RDMA_WRITE;
> > @@ -1025,8 +1045,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
> >  			fw_opcode = FW_RI_RDMA_READ_WR;
> >  			swsqe->opcode = FW_RI_READ_REQ;
> >  			if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) {
> > -				c4iw_invalidate_mr(qhp->rhp,
> > -						   wr->sg_list[0].lkey);
> > +				c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey);
> >  				fw_flags = FW_RI_RDMA_READ_INVALIDATE;
> >  			} else {
> >  				fw_flags = 0;
> > @@ -1042,7 +1061,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
> >  			struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr);
> >
> >  			swsqe->opcode = FW_RI_FAST_REGISTER;
> > -			if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support &&
> > +			if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support &&
> >  			    !mhp->attr.state && mhp->mpl_len <= 2) {
> >  				fw_opcode = FW_RI_FR_NSMR_TPTE_WR;
> >  				build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr),
> > @@ -1051,7 +1070,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
> >  				fw_opcode = FW_RI_FR_NSMR_WR;
> >  				err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr),
> >  				       mhp, &len16,
> > -				       qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl);
> > +				       rhp->rdev.lldi.ulptx_memwrite_dsgl);
> >  				if (err)
> >  					break;
> >  			}
> > @@ -1064,7 +1083,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
> >  			fw_opcode = FW_RI_INV_LSTAG_WR;
> >  			swsqe->opcode = FW_RI_LOCAL_INV;
> >  			err = build_inv_stag(wqe, wr, &len16);
> > -			c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey);
> > +			c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey);
> >  			break;
> >  		default:
> >  			pr_warn("%s post of type=%d TBD!\n", __func__,
> > @@ -1083,7 +1102,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
> >  		swsqe->wr_id = wr->wr_id;
> >  		if (c4iw_wr_log) {
> >  			swsqe->sge_ts = cxgb4_read_sge_timestamp(
> > -					qhp->rhp->rdev.lldi.ports[0]);
> > +					rhp->rdev.lldi.ports[0]);
> >  			swsqe->host_time = ktime_get();
> >  		}
> >
> > @@ -1097,7 +1116,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
> >  		t4_sq_produce(&qhp->wq, len16);
> >  		idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
> >  	}
> > -	if (!qhp->rhp->rdev.status_page->db_off) {
> > +	if (!rhp->rdev.status_page->db_off) {
> >  		t4_ring_sq_db(&qhp->wq, idx, wqe);
> >  		spin_unlock_irqrestore(&qhp->lock, flag);
> >  	} else {
> > @@ -2092,6 +2111,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
> >  			uresp.flags = C4IW_QPF_ONCHIP;
> >  		} else
> >  			uresp.flags = 0;
> > +		if (rhp->rdev.lldi.write_w_imm_support)
> > +			uresp.flags |= C4IW_QPF_WRITE_W_IMM;
> >  		uresp.qid_mask = rhp->rdev.qpmask;
> >  		uresp.sqid = qhp->wq.sq.qid;
> >  		uresp.sq_size = qhp->wq.sq.size;
> > diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
> > index 11d55fc2ded7..3fdcc445ff16 100644
> > --- a/drivers/infiniband/hw/cxgb4/t4.h
> > +++ b/drivers/infiniband/hw/cxgb4/t4.h
> > @@ -190,7 +190,19 @@ struct t4_cqe {
> >  			__be32 abs_rqe_idx;
> >  		} srcqe;
> >  		struct {
> > -			__be64 imm_data;
> > +			__be32 mo;
> > +			__be32 msn;
> > +			/*
> > +			 * Use union for immediate data to be consistent with
> > +			 * stack's 32 bit data and iWARP spec's 64 bit data.
> > +			 */
> > +			union {
> > +				struct {
> > +					u32 reserved;
> > +					__be32 imm_data32;
> > +				} ib_imm_data;
> > +				__be64 imm_data64;
> 
> You guys should think carefully about this, as this choice to put the
> 32 bit version at the end becomes a permanent wire protocol ABI for
> iWARP.
> 
> The definition of 'imm data' is a memory image of what is put into the
> packet, so placing the 32 bit version of this at the end of the array
> on the wire seems like a strange choice.
> 
> If we extend verbs, it will continue to be a memory image semantic,
> and I would expect the 32 bits version to be the upper chunk of the
> array, not the lower chunk.
> 

Are you assuming an LE host? That brings up the point that 'imm data' is
not very platform independent. But I guess putting it in the upper chunk is
fine.
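
To make the layout question concrete, here is a small standalone userspace
sketch (illustration only, not driver code) that mirrors the union from the
quoted t4.h hunk. htonl() stands in for cpu_to_be32(), and the 0x11223344
value is made up:

/*
 * Standalone illustration of the imm_data layout being discussed;
 * compiles with any C compiler on Linux.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

union imm_data_layout {
	struct {
		uint32_t reserved;
		uint32_t imm_data32;	/* __be32 in the patch */
	} ib_imm_data;
	uint64_t imm_data64;		/* __be64 in the patch */
};

int main(void)
{
	union imm_data_layout u;
	unsigned char wire[8];
	size_t i;

	memset(&u, 0, sizeof(u));
	u.ib_imm_data.imm_data32 = htonl(0x11223344);

	/* "memory image": bytes go on the wire exactly as laid out in memory */
	memcpy(wire, &u, sizeof(wire));
	for (i = 0; i < sizeof(wire); i++)
		printf("byte %zu: %02x\n", i, (unsigned int)wire[i]);

	/*
	 * With the layout as posted, the 32-bit immediate occupies bytes 4..7
	 * of the 8-byte field; swapping imm_data32 and reserved would put it
	 * in bytes 0..3 instead.  Reading the same field through imm_data64
	 * on the host is additionally endian-dependent, which is the LE-host
	 * point above.
	 */
	return 0;
}

Whichever half ends up carrying the 32-bit value, that byte position is what
gets baked into the wire format.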





