[PATCH rdma-core 07/11] libbnxtre: Enable UD control path and wqe posting

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch adds the infrastructure needed to enable the unreliable
datagram (UD) control path. It also adds support for posting
Send WQEs to UD QPs. The major changes are:

 - Mmap the shared page exported by the kernel driver to
   read the AH-ID from kernel space.
 - Add support for create-AH and destroy-AH.
 - Add support for posting UD WQEs.
 - Do not use search-psn memory for PSN tracking on UD QPs;
   for UD QPs it only holds the WC opcode.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@xxxxxxxxxxxx>
Signed-off-by: Somnath Kotur <somnath.kotur@xxxxxxxxxxxx>
Signed-off-by: Selvin Xavier <selvin.xavier@xxxxxxxxxxxx>
Signed-off-by: Devesh Sharma <devesh.sharma@xxxxxxxxxxxx>
---
 providers/bnxtre/abi.h   |  7 ++++
 providers/bnxtre/main.c  | 17 +++++++++
 providers/bnxtre/main.h  | 13 +++++++
 providers/bnxtre/verbs.c | 98 +++++++++++++++++++++++++++++++++++++++---------
 4 files changed, 117 insertions(+), 18 deletions(-)

diff --git a/providers/bnxtre/abi.h b/providers/bnxtre/abi.h
index f660d13..d77bb38 100644
--- a/providers/bnxtre/abi.h
+++ b/providers/bnxtre/abi.h
@@ -176,6 +176,13 @@ enum bnxt_re_ud_flags_mask {
 	BNXT_RE_UD_FLAGS_ROCE_IPV6	= 0x03
 };
 
+enum bnxt_re_shpg_offt {
+	BNXT_RE_SHPG_BEG_RESV_OFFT	= 0x00,
+	BNXT_RE_SHPG_AVID_OFFT		= 0x10,
+	BNXT_RE_SHPG_AVID_SIZE		= 0x04,
+	BNXT_RE_SHPG_END_RESV_OFFT	= 0xFF0
+};
+
 struct bnxt_re_db_hdr {
 	__u32 indx;
 	__u32 typ_qid; /* typ: 4, qid:20*/
diff --git a/providers/bnxtre/main.c b/providers/bnxtre/main.c
index effb3b6..c362e72 100644
--- a/providers/bnxtre/main.c
+++ b/providers/bnxtre/main.c
@@ -134,18 +134,35 @@ static int bnxt_re_init_context(struct verbs_device *vdev,
 	dev->cqe_size = resp.cqe_size;
 	dev->max_cq_depth = resp.max_cqd;
 	pthread_spin_init(&cntx->fqlock, PTHREAD_PROCESS_PRIVATE);
+	/* mmap shared page. */
+	cntx->shpg = mmap(NULL, dev->pg_size, PROT_READ | PROT_WRITE,
+			  MAP_SHARED, cmd_fd, 0);
+	if (cntx->shpg == MAP_FAILED) {
+		cntx->shpg = NULL;
+		goto failed;
+	}
+	pthread_mutex_init(&cntx->shlock, NULL);
+
 	ibvctx->ops = bnxt_re_cntx_ops;
 
 	return 0;
+failed:
+	fprintf(stderr, DEV "Failed to allocate context for device\n");
+	return errno;
 }
 
 static void bnxt_re_uninit_context(struct verbs_device *vdev,
 				   struct ibv_context *ibvctx)
 {
+	struct bnxt_re_dev *dev;
 	struct bnxt_re_context *cntx;
 
+	dev = to_bnxt_re_dev(&vdev->device);
 	cntx = to_bnxt_re_context(ibvctx);
 	/* Unmap if anything device specific was mapped in init_context. */
+	pthread_mutex_destroy(&cntx->shlock);
+	if (cntx->shpg)
+		munmap(cntx->shpg, dev->pg_size);
 	pthread_spin_destroy(&cntx->fqlock);
 }
 
diff --git a/providers/bnxtre/main.h b/providers/bnxtre/main.h
index 5526bc6..c3689a5 100644
--- a/providers/bnxtre/main.h
+++ b/providers/bnxtre/main.h
@@ -122,6 +122,12 @@ struct bnxt_re_mr {
 	struct ibv_mr ibvmr;
 };
 
+struct bnxt_re_ah {
+	struct ibv_ah ibvah;
+	struct bnxt_re_pd *pd;
+	uint32_t avid;
+};
+
 struct bnxt_re_dev {
 	struct verbs_device vdev;
 	uint8_t abi_version;
@@ -137,6 +143,8 @@ struct bnxt_re_context {
 	uint32_t max_qp;
 	uint32_t max_srq;
 	struct bnxt_re_dpi udpi;
+	void *shpg;
+	pthread_mutex_t shlock;
 	pthread_spinlock_t fqlock;
 };
 
@@ -174,6 +182,11 @@ static inline struct bnxt_re_qp *to_bnxt_re_qp(struct ibv_qp *ibvqp)
 	return container_of(ibvqp, struct bnxt_re_qp, ibvqp);
 }
 
+static inline struct bnxt_re_ah *to_bnxt_re_ah(struct ibv_ah *ibvah)
+{
+	return container_of(ibvah, struct bnxt_re_ah, ibvah);
+}
+
 static inline uint8_t bnxt_re_ibv_to_bnxt_wr_opcd(uint8_t ibv_opcd)
 {
 	uint8_t bnxt_opcd;
diff --git a/providers/bnxtre/verbs.c b/providers/bnxtre/verbs.c
index adb9b23..de9bec8 100644
--- a/providers/bnxtre/verbs.c
+++ b/providers/bnxtre/verbs.c
@@ -710,9 +710,6 @@ static int bnxt_re_check_qp_limits(struct bnxt_re_context *cntx,
 	struct ibv_device_attr devattr;
 	int ret;
 
-	if (attr->qp_type == IBV_QPT_UD)
-		return -ENOSYS;
-
 	ret = bnxt_re_query_device(&cntx->ibvctx, &devattr);
 	if (ret)
 		return ret;
@@ -788,6 +785,11 @@ static int bnxt_re_alloc_queues(struct bnxt_re_qp *qp,
 		psn_depth++;
 
 	que->depth += psn_depth;
+	/* PSN-search memory is allocated without checking the
+	 * QP type. The kernel driver does not map this memory for
+	 * a UD QP. UD QPs use this memory to maintain the WC opcode.
+	 * See the definition of bnxt_re_fill_psns() for the use case.
+	 */
 	ret = bnxt_re_alloc_aligned(qp->sqq, pg_size);
 	if (ret)
 		return ret;
@@ -1013,18 +1015,18 @@ static void bnxt_re_fill_psns(struct bnxt_re_qp *qp, struct bnxt_re_psns *psns,
 	uint32_t pkt_cnt = 0, nxt_psn;
 
 	memset(psns, 0, sizeof(*psns));
-	psns->opc_spsn = qp->sq_psn & BNXT_RE_PSNS_SPSN_MASK;
+	if (qp->qptyp == IBV_QPT_RC) {
+		psns->opc_spsn = qp->sq_psn & BNXT_RE_PSNS_SPSN_MASK;
+		pkt_cnt = (len / qp->mtu);
+		if (len % qp->mtu)
+			pkt_cnt++;
+		nxt_psn = ((qp->sq_psn + pkt_cnt) & BNXT_RE_PSNS_NPSN_MASK);
+		psns->flg_npsn = nxt_psn;
+		qp->sq_psn = nxt_psn;
+	}
 	opcode = bnxt_re_ibv_wr_to_wc_opcd(opcode);
 	psns->opc_spsn |= ((opcode & BNXT_RE_PSNS_OPCD_MASK) <<
 			    BNXT_RE_PSNS_OPCD_SHIFT);
-
-	pkt_cnt = (len / qp->mtu);
-	if (len % qp->mtu)
-		pkt_cnt++;
-	nxt_psn = ((qp->sq_psn + pkt_cnt) & BNXT_RE_PSNS_NPSN_MASK);
-	psns->flg_npsn = nxt_psn;
-	qp->sq_psn = nxt_psn;
-
 	*(uint64_t *)psns = htole64(*(uint64_t *)psns);
 }
 
@@ -1066,10 +1068,26 @@ static int bnxt_re_build_send_sqe(struct bnxt_re_qp *qp, void *wqe,
 	qesize += (bnxt_re_get_sqe_hdr_sz() >> 4);
 	hdr->rsv_ws_fl_wt |= (qesize & BNXT_RE_HDR_WS_MASK) <<
 			      BNXT_RE_HDR_WS_SHIFT;
-#if 0
-	if (qp_typ == IBV_QPT_UD) {
+	return len;
+}
+
+static int bnxt_re_build_ud_sqe(struct bnxt_re_qp *qp, void *wqe,
+				struct ibv_send_wr *wr, uint8_t is_inline)
+{
+	struct bnxt_re_send *sqe = ((void *)wqe + sizeof(struct bnxt_re_bsqe));
+	struct bnxt_re_ah *ah;
+	uint32_t len;
+
+	len = bnxt_re_build_send_sqe(qp, wqe, wr, is_inline);
+	sqe->qkey = wr->wr.ud.remote_qkey;
+	sqe->dst_qp = wr->wr.ud.remote_qpn;
+	if (!wr->wr.ud.ah) {
+		len = -EINVAL;
+		goto bail;
 	}
-#endif
+	ah = to_bnxt_re_ah(wr->wr.ud.ah);
+	sqe->avid = ah->avid & 0xFFFFF;
+bail:
 	return len;
 }
 
@@ -1134,9 +1152,14 @@ int bnxt_re_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 		case IBV_WR_SEND_WITH_IMM:
 			hdr->key_immd = wr->imm_data;
 		case IBV_WR_SEND:
-			bytes = bnxt_re_build_send_sqe(qp, sqe, wr, is_inline);
+			if (qp->qptyp == IBV_QPT_UD)
+				bytes = bnxt_re_build_ud_sqe(qp, sqe, wr,
+							     is_inline);
+			else
+				bytes = bnxt_re_build_send_sqe(qp, sqe, wr,
+							       is_inline);
 			if (bytes < 0)
-				ret = ENOMEM;
+				ret = (bytes == -EINVAL) ? EINVAL : ENOMEM;
 			break;
 		case IBV_WR_RDMA_WRITE_WITH_IMM:
 			hdr->key_immd = wr->imm_data;
@@ -1277,10 +1300,49 @@ int bnxt_re_post_srq_recv(struct ibv_srq *ibvsrq, struct ibv_recv_wr *wr,
 
 struct ibv_ah *bnxt_re_create_ah(struct ibv_pd *ibvpd, struct ibv_ah_attr *attr)
 {
+	struct bnxt_re_pd *pd;
+	struct bnxt_re_context *uctx;
+	struct bnxt_re_ah *ah;
+	struct ibv_create_ah_resp resp;
+	int status;
+
+	pd = to_bnxt_re_pd(ibvpd);
+	uctx = to_bnxt_re_context(ibvpd->context);
+
+	ah = calloc(1, sizeof(struct bnxt_re_ah));
+	if (!ah)
+		goto failed;
+
+	ah->pd = pd;
+	pthread_mutex_lock(&uctx->shlock);
+	memset(&resp, 0, sizeof(resp));
+	status = ibv_cmd_create_ah(ibvpd, &ah->ibvah, attr,
+				   &resp, sizeof(resp));
+	if (status) {
+		pthread_mutex_unlock(&uctx->shlock);
+		free(ah);
+		goto failed;
+	}
+	/* read AV ID now. */
+	rmb();
+	ah->avid = *(uint32_t *)(uctx->shpg + BNXT_RE_SHPG_AVID_OFFT);
+	pthread_mutex_unlock(&uctx->shlock);
+
+	return &ah->ibvah;
+failed:
 	return NULL;
 }
 
 int bnxt_re_destroy_ah(struct ibv_ah *ibvah)
 {
-	return -ENOSYS;
+	struct bnxt_re_ah *ah;
+	int status;
+
+	ah = to_bnxt_re_ah(ibvah);
+	status = ibv_cmd_destroy_ah(ibvah);
+	if (status)
+		return status;
+	free(ah);
+
+	return 0;
 }
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux