[PATCH V4 rdma-core 3/5] libhns: Introduce CQ operations referred to hip08 device

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



CQ APIs need to operate on the doorbell and the CQE, and the design of
the doorbell and the CQE differs between hardware generations. Hence, this
patch introduces the CQ operations for hip08 hardware.

Signed-off-by: Lijun Ou <oulijun@xxxxxxxxxx>
Signed-off-by: Wei Hu <xavier.huwei@xxxxxxxxxx>
---
 providers/hns/hns_roce_u_hw_v2.c | 304 +++++++++++++++++++++++++++++++++++++++
 providers/hns/hns_roce_u_hw_v2.h |  75 ++++++++++
 providers/hns/hns_roce_u_verbs.c |  14 +-
 3 files changed, 388 insertions(+), 5 deletions(-)

diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index bf1c3f3..2aecc2b 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -37,6 +37,59 @@
 #include "hns_roce_u_db.h"
 #include "hns_roce_u_hw_v2.h"
 
+static void hns_roce_v2_handle_error_cqe(struct hns_roce_v2_cqe *cqe,
+					 struct ibv_wc *wc)
+{
+	unsigned int status = roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M,
+					     CQE_BYTE_4_STATUS_S);
+	/* Log the failure, then map the CQE status field onto wc->status */
+	fprintf(stderr, PFX "error cqe!\n");
+	switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) {
+	case HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR:
+		wc->status = IBV_WC_LOC_LEN_ERR;
+		break;
+	case HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR:
+		wc->status = IBV_WC_LOC_QP_OP_ERR;
+		break;
+	case HNS_ROCE_V2_CQE_LOCAL_PROT_ERR:
+		wc->status = IBV_WC_LOC_PROT_ERR;
+		break;
+	case HNS_ROCE_V2_CQE_WR_FLUSH_ERR:
+		wc->status = IBV_WC_WR_FLUSH_ERR;
+		break;
+	case HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR:
+		wc->status = IBV_WC_MW_BIND_ERR;
+		break;
+	case HNS_ROCE_V2_CQE_BAD_RESP_ERR:
+		wc->status = IBV_WC_BAD_RESP_ERR;
+		break;
+	case HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR:
+		wc->status = IBV_WC_LOC_ACCESS_ERR;
+		break;
+	case HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR:
+		wc->status = IBV_WC_REM_INV_REQ_ERR;
+		break;
+	case HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR:
+		wc->status = IBV_WC_REM_ACCESS_ERR;
+		break;
+	case HNS_ROCE_V2_CQE_REMOTE_OP_ERR:
+		wc->status = IBV_WC_REM_OP_ERR;
+		break;
+	case HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR:
+		wc->status = IBV_WC_RETRY_EXC_ERR;
+		break;
+	case HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR:
+		wc->status = IBV_WC_RNR_RETRY_EXC_ERR;
+		break;
+	case HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR:
+		wc->status = IBV_WC_REM_ABORT_ERR;
+		break;
+	default:	/* any status value without a dedicated mapping above */
+		wc->status = IBV_WC_GENERAL_ERR;
+		break;
+	}
+}
+
 static struct hns_roce_v2_cqe *get_cqe_v2(struct hns_roce_cq *cq, int entry)
 {
 	return cq->buf.buf + entry * HNS_ROCE_CQE_ENTRY_SIZE;
@@ -50,6 +103,11 @@ static void *get_sw_cqe_v2(struct hns_roce_cq *cq, int n)
 		!!(n & (cq->ibv_cq.cqe + 1))) ? cqe : NULL;
 }
 
+static struct hns_roce_v2_cqe *next_cqe_sw(struct hns_roce_cq *cq)
+{
+	return get_sw_cqe_v2(cq, cq->cons_index); /* cqe at CI, or NULL */
+}
+
 static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
 					     struct hns_roce_cq *cq)
 {
@@ -71,6 +129,17 @@ static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
 	hns_roce_write64((uint32_t *)&cq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET);
 }
 
+static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
+					       uint32_t qpn)
+{
+	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
+	/* Two-level lookup: tind picks the table, the masked qpn the entry */
+	if (ctx->qp_table[tind].refcnt)
+		return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
+	else
+		return NULL;	/* zero refcnt: the table is not dereferenced */
+}
+
 static void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, uint32_t qpn)
 {
 	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
@@ -81,6 +150,239 @@ static void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, uint32_t qpn)
 		ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
 }
 
+/*
+ * Poll one completion from @cq into @wc.
+ *
+ * @cur_qp caches the QP of the previous CQE so that consecutive CQEs of the
+ * same QP skip the table lookup.  Returns V2_CQ_OK when @wc was filled,
+ * V2_CQ_EMPTY when no CQE is available and V2_CQ_POLL_ERR when the QP named
+ * by the CQE cannot be found.
+ */
+static int hns_roce_v2_poll_one(struct hns_roce_cq *cq,
+				struct hns_roce_qp **cur_qp, struct ibv_wc *wc)
+{
+	uint32_t qpn;
+	int is_send;
+	uint16_t wqe_ctr;
+	struct hns_roce_wq *wq = NULL;
+	struct hns_roce_v2_cqe *cqe = NULL;
+
+	/* Fetch the CQE at the current consumer index (CI), if there is one */
+	cqe = next_cqe_sw(cq);
+	if (!cqe)
+		return V2_CQ_EMPTY;
+
+	/* Consume the CQE; the caller rings the CI doorbell afterwards */
+	++cq->cons_index;
+
+	/* Barrier before the CQE fields below are read */
+	udma_from_device_barrier();
+
+	qpn = roce_get_field(cqe->byte_16, CQE_BYTE_16_LCL_QPN_M,
+			     CQE_BYTE_16_LCL_QPN_S);
+
+	is_send = (roce_get_bit(cqe->byte_4, CQE_BYTE_4_S_R_S) ==
+		   HNS_ROCE_V2_CQE_IS_SQ);
+
+	/* Look the QP up only when the CQE is not for the cached *cur_qp */
+	if (!*cur_qp ||
+	   (qpn & HNS_ROCE_V2_CQE_QPN_MASK) != (*cur_qp)->ibv_qp.qp_num) {
+		*cur_qp = hns_roce_v2_find_qp(to_hr_ctx(cq->ibv_cq.context),
+					      qpn & 0xffffff);
+		if (!*cur_qp) {
+			fprintf(stderr, PFX "can't find qp!\n");
+			return V2_CQ_POLL_ERR;
+		}
+	}
+	wc->qp_num = qpn & 0xffffff;
+
+	if (is_send) {
+		wq = &(*cur_qp)->sq;
+		/*
+		 * When sq_signal_bits is set, first move the tail up to the
+		 * WQE that this CQE reports (WQE index field of byte_4)
+		 */
+		if ((*cur_qp)->sq_signal_bits) {
+			wqe_ctr = (uint16_t)(roce_get_field(cqe->byte_4,
+						CQE_BYTE_4_WQE_IDX_M,
+						CQE_BYTE_4_WQE_IDX_S));
+			/*
+			 * Advance by the distance from the tail to that WQE,
+			 * masked with (wqe_cnt - 1); the tail itself is free-running
+			 */
+			wq->tail += (wqe_ctr - (uint16_t) wq->tail) &
+				    (wq->wqe_cnt - 1);
+		}
+		/* Report the wr_id stored for the WQE at the tail */
+		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+		++wq->tail;
+	} else {
+		wq = &(*cur_qp)->rq;
+		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+		++wq->tail;
+	}
+
+	/*
+	 * A CQE with a non-success status only gets its status translated;
+	 * the opcode and flags below are not filled in for it
+	 */
+	if (roce_get_field(cqe->byte_4, CQE_BYTE_4_STATUS_M,
+			   CQE_BYTE_4_STATUS_S) != HNS_ROCE_V2_CQE_SUCCESS) {
+		hns_roce_v2_handle_error_cqe(cqe, wc);
+		return V2_CQ_OK;
+	}
+
+	wc->status = IBV_WC_SUCCESS;
+
+	/*
+	 * According to the opcode type of the cqe, fill in the opcode and the
+	 * other information of the wc
+	 */
+	if (is_send) {
+		/* Translate the SQ opcode of the cqe into wc opcode and flags */
+		switch (roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M,
+			CQE_BYTE_4_OPCODE_S) & HNS_ROCE_V2_CQE_OPCODE_MASK) {
+		case HNS_ROCE_SQ_OP_SEND:
+			wc->opcode = IBV_WC_SEND;
+			wc->wc_flags = 0;
+			break;
+		case HNS_ROCE_SQ_OP_SEND_WITH_IMM:
+			wc->opcode = IBV_WC_SEND;
+			wc->wc_flags = IBV_WC_WITH_IMM;
+			break;
+		case HNS_ROCE_SQ_OP_SEND_WITH_INV:
+			wc->opcode = IBV_WC_SEND;
+			/* never leave stale caller data in wc_flags */
+			wc->wc_flags = 0;
+			break;
+		case HNS_ROCE_SQ_OP_RDMA_READ:
+			wc->opcode = IBV_WC_RDMA_READ;
+			wc->byte_len = cqe->byte_cnt;
+			wc->wc_flags = 0;
+			break;
+		case HNS_ROCE_SQ_OP_RDMA_WRITE:
+			wc->opcode = IBV_WC_RDMA_WRITE;
+			wc->wc_flags = 0;
+			break;
+		case HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM:
+			wc->opcode = IBV_WC_RDMA_WRITE;
+			wc->wc_flags = IBV_WC_WITH_IMM;
+			break;
+		case HNS_ROCE_SQ_OP_LOCAL_INV:
+			wc->opcode = IBV_WC_LOCAL_INV;
+			wc->wc_flags = IBV_WC_WITH_INV;
+			break;
+		case HNS_ROCE_SQ_OP_ATOMIC_COMP_AND_SWAP:
+			wc->opcode = IBV_WC_COMP_SWAP;
+			wc->byte_len  = 8;
+			wc->wc_flags = 0;
+			break;
+		case HNS_ROCE_SQ_OP_ATOMIC_FETCH_AND_ADD:
+			wc->opcode = IBV_WC_FETCH_ADD;
+			wc->byte_len  = 8;
+			wc->wc_flags = 0;
+			break;
+		case HNS_ROCE_SQ_OP_BIND_MW:
+			wc->opcode = IBV_WC_BIND_MW;
+			wc->wc_flags = 0;
+			break;
+		default:
+			wc->status = IBV_WC_GENERAL_ERR;
+			wc->wc_flags = 0;
+			break;
+		}
+	} else {
+		/* Get opcode and flag in rq&srq */
+		wc->byte_len = cqe->byte_cnt;
+		switch (roce_get_field(cqe->byte_4, CQE_BYTE_4_OPCODE_M,
+			CQE_BYTE_4_OPCODE_S) & HNS_ROCE_V2_CQE_OPCODE_MASK) {
+		case HNS_ROCE_RECV_OP_RDMA_WRITE_IMM:
+			wc->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
+			wc->wc_flags = IBV_WC_WITH_IMM;
+			wc->imm_data = cqe->rkey_immtdata;
+			break;
+		case HNS_ROCE_RECV_OP_SEND:
+			wc->opcode = IBV_WC_RECV;
+			wc->wc_flags = 0;
+			break;
+		case HNS_ROCE_RECV_OP_SEND_WITH_IMM:
+			wc->opcode = IBV_WC_RECV;
+			wc->wc_flags = IBV_WC_WITH_IMM;
+			wc->imm_data = cqe->rkey_immtdata;
+			break;
+		case HNS_ROCE_RECV_OP_SEND_WITH_INV:
+			wc->opcode = IBV_WC_RECV;
+			wc->wc_flags = IBV_WC_WITH_INV;
+			wc->imm_data = cqe->rkey_immtdata;
+			break;
+		default:
+			/* unknown opcode: report an error with no flags set */
+			wc->status = IBV_WC_GENERAL_ERR;
+			wc->wc_flags = 0;
+			break;
+		}
+	}
+
+	return V2_CQ_OK;
+}
+
+static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
+				 struct ibv_wc *wc)
+{
+	int npolled;
+	int err = V2_CQ_OK;
+	struct hns_roce_qp *qp = NULL;
+	struct hns_roce_cq *cq = to_hr_cq(ibvcq);
+	struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context);
+
+	pthread_spin_lock(&cq->lock);
+	/* Poll up to ne entries under cq->lock; stop at the first non-OK one */
+	for (npolled = 0; npolled < ne; ++npolled) {
+		err = hns_roce_v2_poll_one(cq, &qp, wc + npolled);
+		if (err != V2_CQ_OK)
+			break;
+	}
+	/* Publish the new consumer index only if something was consumed */
+	if (npolled) {
+		mmio_ordered_writes_hack();
+
+		hns_roce_v2_update_cq_cons_index(ctx, cq);
+	}
+
+	pthread_spin_unlock(&cq->lock);
+	/* V2_CQ_POLL_ERR takes precedence; otherwise return the polled count */
+	return err == V2_CQ_POLL_ERR ? err : npolled;
+}
+
+static int hns_roce_u_v2_arm_cq(struct ibv_cq *ibvcq, int solicited)
+{
+	uint32_t ci;
+	uint32_t solicited_flag;
+	struct hns_roce_v2_cq_db cq_db;
+	struct hns_roce_cq *cq = to_hr_cq(ibvcq);
+
+	ci  = cq->cons_index & ((cq->cq_depth << 1) - 1);
+	solicited_flag = solicited ? HNS_ROCE_V2_CQ_DB_REQ_SOL :
+				     HNS_ROCE_V2_CQ_DB_REQ_NEXT;
+	/* Build the notify doorbell for this CQ, starting from all zeroes */
+	cq_db.byte_4 = 0;
+	cq_db.parameter = 0;
+	/* byte_4: the CQ number as the tag and 0x4 as the command */
+	roce_set_field(cq_db.byte_4, DB_BYTE_4_TAG_M, DB_BYTE_4_TAG_S, cq->cqn);
+	roce_set_field(cq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S, 0x4);
+	/* parameter: consumer index, CMD_SN fixed to 1, and the notify bit */
+	roce_set_field(cq_db.parameter, CQ_DB_PARAMETER_CQ_CONSUMER_IDX_M,
+		       CQ_DB_PARAMETER_CQ_CONSUMER_IDX_S, ci);
+
+	roce_set_field(cq_db.parameter, CQ_DB_PARAMETER_CMD_SN_M,
+		       CQ_DB_PARAMETER_CMD_SN_S, 1);
+	roce_set_bit(cq_db.parameter, CQ_DB_PARAMETER_NOTIFY_S, solicited_flag);
+
+	hns_roce_write64((uint32_t *)&cq_db, to_hr_ctx(ibvcq->context),
+			  ROCEE_VF_DB_CFG0_OFFSET);
+	return 0;	/* always reports success */
+}
+
 static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
 				   struct hns_roce_srq *srq)
 {
@@ -226,6 +528,8 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
 
 struct hns_roce_u_hw hns_roce_u_hw_v2 = {
 	.hw_version = HNS_ROCE_HW_VER2,
+	.poll_cq = hns_roce_u_v2_poll_cq,	/* v2 CQ polling entry point */
+	.arm_cq = hns_roce_u_v2_arm_cq,		/* v2 CQ notify request */
 	.modify_qp = hns_roce_u_v2_modify_qp,
 	.destroy_qp = hns_roce_u_v2_destroy_qp,
 };
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index d7fcf94..238bebf 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -33,9 +33,84 @@
 #ifndef _HNS_ROCE_U_HW_V2_H
 #define _HNS_ROCE_U_HW_V2_H
 
+#define HNS_ROCE_V2_CQE_IS_SQ			0	/* CQE S_R bit of a send cqe */
+/* Values written to the notify bit of the CQ doorbell when arming a CQ */
+#define HNS_ROCE_V2_CQ_DB_REQ_SOL		1
+#define HNS_ROCE_V2_CQ_DB_REQ_NEXT		0
+
 /* V2 REG DEFINITION */
 #define ROCEE_VF_DB_CFG0_OFFSET			0x0230
 
+enum {	/* presumably send WQE opcodes; not referenced by this patch - confirm */
+	HNS_ROCE_WQE_OP_SEND = 0x0,
+	HNS_ROCE_WQE_OP_SEND_WITH_INV = 0x1,
+	HNS_ROCE_WQE_OP_SEND_WITH_IMM = 0x2,
+	HNS_ROCE_WQE_OP_RDMA_WRITE = 0x3,
+	HNS_ROCE_WQE_OP_RDMA_WRITE_WITH_IMM = 0x4,
+	HNS_ROCE_WQE_OP_RDMA_READ = 0x5,
+	HNS_ROCE_WQE_OP_ATOMIC_COM_AND_SWAP = 0x6, /* NOTE(review): "COM"? */
+	HNS_ROCE_WQE_OP_ATOMIC_FETCH_AND_ADD = 0x7,
+	HNS_ROCE_WQE_OP_ATOMIC_MASK_COMP_AND_SWAP = 0x8,
+	HNS_ROCE_WQE_OP_ATOMIC_MASK_FETCH_AND_ADD = 0x9,
+	HNS_ROCE_WQE_OP_FAST_REG_PMR = 0xa,
+	HNS_ROCE_WQE_OP_LOCAL_INV = 0xb,
+	HNS_ROCE_WQE_OP_BIND_MW_TYPE = 0xc,
+	HNS_ROCE_WQE_OP_MASK = 0x1f
+};
+
+enum {
+	/* CQE opcode values handled for receive (non-send) completions */
+	HNS_ROCE_RECV_OP_RDMA_WRITE_IMM = 0x0,
+	HNS_ROCE_RECV_OP_SEND = 0x1,
+	HNS_ROCE_RECV_OP_SEND_WITH_IMM = 0x2,
+	HNS_ROCE_RECV_OP_SEND_WITH_INV = 0x3,
+};
+
+enum {	/* CQE opcode values handled for send completions */
+	HNS_ROCE_SQ_OP_SEND = 0x0,
+	HNS_ROCE_SQ_OP_SEND_WITH_INV = 0x1,
+	HNS_ROCE_SQ_OP_SEND_WITH_IMM = 0x2,
+	HNS_ROCE_SQ_OP_RDMA_WRITE = 0x3,
+	HNS_ROCE_SQ_OP_RDMA_WRITE_WITH_IMM = 0x4,
+	HNS_ROCE_SQ_OP_RDMA_READ = 0x5,
+	HNS_ROCE_SQ_OP_ATOMIC_COMP_AND_SWAP = 0x6,
+	HNS_ROCE_SQ_OP_ATOMIC_FETCH_AND_ADD = 0x7,
+	HNS_ROCE_SQ_OP_ATOMIC_MASK_COMP_AND_SWAP = 0x8,
+	HNS_ROCE_SQ_OP_ATOMIC_MASK_FETCH_AND_ADD = 0x9,
+	HNS_ROCE_SQ_OP_FAST_REG_PMR = 0xa,
+	HNS_ROCE_SQ_OP_LOCAL_INV = 0xb,
+	HNS_ROCE_SQ_OP_BIND_MW = 0xc,
+};
+
+enum {	/* result codes of hns_roce_v2_poll_one() */
+	V2_CQ_OK			=  0,	/* one cqe was consumed */
+	V2_CQ_EMPTY			= -1,	/* no cqe at the consumer index */
+	V2_CQ_POLL_ERR			= -2,	/* QP of the cqe was not found */
+};
+
+enum {	/* NOTE(review): poll_one also masks the QPN with 0xffffff - confirm */
+	HNS_ROCE_V2_CQE_QPN_MASK	= 0x3ffff,	/* for the cqe local QPN */
+	HNS_ROCE_V2_CQE_STATUS_MASK	= 0xff,		/* for the cqe status */
+	HNS_ROCE_V2_CQE_OPCODE_MASK	= 0x1f,		/* for the cqe opcode */
+};
+
+enum {	/* values of the cqe status field, mapped onto ibv_wc status codes */
+	HNS_ROCE_V2_CQE_SUCCESS				= 0x00,
+	HNS_ROCE_V2_CQE_LOCAL_LENGTH_ERR		= 0x01,
+	HNS_ROCE_V2_CQE_LOCAL_QP_OP_ERR			= 0x02,
+	HNS_ROCE_V2_CQE_LOCAL_PROT_ERR			= 0x04,
+	HNS_ROCE_V2_CQE_WR_FLUSH_ERR			= 0x05,
+	HNS_ROCE_V2_CQE_MEM_MANAGERENT_OP_ERR		= 0x06,
+	HNS_ROCE_V2_CQE_BAD_RESP_ERR			= 0x10,
+	HNS_ROCE_V2_CQE_LOCAL_ACCESS_ERR		= 0x11,
+	HNS_ROCE_V2_CQE_REMOTE_INVAL_REQ_ERR		= 0x12,
+	HNS_ROCE_V2_CQE_REMOTE_ACCESS_ERR		= 0x13,
+	HNS_ROCE_V2_CQE_REMOTE_OP_ERR			= 0x14,
+	HNS_ROCE_V2_CQE_TRANSPORT_RETRY_EXC_ERR		= 0x15,
+	HNS_ROCE_V2_CQE_RNR_RETRY_EXC_ERR		= 0x16,
+	HNS_ROCE_V2_CQE_REMOTE_ABORTED_ERR		= 0x22,
+};
+
 struct hns_roce_db {
 	unsigned int	byte_4;
 	unsigned int	parameter;
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 8f6c666..64a4ac3 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -197,11 +197,15 @@ static void hns_roce_set_sq_sizes(struct hns_roce_qp *qp,
 
 static int hns_roce_verify_cq(int *cqe, struct hns_roce_context *context)
 {
-	if (*cqe < HNS_ROCE_MIN_CQE_NUM) {
-		fprintf(stderr, "cqe = %d, less than minimum CQE number.\n",
-			*cqe);
-		*cqe = HNS_ROCE_MIN_CQE_NUM;
-	}
+	struct hns_roce_device *hr_dev = to_hr_dev(context->ibv_ctx.device);
+
+	if (hr_dev->hw_version == HNS_ROCE_HW_VER1)
+		if (*cqe < HNS_ROCE_MIN_CQE_NUM) {
+			fprintf(stderr,
+				"cqe = %d, less than minimum CQE number.\n",
+				*cqe);
+			*cqe = HNS_ROCE_MIN_CQE_NUM;
+		}
 
 	if (*cqe > context->max_cqe)
 		return -1;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux