Add the userspace verbs implementation related header files: 'erdma_hw.h'
for hardware interface definitions, 'erdma_verbs.h' for verbs related
definitions and 'erdma_db.h' for doorbell records related definitions.

Signed-off-by: Cheng Xu <chengyou@xxxxxxxxxxxxxxxxx>
---
 providers/erdma/erdma_db.h    |  17 +++
 providers/erdma/erdma_hw.h    | 203 +++++++++++++++++++++++++++++++++
 providers/erdma/erdma_verbs.h | 145 ++++++++++++++++++++++++
 3 files changed, 365 insertions(+)
 create mode 100644 providers/erdma/erdma_db.h
 create mode 100644 providers/erdma/erdma_hw.h
 create mode 100644 providers/erdma/erdma_verbs.h

diff --git a/providers/erdma/erdma_db.h b/providers/erdma/erdma_db.h
new file mode 100644
index 00000000..c302cb7a
--- /dev/null
+++ b/providers/erdma/erdma_db.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file */
+/*
+ * Authors: Cheng Xu <chengyou@xxxxxxxxxxxxxxxxx>
+ * Copyright (c) 2020-2021, Alibaba Group.
+ */
+
+#ifndef __ERDMA_DB_H__
+#define __ERDMA_DB_H__
+
+#include <inttypes.h>
+
+#include "erdma.h"
+
+uint64_t *erdma_alloc_dbrecords(struct erdma_context *ctx);
+void erdma_dealloc_dbrecords(struct erdma_context *ctx, uint64_t *dbrecords);
+
+#endif /* __ERDMA_DB_H__ */
diff --git a/providers/erdma/erdma_hw.h b/providers/erdma/erdma_hw.h
new file mode 100644
index 00000000..f373f7f8
--- /dev/null
+++ b/providers/erdma/erdma_hw.h
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file */
+/*
+ * Authors: Cheng Xu <chengyou@xxxxxxxxxxxxxxxxx>
+ * Copyright (c) 2020-2021, Alibaba Group.
+ */
+
+#ifndef __ERDMA_HW_H__
+#define __ERDMA_HW_H__
+
+#include <stdint.h>
+
+#define ERDMA_SDB_PAGE		0
+#define ERDMA_SDB_ENTRY		1
+#define ERDMA_SDB_SHARED	2
+
+#define ERDMA_SQDB_SIZE		128
+#define ERDMA_CQDB_SIZE		8
+#define ERDMA_RQDB_SIZE		8
+#define ERDMA_RQDB_SPACE_SIZE	32
+
+/* WQE related. */
+#define EQE_SIZE	16
+#define EQE_SHIFT	4
+#define RQE_SIZE	32
+#define RQE_SHIFT	5
+#define CQE_SIZE	32
+#define CQE_SHIFT	5
+#define SQEBB_SIZE	32
+#define SQEBB_SHIFT	5
+#define SQEBB_MASK	(~(SQEBB_SIZE - 1))
+/* Round a byte size up to whole SQEBBs, and count the SQEBBs it occupies. */
+#define SQEBB_ALIGN(size)	(((size) + SQEBB_SIZE - 1) & SQEBB_MASK)
+#define SQEBB_COUNT(size)	(SQEBB_ALIGN(size) >> SQEBB_SHIFT)
+
+#define MAX_WQEBB_PER_SQE	4
+
+enum erdma_opcode {
+	ERDMA_OP_WRITE = 0,
+	ERDMA_OP_READ = 1,
+	ERDMA_OP_SEND = 2,
+	ERDMA_OP_SEND_WITH_IMM = 3,
+
+	ERDMA_OP_RECEIVE = 4,
+	ERDMA_OP_RECV_IMM = 5,
+	ERDMA_OP_RECV_INV = 6,
+
+	ERDMA_OP_REQ_ERR = 7,
+	ERDMA_OP_READ_RESPONSE = 8,
+	ERDNA_OP_READ_RESPONSE = ERDMA_OP_READ_RESPONSE, /* legacy misspelling */
+	ERDMA_OP_WRITE_WITH_IMM = 9,
+
+	ERDMA_OP_RECV_ERR = 10,
+
+	ERDMA_OP_INVALIDATE = 11,
+	ERDMA_OP_RSP_SEND_IMM = 12,
+	ERDMA_OP_SEND_WITH_INV = 13,
+
+	ERDMA_OP_REG_MR = 14,
+	ERDMA_OP_LOCAL_INV = 15,
+	ERDMA_OP_READ_WITH_INV = 16,
+	ERDMA_NUM_OPCODES = 17,
+	ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1
+};
+
+/*
+ * Inline data is kept within the work request itself, occupying the
+ * space that would otherwise hold the SGE list, so the inline capacity
+ * is the size of ERDMA_MAX_SEND_SGE SGEs. ERDMA_MAX_SEND_SGE is defined
+ * in erdma_verbs.h and must be visible wherever this macro is expanded.
+ */
+#define ERDMA_MAX_INLINE (sizeof(struct erdma_sge) * (ERDMA_MAX_SEND_SGE))
+
+enum erdma_wc_status {
+	ERDMA_WC_SUCCESS = 0,
+	ERDMA_WC_GENERAL_ERR = 1,
+	ERDMA_WC_RECV_WQE_FORMAT_ERR = 2,
+	ERDMA_WC_RECV_STAG_INVALID_ERR = 3,
+	ERDMA_WC_RECV_ADDR_VIOLATION_ERR = 4,
+	ERDMA_WC_RECV_RIGHT_VIOLATION_ERR = 5,
+	ERDMA_WC_RECV_PDID_ERR = 6,
+	ERDMA_WC_RECV_WARRPING_ERR = 7,
+	ERDMA_WC_SEND_WQE_FORMAT_ERR = 8,
+	ERDMA_WC_SEND_WQE_ORD_EXCEED = 9,
+	ERDMA_WC_SEND_STAG_INVALID_ERR = 10,
+	ERDMA_WC_SEND_ADDR_VIOLATION_ERR = 11,
+	ERDMA_WC_SEND_RIGHT_VIOLATION_ERR = 12,
+	ERDMA_WC_SEND_PDID_ERR = 13,
+	ERDMA_WC_SEND_WARRPING_ERR = 14,
+	ERDMA_WC_FLUSH_ERR = 15,
+	ERDMA_WC_RETRY_EXC_ERR = 16,
+	ERDMA_NUM_WC_STATUS
+};
+
+enum erdma_vendor_err {
+	ERDMA_WC_VENDOR_NO_ERR = 0,
+	ERDMA_WC_VENDOR_INVALID_RQE = 1,
+	ERDMA_WC_VENDOR_RQE_INVALID_STAG = 2,
+	ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION = 3,
+	ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR = 4,
+	ERDMA_WC_VENDOR_RQE_INVALID_PD = 5,
+	ERDMA_WC_VENDOR_RQE_WRAP_ERR = 6,
+	ERDMA_WC_VENDOR_INVALID_SQE = 0x20,
+	ERDMA_WC_VENDOR_ZERO_ORD = 0x21,
+	ERDMA_WC_VENDOR_SQE_INVALID_STAG = 0x30,
+	ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION = 0x31,
+	ERDMA_WC_VENDOR_SQE_ACCESS_ERR = 0x32,
+	ERDMA_WC_VENDOR_SQE_INVALID_PD = 0x33,
+	ERDMA_WC_VENDOR_SQE_WARP_ERR = 0x34
+};
+
+/* Doorbell related. */
+#define ERDMA_CQDB_EQN_MASK	GENMASK_ULL(63, 56)
+#define ERDMA_CQDB_CQN_MASK	GENMASK_ULL(55, 32)
+#define ERDMA_CQDB_ARM_MASK	BIT_ULL(31)
+#define ERDMA_CQDB_SOL_MASK	BIT_ULL(30)
+#define ERDMA_CQDB_CMDSN_MASK	GENMASK_ULL(29, 28)
+#define ERDMA_CQDB_CI_MASK	GENMASK_ULL(23, 0)
+
+/* CQE hdr */
+#define ERDMA_CQE_HDR_OWNER_MASK	BIT(31)
+#define ERDMA_CQE_HDR_OPCODE_MASK	GENMASK(23, 16)
+#define ERDMA_CQE_HDR_QTYPE_MASK	GENMASK(15, 8)
+#define ERDMA_CQE_HDR_SYNDROME_MASK	GENMASK(7, 0)
+
+#define ERDMA_CQE_QTYPE_SQ	0
+#define ERDMA_CQE_QTYPE_RQ	1
+#define ERDMA_CQE_QTYPE_CMDQ	2
+
+/* Completion Queue Element */
+struct erdma_cqe {
+	__be32 hdr;
+	__be32 qe_idx;
+	__be32 qpn;
+	__le32 imm_data;
+	__be32 size;
+	__be32 rsvd[3];
+};
+
+struct erdma_sge {
+	__aligned_le64 laddr;
+	__le32 length;
+	__le32 lkey;
+};
+
+/* Receive Queue Element */
+struct erdma_rqe {
+	__le16 qe_idx;
+	__le16 rsvd;
+	__le32 qpn;
+	__le32 rsvd2;
+	__le32 rsvd3;
+	__le64 to;
+	__le32 length;
+	__le32 stag;
+};
+
+/* SQE */
+#define ERDMA_SQE_HDR_SGL_LEN_MASK	GENMASK_ULL(63, 56)
+#define ERDMA_SQE_HDR_WQEBB_CNT_MASK	GENMASK_ULL(54, 52)
+#define ERDMA_SQE_HDR_QPN_MASK		GENMASK_ULL(51, 32)
+#define ERDMA_SQE_HDR_OPCODE_MASK	GENMASK_ULL(31, 27)
+#define ERDMA_SQE_HDR_DWQE_MASK		BIT_ULL(26)
+#define ERDMA_SQE_HDR_INLINE_MASK	BIT_ULL(25)
+#define ERDMA_SQE_HDR_FENCE_MASK	BIT_ULL(24)
+#define ERDMA_SQE_HDR_SE_MASK		BIT_ULL(23)
+#define ERDMA_SQE_HDR_CE_MASK		BIT_ULL(22)
+#define ERDMA_SQE_HDR_WQEBB_INDEX_MASK	GENMASK_ULL(15, 0)
+
+struct erdma_write_sqe {
+	__le64 hdr;
+	__be32 imm_data;
+	__le32 length;
+
+	__le32 sink_stag;
+	/* sink_to is split in two: offset 20 is not 8-byte aligned. */
+	__le32 sink_to_low;
+	__le32 sink_to_high;
+
+	__le32 rsvd;
+
+	struct erdma_sge sgl[];
+};
+
+struct erdma_send_sqe {
+	__le64 hdr;
+	__be32 imm_data;
+	__le32 length;
+	struct erdma_sge sgl[];
+};
+
+struct erdma_readreq_sqe {
+	__le64 hdr;
+	__le32 invalid_stag;
+	__le32 length;
+	__le32 sink_stag;
+	/* sink_to is split in two: offset 20 is not 8-byte aligned. */
+	__le32 sink_to_low;
+	__le32 sink_to_high;
+	__le32 rsvd0;
+	struct erdma_sge sgl;
+};
+
+#endif /* __ERDMA_HW_H__ */
diff --git a/providers/erdma/erdma_verbs.h b/providers/erdma/erdma_verbs.h
new file mode 100644
index 00000000..944d29f5
--- /dev/null
+++ b/providers/erdma/erdma_verbs.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0 or OpenIB.org BSD (MIT) See COPYING file */
+/*
+ * Authors: Cheng Xu <chengyou@xxxxxxxxxxxxxxxxx>
+ * Copyright (c) 2020-2021, Alibaba Group.
+ */
+
+#ifndef __ERDMA_VERBS_H__
+#define __ERDMA_VERBS_H__
+
+#include <pthread.h>
+#include <inttypes.h>
+#include <stddef.h>
+
+#include "erdma.h"
+#include "erdma_hw.h"
+
+#define ERDMA_MAX_SEND_SGE 6
+#define ERDMA_MAX_RECV_SGE 1
+
+struct erdma_queue {
+	void *qbuf;
+	void *db;
+
+	uint16_t rsvd0;
+	uint16_t depth;
+	uint32_t size;
+
+	uint16_t pi;
+	uint16_t ci;
+
+	uint32_t rsvd1;
+	uint64_t *wr_tbl;
+
+	void *db_record;
+};
+
+struct erdma_qp {
+	struct ibv_qp base_qp;
+	struct erdma_device *erdma_dev;
+
+	uint32_t id; /* qpn */
+
+	pthread_spinlock_t sq_lock;
+	pthread_spinlock_t rq_lock;
+
+	int sq_sig_all;
+
+	struct erdma_queue sq;
+	struct erdma_queue rq;
+
+	void *qbuf;
+	uint64_t *db_records;
+};
+
+struct erdma_cq {
+	struct ibv_cq base_cq;
+	struct erdma_device *erdma_dev;
+	uint32_t id;
+
+	uint32_t event_stats;
+
+	uint32_t depth;
+	uint32_t ci;
+	uint32_t owner;
+	struct erdma_cqe *queue;
+
+	void *db;
+	uint16_t db_offset;
+
+	void *db_record;
+	uint32_t cmdsn;
+	int comp_vector;
+
+	pthread_spinlock_t lock;
+};
+
+/* Map a generic ibv_qp to the erdma_qp that embeds it as base_qp. */
+static inline struct erdma_qp *to_eqp(struct ibv_qp *base)
+{
+	return container_of(base, struct erdma_qp, base_qp);
+}
+
+/* Map a generic ibv_cq to the erdma_cq that embeds it as base_cq. */
+static inline struct erdma_cq *to_ecq(struct ibv_cq *base)
+{
+	return container_of(base, struct erdma_cq, base_cq);
+}
+
+/*
+ * Return the address of SQ basic block 'idx'. The index wraps using
+ * depth - 1 as a mask, which assumes sq.depth is a power of two.
+ */
+static inline void *get_sq_wqebb(struct erdma_qp *qp, uint16_t idx)
+{
+	idx &= (qp->sq.depth - 1);
+	return (char *)qp->sq.qbuf + (idx << SQEBB_SHIFT);
+}
+
+/*
+ * Publish producer index 'pi' for the QP's send queue: store the doorbell
+ * value in the doorbell record first, then, after a barrier, write the
+ * same value through MMIO to the doorbell at sq.db.
+ */
+static inline void __kick_sq_db(struct erdma_qp *qp, uint16_t pi)
+{
+	uint64_t db_data;
+
+	db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, qp->id) |
+		FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi);
+
+	*(__le64 *)qp->sq.db_record = htole64(db_data);
+	udma_to_device_barrier();
+	mmio_write64_le(qp->sq.db, htole64(db_data));
+}
+
+struct ibv_pd *erdma_alloc_pd(struct ibv_context *ctx);
+int erdma_free_pd(struct ibv_pd *pd);
+
+int erdma_query_device(struct ibv_context *ctx,
+		       const struct ibv_query_device_ex_input *input,
+		       struct ibv_device_attr_ex *attr, size_t attr_size);
+int erdma_query_port(struct ibv_context *ctx, uint8_t port, struct ibv_port_attr *attr);
+
+struct ibv_mr *erdma_reg_mr(struct ibv_pd *pd, void *addr, size_t len,
+			    uint64_t hca_va, int access);
+int erdma_dereg_mr(struct verbs_mr *vmr);
+
+struct ibv_qp *erdma_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
+int erdma_modify_qp(struct ibv_qp *base_qp, struct ibv_qp_attr *attr, int attr_mask);
+int erdma_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+		   int attr_mask, struct ibv_qp_init_attr *init_attr);
+int erdma_post_send(struct ibv_qp *base_qp, struct ibv_send_wr *wr, struct ibv_send_wr **bad_wr);
+int erdma_post_recv(struct ibv_qp *base_qp, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr);
+int erdma_destroy_qp(struct ibv_qp *base_qp);
+
+void erdma_free_context(struct ibv_context *ibv_ctx);
+
+struct ibv_cq *erdma_create_cq(struct ibv_context *ctx, int num_cqe,
+			       struct ibv_comp_channel *channel, int comp_vector);
+int erdma_destroy_cq(struct ibv_cq *base_cq);
+int erdma_notify_cq(struct ibv_cq *ibcq, int solicited);
+void erdma_cq_event(struct ibv_cq *ibcq);
+int erdma_poll_cq(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc);
+
+#endif /* __ERDMA_VERBS_H__ */
-- 
2.27.0