This patch mainly implements the srq recv verb for libhns. Signed-off-by: Lijun Ou <oulijun@xxxxxxxxxx> --- providers/hns/hns_roce_u.h | 1 - providers/hns/hns_roce_u_hw_v2.c | 107 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index 5248a53..a9c0113 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -162,7 +162,6 @@ struct hns_roce_idx_que { int buf_size; int entry_sz; unsigned long *bitmap; - unsigned long use_cnt; }; struct hns_roce_srq { diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index 4231193..bdf8bbb 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#define _GNU_SOURCE #include <stdio.h> #include <string.h> #include <malloc.h> @@ -148,6 +149,11 @@ static void *get_send_sge_ex(struct hns_roce_qp *qp, int n) return qp->buf.buf + qp->sge.offset + (n << qp->sge.sge_shift); } +static void *get_srq_wqe(struct hns_roce_srq *srq, int n) +{ + return srq->buf.buf + (n << srq->wqe_shift); +} + static int hns_roce_v2_wq_overflow(struct hns_roce_wq *wq, int nreq, struct hns_roce_cq *cq) { @@ -1136,6 +1142,106 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) return ret; } +static void fill_idx_que(struct hns_roce_idx_que *idx_que, + int cur_idx, int wqe_idx) +{ + unsigned int *addr; + + addr = idx_que->buf.buf + cur_idx * idx_que->entry_sz; + *addr = wqe_idx; +} + +static int find_empty_entry(struct hns_roce_idx_que *idx_que) +{ + int bit_num; + int i; + + /* bitmap[i] is set zero if all bits are allocated */ + for (i = 0; idx_que->bitmap[i] == 0; ++i) + ; + bit_num = ffsl(idx_que->bitmap[i]); + idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1)); + + return i * sizeof(uint64_t) * BIT_CNT_PER_BYTE + (bit_num - 1); +} + +static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, + struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) +{ + struct hns_roce_context *ctx = to_hr_ctx(ib_srq->context); + struct hns_roce_srq *srq = to_hr_srq(ib_srq); + struct hns_roce_v2_wqe_data_seg *dseg; + struct hns_roce_db srq_db; + int ret = 0; + int wqe_idx; + void *wqe; + int nreq; + int ind; + int i; + + pthread_spin_lock(&srq->lock); + + /* current idx of srqwq */ + ind = srq->head & (srq->max - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (wr->num_sge > srq->max_gs) { + ret = -1; + *bad_wr = wr; + break; + } + + if (srq->head == srq->tail) { + /* SRQ is full*/ + ret = -1; + *bad_wr = wr; + break; + } + + wqe_idx = find_empty_entry(&srq->idx_que); + fill_idx_que(&srq->idx_que, ind, wqe_idx); + + wqe = get_srq_wqe(srq, wqe_idx); + dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; + + for (i = 0; i < wr->num_sge; ++i) { + dseg[i].len = htole32(wr->sg_list[i].length); + dseg[i].lkey = htole32(wr->sg_list[i].lkey); + dseg[i].addr = htole64(wr->sg_list[i].addr); + } + + if (i < srq->max_gs) { + dseg->len = 0; + dseg->lkey = htole32(0x100); + dseg->addr = 0; + } + + srq->wrid[wqe_idx] = wr->wr_id; + ind = (ind + 1) & (srq->max - 1); + } + + if (nreq) { + srq->head += nreq; + + /* + * Make sure that descriptors are written before + * we write doorbell record. + */ + udma_to_device_barrier(); + + srq_db.byte_4 = htole32(2 << 24 | srq->srqn); + srq_db.parameter = htole32(srq->head); + + hns_roce_write64((uint32_t *)&srq_db, ctx, + ROCEE_VF_DB_CFG0_OFFSET); + } + + pthread_spin_unlock(&srq->lock); + + return ret; +} + const struct hns_roce_u_hw hns_roce_u_hw_v2 = { .hw_version = HNS_ROCE_HW_VER2, .hw_ops = { @@ -1145,5 +1251,6 @@ const struct hns_roce_u_hw hns_roce_u_hw_v2 = { .post_recv = hns_roce_u_v2_post_recv, .modify_qp = hns_roce_u_v2_modify_qp, .destroy_qp = hns_roce_u_v2_destroy_qp, + .post_srq_recv = hns_roce_u_v2_post_srq_recv, }, }; -- 2.8.1