On 2020/4/23 19:16, Weihang Li wrote: > From: Xi Wang <wangxi11@xxxxxxxxxx> > > Optimize the QP's WQE buffer parameters calculating process to make the > codes more readable. > > Signed-off-by: Xi Wang <wangxi11@xxxxxxxxxx> > Signed-off-by: Weihang Li <liweihang@xxxxxxxxxx> > --- > drivers/infiniband/hw/hns/hns_roce_device.h | 34 +++ > drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 109 ++++------ > drivers/infiniband/hw/hns/hns_roce_qp.c | 313 +++++++++++----------------- > 3 files changed, 191 insertions(+), 265 deletions(-) > > diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h > index 6185f8c..eaebd4b 100644 > --- a/drivers/infiniband/hw/hns/hns_roce_device.h > +++ b/drivers/infiniband/hw/hns/hns_roce_device.h > @@ -1079,6 +1079,8 @@ static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx) > return buf->page_list[idx].map; > } > > +#define hr_hw_page_align(x) ALIGN(x, 1 << PAGE_ADDR_SHIFT) > + > static inline u64 to_hr_hw_page_addr(u64 addr) > { > return addr >> PAGE_ADDR_SHIFT; > @@ -1089,6 +1091,38 @@ static inline u32 to_hr_hw_page_shift(u32 page_shift) > { > return page_shift - PAGE_ADDR_SHIFT; > } > > +static inline u32 to_hr_hem_hopnum(u32 hopnum, u32 count) > +{ > + if (count > 0) > + return hopnum == HNS_ROCE_HOP_NUM_0 ? 0 : hopnum; > + > + return 0; > +} > + > +static inline u32 to_hr_hem_entries_size(u32 count, u32 buf_shift) > +{ > + if (count > 0) > + return hr_hw_page_align(count << buf_shift); > + > + return 0; > +} Sorry, I didn't notice that Leon had a comment on a similar function in rdma-core today. The check on count is meaningless; I will send v2 later (a sketch of the simplified helpers follows the quoted patch below). > + > +static inline u32 to_hr_hem_entries_count(u32 count, u32 buf_shift) > +{ > + if (count > 0) > + return hr_hw_page_align(count << buf_shift) >> buf_shift; > + > + return 0; > +} > + > +static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift) > +{ > + if (count > 0) > + return ilog2(to_hr_hem_entries_count(count, buf_shift)); > + > + return 0; > +} > + > int hns_roce_init_uar_table(struct hns_roce_dev *dev); > int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar); > void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar); > diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > index bdcbb8b..97b8cb3 100644 > --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > @@ -154,47 +154,24 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, > unsigned int *sge_ind, int valid_num_sge) > { > struct hns_roce_v2_wqe_data_seg *dseg; > - struct ib_sge *sg; > - int num_in_wqe = 0; > - int extend_sge_num; > - int fi_sge_num; > - int se_sge_num; > - int shift; > - int i; > + struct ib_sge *sge = wr->sg_list; > + unsigned int idx = *sge_ind; > + int cnt = valid_num_sge; > > - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) > - num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; > - extend_sge_num = valid_num_sge - num_in_wqe; > - sg = wr->sg_list + num_in_wqe; > - shift = qp->mtr.hem_cfg.buf_pg_shift; > + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { > + cnt -= HNS_ROCE_SGE_IN_WQE; > + sge += HNS_ROCE_SGE_IN_WQE; > + } > > - /* > - * Check whether wr->num_sge sges are in the same page. If not, we > - * should calculate how many sges in the first page and the second > - * page.
> - */ > - dseg = hns_roce_get_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1)); > - fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) - > - (uintptr_t)dseg) / > - sizeof(struct hns_roce_v2_wqe_data_seg); > - if (extend_sge_num > fi_sge_num) { > - se_sge_num = extend_sge_num - fi_sge_num; > - for (i = 0; i < fi_sge_num; i++) { > - set_data_seg_v2(dseg++, sg + i); > - (*sge_ind)++; > - } > - dseg = hns_roce_get_extend_sge(qp, > - (*sge_ind) & (qp->sge.sge_cnt - 1)); > - for (i = 0; i < se_sge_num; i++) { > - set_data_seg_v2(dseg++, sg + fi_sge_num + i); > - (*sge_ind)++; > - } > - } else { > - for (i = 0; i < extend_sge_num; i++) { > - set_data_seg_v2(dseg++, sg + i); > - (*sge_ind)++; > - } > + while (cnt > 0) { > + dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1)); > + set_data_seg_v2(dseg, sge); > + idx++; > + sge++; > + cnt--; > } > + > + *sge_ind = idx; > } > > static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, > @@ -232,7 +209,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, > roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, > 1); > } else { > - if (valid_num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { > + if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) { > for (i = 0; i < wr->num_sge; i++) { > if (likely(wr->sg_list[i].length)) { > set_data_seg_v2(dseg, wr->sg_list + i); > @@ -245,8 +222,8 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, > V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, > (*sge_ind) & (qp->sge.sge_cnt - 1)); > > - for (i = 0; i < wr->num_sge && > - j < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { > + for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; > + i++) { > if (likely(wr->sg_list[i].length)) { > set_data_seg_v2(dseg, wr->sg_list + i); > dseg++; > @@ -675,7 +652,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, > } > > /* rq support inline data */ > - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { > + if (hr_qp->rq_inl_buf.wqe_cnt) { > sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; > hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = > (u32)wr->num_sge; > @@ -3491,29 +3468,18 @@ static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, > struct hns_roce_v2_qp_context *context, > struct hns_roce_v2_qp_context *qpc_mask) > { > - if (hr_qp->ibqp.qp_type == IB_QPT_GSI) > - roce_set_field(context->byte_4_sqpn_tst, > - V2_QPC_BYTE_4_SGE_SHIFT_M, > - V2_QPC_BYTE_4_SGE_SHIFT_S, > - ilog2((unsigned int)hr_qp->sge.sge_cnt)); > - else > - roce_set_field(context->byte_4_sqpn_tst, > - V2_QPC_BYTE_4_SGE_SHIFT_M, > - V2_QPC_BYTE_4_SGE_SHIFT_S, > - hr_qp->sq.max_gs > > - HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ? > - ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); > + roce_set_field(context->byte_4_sqpn_tst, > + V2_QPC_BYTE_4_SGE_SHIFT_M, V2_QPC_BYTE_4_SGE_SHIFT_S, > + to_hr_hem_entries_shift(hr_qp->sge.sge_cnt, > + hr_qp->sge.sge_shift)); > > roce_set_field(context->byte_20_smac_sgid_idx, > V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, > - ilog2((unsigned int)hr_qp->sq.wqe_cnt)); > + ilog2(hr_qp->sq.wqe_cnt)); > > roce_set_field(context->byte_20_smac_sgid_idx, > V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, > - (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || > - hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || > - hr_qp->ibqp.srq) ? 
0 : > - ilog2((unsigned int)hr_qp->rq.wqe_cnt)); > + ilog2(hr_qp->rq.wqe_cnt)); > } > > static void modify_qp_reset_to_init(struct ib_qp *ibqp, > @@ -3781,17 +3747,16 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, > > roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, > V2_QPC_BYTE_12_SQ_HOP_NUM_S, > - hr_dev->caps.wqe_sq_hop_num == HNS_ROCE_HOP_NUM_0 ? > - 0 : hr_dev->caps.wqe_sq_hop_num); > + to_hr_hem_hopnum(hr_dev->caps.wqe_sq_hop_num, > + hr_qp->sq.wqe_cnt)); > roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, > V2_QPC_BYTE_12_SQ_HOP_NUM_S, 0); > > roce_set_field(context->byte_20_smac_sgid_idx, > V2_QPC_BYTE_20_SGE_HOP_NUM_M, > V2_QPC_BYTE_20_SGE_HOP_NUM_S, > - ((ibqp->qp_type == IB_QPT_GSI) || > - hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? > - hr_dev->caps.wqe_sge_hop_num : 0); > + to_hr_hem_hopnum(hr_dev->caps.wqe_sge_hop_num, > + hr_qp->sge.sge_cnt)); > roce_set_field(qpc_mask->byte_20_smac_sgid_idx, > V2_QPC_BYTE_20_SGE_HOP_NUM_M, > V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0); > @@ -3799,8 +3764,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, > roce_set_field(context->byte_20_smac_sgid_idx, > V2_QPC_BYTE_20_RQ_HOP_NUM_M, > V2_QPC_BYTE_20_RQ_HOP_NUM_S, > - hr_dev->caps.wqe_rq_hop_num == HNS_ROCE_HOP_NUM_0 ? > - 0 : hr_dev->caps.wqe_rq_hop_num); > + to_hr_hem_hopnum(hr_dev->caps.wqe_rq_hop_num, > + hr_qp->rq.wqe_cnt)); > + > roce_set_field(qpc_mask->byte_20_smac_sgid_idx, > V2_QPC_BYTE_20_RQ_HOP_NUM_M, > V2_QPC_BYTE_20_RQ_HOP_NUM_S, 0); > @@ -3977,7 +3943,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, > return -EINVAL; > } > > - if (hr_qp->sge.offset) { > + if (hr_qp->sge.sge_cnt > 0) { > page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; > count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, > hr_qp->sge.offset / page_size, > @@ -4011,15 +3977,12 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, > V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, > V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0); > > - context->sq_cur_sge_blk_addr = ((ibqp->qp_type == IB_QPT_GSI) || > - hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? > - cpu_to_le32(to_hr_hw_page_addr(sge_cur_blk)) : 0; > + context->sq_cur_sge_blk_addr = > + cpu_to_le32(to_hr_hw_page_addr(sge_cur_blk)); > roce_set_field(context->byte_184_irrl_idx, > V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, > V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, > - ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > > - HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? 
> - upper_32_bits(to_hr_hw_page_addr(sge_cur_blk)) : 0); > + upper_32_bits(to_hr_hw_page_addr(sge_cur_blk))); > qpc_mask->sq_cur_sge_blk_addr = 0; > roce_set_field(qpc_mask->byte_184_irrl_idx, > V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, > diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c > index d05d3cb..b570759 100644 > --- a/drivers/infiniband/hw/hns/hns_roce_qp.c > +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c > @@ -355,16 +355,16 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) > hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR); > } > > -static int set_rq_size(struct hns_roce_dev *hr_dev, > - struct ib_qp_cap *cap, bool is_user, int has_rq, > - struct hns_roce_qp *hr_qp) > +static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, > + struct hns_roce_qp *hr_qp, int has_rq) > { > - u32 max_cnt; > + u32 cnt; > > /* If srq exist, set zero for relative number of rq */ > if (!has_rq) { > hr_qp->rq.wqe_cnt = 0; > hr_qp->rq.max_gs = 0; > + hr_qp->rq_inl_buf.wqe_cnt = 0; > cap->max_recv_wr = 0; > cap->max_recv_sge = 0; > > @@ -379,17 +379,14 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, > return -EINVAL; > } > > - max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes); > - > - hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt); > - if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) { > + cnt = roundup_pow_of_two(max(cap->max_recv_wr, hr_dev->caps.min_wqes)); > + if (cnt > hr_dev->caps.max_wqes) { > ibdev_err(&hr_dev->ib_dev, "rq depth %u too large\n", > cap->max_recv_wr); > return -EINVAL; > } > > - max_cnt = max(1U, cap->max_recv_sge); > - hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt); > + hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); > > if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) > hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); > @@ -397,12 +394,61 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, > hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz * > hr_qp->rq.max_gs); > > - cap->max_recv_wr = hr_qp->rq.wqe_cnt; > + hr_qp->rq.wqe_cnt = cnt; > + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) > + hr_qp->rq_inl_buf.wqe_cnt = cnt; > + else > + hr_qp->rq_inl_buf.wqe_cnt = 0; > + > + cap->max_recv_wr = cnt; > cap->max_recv_sge = hr_qp->rq.max_gs; > > return 0; > } > > +static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, > + struct hns_roce_qp *hr_qp, > + struct ib_qp_cap *cap) > +{ > + struct ib_device *ibdev = &hr_dev->ib_dev; > + u32 cnt; > + > + cnt = max(1U, cap->max_send_sge); > + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { > + hr_qp->sq.max_gs = roundup_pow_of_two(cnt); > + hr_qp->sge.sge_cnt = 0; > + > + return 0; > + } > + > + hr_qp->sq.max_gs = cnt; > + > + /* UD sqwqe's sge use extend sge */ > + if (hr_qp->ibqp.qp_type == IB_QPT_GSI || > + hr_qp->ibqp.qp_type == IB_QPT_UD) { > + cnt = roundup_pow_of_two(sq_wqe_cnt * hr_qp->sq.max_gs); > + } else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) { > + cnt = roundup_pow_of_two(sq_wqe_cnt * > + (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); > + > + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { > + if (cnt > hr_dev->caps.max_extend_sg) { > + ibdev_err(ibdev, > + "failed to check exSGE num, exSGE num = %d.\n", > + cnt); > + return -EINVAL; > + } > + } > + } else { > + cnt = 0; > + } > + > + hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; > + hr_qp->sge.sge_cnt = cnt; > + > + return 0; > +} > + > static int check_sq_size_with_integrity(struct 
hns_roce_dev *hr_dev, > struct ib_qp_cap *cap, > struct hns_roce_ib_create_qp *ucmd) > @@ -430,82 +476,27 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, > struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp, > struct hns_roce_ib_create_qp *ucmd) > { > - u32 ex_sge_num; > - u32 page_size; > - u32 max_cnt; > + struct ib_device *ibdev = &hr_dev->ib_dev; > + u32 cnt = 0; > int ret; > > - if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) || > - hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) > + if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) || > + cnt > hr_dev->caps.max_wqes) > return -EINVAL; > > ret = check_sq_size_with_integrity(hr_dev, cap, ucmd); > if (ret) { > - ibdev_err(&hr_dev->ib_dev, "Failed to check user SQ size limit\n"); > + ibdev_err(ibdev, "failed to check user SQ size, ret = %d.\n", > + ret); > return ret; > } > > - hr_qp->sq.wqe_shift = ucmd->log_sq_stride; > - > - max_cnt = max(1U, cap->max_send_sge); > - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) > - hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); > - else > - hr_qp->sq.max_gs = max_cnt; > - > - if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) > - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * > - (hr_qp->sq.max_gs - 2)); > - > - if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE && > - hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { > - if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { > - ibdev_err(&hr_dev->ib_dev, > - "Failed to check extended SGE size limit %d\n", > - hr_qp->sge.sge_cnt); > - return -EINVAL; > - } > - } > - > - hr_qp->sge.sge_shift = 4; > - ex_sge_num = hr_qp->sge.sge_cnt; > + ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); > + if (ret) > + return ret; > > - /* Get buf size, SQ and RQ are aligned to page_szie */ > - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { > - hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt << > - hr_qp->rq.wqe_shift), PAGE_SIZE) + > - round_up((hr_qp->sq.wqe_cnt << > - hr_qp->sq.wqe_shift), PAGE_SIZE); > - > - hr_qp->sq.offset = 0; > - hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt << > - hr_qp->sq.wqe_shift), PAGE_SIZE); > - } else { > - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); > - hr_qp->sge.sge_cnt = ex_sge_num ? 
> - max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0; > - hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt << > - hr_qp->rq.wqe_shift), page_size) + > - round_up((hr_qp->sge.sge_cnt << > - hr_qp->sge.sge_shift), page_size) + > - round_up((hr_qp->sq.wqe_cnt << > - hr_qp->sq.wqe_shift), page_size); > - > - hr_qp->sq.offset = 0; > - if (ex_sge_num) { > - hr_qp->sge.offset = round_up((hr_qp->sq.wqe_cnt << > - hr_qp->sq.wqe_shift), > - page_size); > - hr_qp->rq.offset = hr_qp->sge.offset + > - round_up((hr_qp->sge.sge_cnt << > - hr_qp->sge.sge_shift), > - page_size); > - } else { > - hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt << > - hr_qp->sq.wqe_shift), > - page_size); > - } > - } > + hr_qp->sq.wqe_shift = ucmd->log_sq_stride; > + hr_qp->sq.wqe_cnt = cnt; > > return 0; > } > @@ -514,84 +505,50 @@ static int split_wqe_buf_region(struct hns_roce_dev *hr_dev, > struct hns_roce_qp *hr_qp, > struct hns_roce_buf_attr *buf_attr) > { > - bool is_extend_sge; > int buf_size; > int idx = 0; > > - if (hr_qp->buff_size < 1) > - return -EINVAL; > - > - buf_attr->page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.mtt_buf_pg_sz; > - buf_attr->fixed_page = true; > - buf_attr->region_count = 0; > - > - if (hr_qp->sge.sge_cnt > 0) > - is_extend_sge = true; > - else > - is_extend_sge = false; > + hr_qp->buff_size = 0; > > /* SQ WQE */ > - if (is_extend_sge) > - buf_size = hr_qp->sge.offset - hr_qp->sq.offset; > - else > - buf_size = hr_qp->rq.offset - hr_qp->sq.offset; > - > + hr_qp->sq.offset = 0; > + buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt, > + hr_qp->sq.wqe_shift); > if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { > buf_attr->region[idx].size = buf_size; > buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num; > idx++; > + hr_qp->buff_size += buf_size; > } > > - /* extend SGE in SQ WQE */ > - buf_size = hr_qp->rq.offset - hr_qp->sge.offset; > - if (buf_size > 0 && is_extend_sge && > - idx < ARRAY_SIZE(buf_attr->region)) { > + /* extend SGE WQE in SQ */ > + hr_qp->sge.offset = hr_qp->buff_size; > + buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt, > + hr_qp->sge.sge_shift); > + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { > buf_attr->region[idx].size = buf_size; > - buf_attr->region[idx].hopnum = > - hr_dev->caps.wqe_sge_hop_num; > + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num; > idx++; > + hr_qp->buff_size += buf_size; > } > > /* RQ WQE */ > - buf_size = hr_qp->buff_size - hr_qp->rq.offset; > + hr_qp->rq.offset = hr_qp->buff_size; > + buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt, > + hr_qp->rq.wqe_shift); > if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { > buf_attr->region[idx].size = buf_size; > buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num; > idx++; > + hr_qp->buff_size += buf_size; > } > > - buf_attr->region_count = idx; > - > - return 0; > -} > - > -static int set_extend_sge_param(struct hns_roce_dev *hr_dev, > - struct hns_roce_qp *hr_qp) > -{ > - struct device *dev = hr_dev->dev; > - > - if (hr_qp->sq.max_gs > 2) { > - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * > - (hr_qp->sq.max_gs - 2)); > - hr_qp->sge.sge_shift = 4; > - } > - > - /* ud sqwqe's sge use extend sge */ > - if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 && > - hr_qp->ibqp.qp_type == IB_QPT_GSI) { > - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * > - hr_qp->sq.max_gs); > - hr_qp->sge.sge_shift = 4; > - } > + if (hr_qp->buff_size < 1) > + return -EINVAL; > > - if (hr_qp->sq.max_gs > 2 && > - hr_dev->pci_dev->revision == 
PCI_REVISION_ID_HIP08_A) { > - if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { > - dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n", > - hr_qp->sge.sge_cnt); > - return -EINVAL; > - } > - } > + buf_attr->page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.mtt_buf_pg_sz; > + buf_attr->fixed_page = true; > + buf_attr->region_count = idx; > > return 0; > } > @@ -599,62 +556,35 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, > static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, > struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp) > { > - u32 page_size; > - u32 max_cnt; > - int size; > + struct ib_device *ibdev = &hr_dev->ib_dev; > + u32 cnt; > int ret; > > if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || > cap->max_send_sge > hr_dev->caps.max_sq_sg || > cap->max_inline_data > hr_dev->caps.max_sq_inline) { > - ibdev_err(&hr_dev->ib_dev, > - "SQ WR or sge or inline data error!\n"); > + ibdev_err(ibdev, > + "failed to check SQ WR, SGE or inline num, ret = %d.\n", > + -EINVAL); > return -EINVAL; > } > > - hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); > - > - max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes); > - > - hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt); > - if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) { > - ibdev_err(&hr_dev->ib_dev, > - "while setting kernel sq size, sq.wqe_cnt too large\n"); > + cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes)); > + if (cnt > hr_dev->caps.max_wqes) { > + ibdev_err(ibdev, "failed to check WQE num, WQE num = %d.\n", > + cnt); > return -EINVAL; > } > > - /* Get data_seg numbers */ > - max_cnt = max(1U, cap->max_send_sge); > - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) > - hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); > - else > - hr_qp->sq.max_gs = max_cnt; > + hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); > + hr_qp->sq.wqe_cnt = cnt; > > - ret = set_extend_sge_param(hr_dev, hr_qp); > - if (ret) { > - ibdev_err(&hr_dev->ib_dev, "set extend sge parameters fail\n"); > + ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); > + if (ret) > return ret; > - } > > - /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ > - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); > - hr_qp->sq.offset = 0; > - size = round_up(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size); > - > - if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 && hr_qp->sge.sge_cnt) { > - hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift), > - (u32)hr_qp->sge.sge_cnt); > - hr_qp->sge.offset = size; > - size += round_up(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift, > - page_size); > - } > - > - hr_qp->rq.offset = size; > - size += round_up((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size); > - hr_qp->buff_size = size; > - > - /* Get wr and sge number which send */ > - cap->max_send_wr = hr_qp->sq.wqe_cnt; > + /* sync the parameters of kernel QP to user's configuration */ > + cap->max_send_wr = cnt; > cap->max_send_sge = hr_qp->sq.max_gs; > > /* We don't support inline sends for kernel QPs (yet) */ > @@ -685,8 +615,8 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp, > struct ib_qp_init_attr *init_attr) > { > u32 max_recv_sge = init_attr->cap.max_recv_sge; > + u32 wqe_cnt = hr_qp->rq_inl_buf.wqe_cnt; > struct hns_roce_rinl_wqe *wqe_list; > - u32 wqe_cnt = hr_qp->rq.wqe_cnt; > int i; > > /* allocate recv inline buf */ > @@ -708,7 +638,6 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp, > wqe_list[i].sg_list = &wqe_list[0].sg_list[i * max_recv_sge]; > > 
hr_qp->rq_inl_buf.wqe_list = wqe_list; > - hr_qp->rq_inl_buf.wqe_cnt = wqe_cnt; > > return 0; > > @@ -721,7 +650,8 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp, > > static void free_rq_inline_buf(struct hns_roce_qp *hr_qp) > { > - kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); > + if (hr_qp->rq_inl_buf.wqe_list) > + kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); > kfree(hr_qp->rq_inl_buf.wqe_list); > } > > @@ -731,36 +661,36 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, > { > struct ib_device *ibdev = &hr_dev->ib_dev; > struct hns_roce_buf_attr buf_attr = {}; > - bool is_rq_buf_inline; > int ret; > > - is_rq_buf_inline = (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && > - hns_roce_qp_has_rq(init_attr); > - if (is_rq_buf_inline) { > + if (!udata && hr_qp->rq_inl_buf.wqe_cnt) { > ret = alloc_rq_inline_buf(hr_qp, init_attr); > if (ret) { > - ibdev_err(ibdev, "Failed to alloc inline RQ buffer\n"); > + ibdev_err(ibdev, > + "failed to alloc inline buf, ret = %d.\n", > + ret); > return ret; > } > + } else { > + hr_qp->rq_inl_buf.wqe_list = NULL; > } > > ret = split_wqe_buf_region(hr_dev, hr_qp, &buf_attr); > if (ret) { > - ibdev_err(ibdev, "Failed to split WQE buf, ret %d\n", ret); > + ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret); > goto err_inline; > } > ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr, > PAGE_ADDR_SHIFT + hr_dev->caps.mtt_ba_pg_sz, > udata, addr); > if (ret) { > - ibdev_err(ibdev, "Failed to create WQE mtr, ret %d\n", ret); > + ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret); > goto err_inline; > } > > return 0; > err_inline: > - if (is_rq_buf_inline) > - free_rq_inline_buf(hr_qp); > + free_rq_inline_buf(hr_qp); > > return ret; > } > @@ -768,9 +698,7 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, > static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) > { > hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr); > - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && > - hr_qp->rq.wqe_cnt) > - free_rq_inline_buf(hr_qp); > + free_rq_inline_buf(hr_qp); > } > > static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev, > @@ -935,10 +863,11 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, > else > hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; > > - ret = set_rq_size(hr_dev, &init_attr->cap, udata, > - hns_roce_qp_has_rq(init_attr), hr_qp); > + ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp, > + hns_roce_qp_has_rq(init_attr)); > if (ret) { > - ibdev_err(ibdev, "Failed to set user RQ size\n"); > + ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n", > + ret); > return ret; > } > >
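For the record, here is a rough sketch of what the simplified helpers could look like once that redundant check is dropped; this is only an illustration, not the actual v2 patch. Because 0 << buf_shift is 0 and hr_hw_page_align(0) is still 0, the size and count helpers need no special case at all, while the shift helper still has to guard against a zero count since ilog2(0) is undefined:

/* Illustrative only, not the v2 patch: the size/count helpers without the
 * redundant "count > 0" check. A zero count already yields 0 because
 * hr_hw_page_align(0 << buf_shift) == 0.
 */
static inline u32 to_hr_hem_entries_size(u32 count, u32 buf_shift)
{
        return hr_hw_page_align(count << buf_shift);
}

static inline u32 to_hr_hem_entries_count(u32 count, u32 buf_shift)
{
        return hr_hw_page_align(count << buf_shift) >> buf_shift;
}

/* The shift helper keeps its check: ilog2(0) is undefined, so a zero count
 * must still be filtered out before calling ilog2().
 */
static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift)
{
        if (!count)
                return 0;

        return ilog2(to_hr_hem_entries_count(count, buf_shift));
}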
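As for the reworked split_wqe_buf_region(), the layout computation now reduces to aligning each region (SQ WQE, extended SGE, RQ WQE) to a hardware page and accumulating the offsets. The standalone snippet below (not driver code; it assumes the 4 KB hardware page implied by PAGE_ADDR_SHIFT == 12 and uses made-up queue sizes) just illustrates that arithmetic:

/* Standalone illustration (not driver code) of how the refactored
 * split_wqe_buf_region() lays out the QP WQE buffer: every region is
 * rounded up to a hardware page and the offsets simply accumulate.
 */
#include <stdint.h>
#include <stdio.h>

#define HW_PAGE_SIZE 4096u              /* assumes PAGE_ADDR_SHIFT == 12 */

static uint32_t hw_page_align(uint32_t size)
{
        return (size + HW_PAGE_SIZE - 1) & ~(HW_PAGE_SIZE - 1);
}

int main(void)
{
        /* hypothetical queue parameters, not taken from the patch */
        uint32_t sq_cnt = 64, sq_shift = 6;     /* 64 WQEs x 64 B */
        uint32_t sge_cnt = 64, sge_shift = 4;   /* 64 ext. SGEs x 16 B */
        uint32_t rq_cnt = 128, rq_shift = 6;    /* 128 WQEs x 64 B */
        uint32_t buff_size = 0, sq_off, sge_off, rq_off;

        sq_off = buff_size;
        buff_size += hw_page_align(sq_cnt << sq_shift);   /* 4096 -> 4096 */

        sge_off = buff_size;
        buff_size += hw_page_align(sge_cnt << sge_shift); /* 1024 -> 4096 */

        rq_off = buff_size;
        buff_size += hw_page_align(rq_cnt << rq_shift);   /* 8192 -> 8192 */

        printf("sq=%u sge=%u rq=%u total=%u\n",
               sq_off, sge_off, rq_off, buff_size);
        /* prints: sq=0 sge=4096 rq=8192 total=16384 */
        return 0;
}

A region whose size works out to 0 (e.g. no extended SGE) is simply skipped in the driver, so it never consumes a buf_attr->region slot.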