HIP09 supports UD inline messages up to 1024 bytes. If the data size is
no bigger than 8 bytes, the data is stored directly in the SQ WQE;
otherwise, it is filled into the extended SGE space.

Signed-off-by: Weihang Li <liweihang@xxxxxxxxxx>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |   2 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 111 ++++++++++++++++++++++++++--
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  14 ++++
 drivers/infiniband/hw/hns/hns_roce_qp.c     |   6 ++
 4 files changed, 126 insertions(+), 7 deletions(-)
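[Note for reviewers, not part of the commit: a minimal libibverbs sketch
of how userspace would exercise this path. It is an untested
illustration under assumptions — the function name send_ud_inline is
hypothetical, AH creation and QP state transitions are elided, and the
QP/AH/buffer arguments are placeholders.]

	#include <stdint.h>
	#include <infiniband/verbs.h>

	/* Create a UD QP that accepts inline data, then post one inline send. */
	static int send_ud_inline(struct ibv_pd *pd, struct ibv_cq *cq,
				  struct ibv_ah *ah, uint32_t remote_qpn,
				  uint32_t remote_qkey, void *buf, uint32_t len)
	{
		struct ibv_qp_init_attr init_attr = {
			.send_cq = cq,
			.recv_cq = cq,
			.qp_type = IBV_QPT_UD,
			.cap = {
				.max_send_wr = 64,
				.max_recv_wr = 64,
				.max_send_sge = 1,
				.max_recv_sge = 1,
				/* capped by the driver; up to 1024 with this series */
				.max_inline_data = 1024,
			},
		};
		struct ibv_qp *qp = ibv_create_qp(pd, &init_attr);
		struct ibv_send_wr *bad_wr;

		if (!qp)
			return -1;

		/* QP transitions to INIT/RTR/RTS elided for brevity. */

		struct ibv_sge sge = {
			.addr = (uintptr_t)buf,
			.length = len,	/* no lkey needed for inline data */
		};
		struct ibv_send_wr wr = {
			.opcode = IBV_WR_SEND,
			/* inline: payload is copied into the WQE at post time,
			 * so buf may be reused as soon as this call returns */
			.send_flags = IBV_SEND_INLINE | IBV_SEND_SIGNALED,
			.sg_list = &sge,
			.num_sge = 1,
			.wr.ud = {
				.ah = ah,
				.remote_qpn = remote_qpn,
				.remote_qkey = remote_qkey,
			},
		};

		return ibv_post_send(qp, &wr, &bad_wr);
	}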
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 9a032d0..f54739b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -133,6 +133,7 @@ enum hns_roce_qp_caps {
 	HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0),
 	HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1),
 	HNS_ROCE_QP_CAP_OWNER_DB = BIT(2),
+	HNS_ROCE_QP_CAP_UD_SQ_INL = BIT(3),
 };
 
 enum hns_roce_cq_flags {
@@ -222,6 +223,7 @@ enum {
 	HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
 	HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9),
 	HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
+	HNS_ROCE_CAP_FLAG_UD_SQ_INL = BIT(13),
 	HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14),
 };
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 57ff223..285d455 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -428,9 +428,6 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
 	struct ib_device *ib_dev = ah->ibah.device;
 	struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
 
-	roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
-		       V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport);
-
 	roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
 		       V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit);
 	roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
@@ -456,6 +453,90 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
 	return 0;
 }
 
+static void fill_ud_inn_inl_data(const struct ib_send_wr *wr,
+				 struct hns_roce_v2_ud_send_wqe *ud_sq_wqe)
+{
+	u8 data[HNS_ROCE_V2_MAX_UD_INL_INN_SZ] = {0};
+	u32 *loc = (u32 *)data;
+	void *tmp = data;
+	unsigned int i;
+	u32 tmp_data;
+
+	for (i = 0; i < wr->num_sge; i++) {
+		memcpy(tmp, ((void *)wr->sg_list[i].addr),
+		       wr->sg_list[i].length);
+		tmp += wr->sg_list[i].length;
+	}
+
+	roce_set_field(ud_sq_wqe->msg_len,
+		       V2_UD_SEND_WQE_BYTE_8_INL_DATA_15_0_M,
+		       V2_UD_SEND_WQE_BYTE_8_INL_DATA_15_0_S,
+		       *loc & 0xffff);
+
+	roce_set_field(ud_sq_wqe->byte_16,
+		       V2_UD_SEND_WQE_BYTE_16_INL_DATA_23_16_M,
+		       V2_UD_SEND_WQE_BYTE_16_INL_DATA_23_16_S,
+		       (*loc >> 16) & 0xff);
+
+	tmp_data = *loc >> 24;
+	loc++;
+	tmp_data |= ((*loc & 0xffff) << 8);
+
+	roce_set_field(ud_sq_wqe->byte_20,
+		       V2_UD_SEND_WQE_BYTE_20_INL_DATA_47_24_M,
+		       V2_UD_SEND_WQE_BYTE_20_INL_DATA_47_24_S,
+		       tmp_data);
+
+	roce_set_field(ud_sq_wqe->byte_24,
+		       V2_UD_SEND_WQE_BYTE_24_INL_DATA_63_48_M,
+		       V2_UD_SEND_WQE_BYTE_24_INL_DATA_63_48_S,
+		       *loc >> 16);
+}
+
+static int set_ud_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
+		      struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
+		      unsigned int *sge_idx)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+	u32 msg_len = le32_to_cpu(ud_sq_wqe->msg_len);
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	unsigned int curr_idx = *sge_idx;
+	int ret;
+
+	if (!(qp->en_flags & HNS_ROCE_QP_CAP_UD_SQ_INL)) {
+		ibdev_err(ibdev, "not support UD SQ inline!\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (!check_inl_data_len(qp, msg_len))
+		return -EINVAL;
+
+	roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_INL_S, 1);
+
+	if (msg_len <= HNS_ROCE_V2_MAX_UD_INL_INN_SZ) {
+		roce_set_bit(ud_sq_wqe->byte_20,
+			     V2_UD_SEND_WQE_BYTE_20_INL_TYPE_S, 0);
+
+		fill_ud_inn_inl_data(wr, ud_sq_wqe);
+	} else {
+		roce_set_bit(ud_sq_wqe->byte_20,
+			     V2_UD_SEND_WQE_BYTE_20_INL_TYPE_S, 1);
+
+		ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len);
+		if (ret)
+			return ret;
+
+		roce_set_field(ud_sq_wqe->byte_16,
+			       V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
+			       V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S,
+			       curr_idx - *sge_idx);
+	}
+
+	*sge_idx = curr_idx;
+
+	return 0;
+}
+
 static inline int set_ud_wqe(struct hns_roce_qp *qp,
 			     const struct ib_send_wr *wr,
 			     void *wqe, unsigned int *sge_idx,
@@ -486,9 +567,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
 	roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M,
 		       V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn);
 
-	roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
-		       V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
-
 	roce_set_field(ud_sq_wqe->byte_20,
 		       V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
 		       V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
@@ -503,7 +581,23 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
 	if (ret)
 		return ret;
 
-	set_extend_sge(qp, wr, &curr_idx, valid_num_sge);
+	if (wr->send_flags & IB_SEND_INLINE) {
+		ret = set_ud_inl(qp, wr, ud_sq_wqe, &curr_idx);
+		if (ret)
+			return ret;
+	} else {
+		roce_set_field(ud_sq_wqe->byte_16,
+			       V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
+			       V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S,
+			       valid_num_sge);
+
+		roce_set_field(ud_sq_wqe->byte_24,
+			       V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
+			       V2_UD_SEND_WQE_BYTE_24_UDPSPN_S,
+			       ah->av.udp_sport);
+
+		set_extend_sge(qp, wr, &curr_idx, valid_num_sge);
+	}
 
 	/*
 	 * The pipeline can sequentially post all valid WQEs into WQ buffer,
@@ -1916,6 +2010,8 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
 		caps->gmv_buf_pg_sz = 0;
 		caps->gid_table_len[0] = caps->gmv_bt_num *
 					(HNS_HW_PAGE_SIZE / caps->gmv_entry_sz);
+		caps->flags |= HNS_ROCE_CAP_FLAG_UD_SQ_INL;
+		caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INL_EXT;
 	}
 }
 
@@ -5125,6 +5221,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	qp_attr->cur_qp_state = qp_attr->qp_state;
 	qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
 	qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+	qp_attr->cap.max_inline_data = hr_qp->max_inline_data;
 
 	if (!ibqp->uobject) {
 		qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index c068517..1c1a773 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -61,6 +61,8 @@
 #define HNS_ROCE_V2_MAX_SQ_SGE_NUM		64
 #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM		0x200000
 #define HNS_ROCE_V2_MAX_SQ_INLINE		0x20
+#define HNS_ROCE_V2_MAX_SQ_INL_EXT		0x400
+#define HNS_ROCE_V2_MAX_UD_INL_INN_SZ		8
 #define HNS_ROCE_V2_MAX_RC_INL_INN_SZ		32
 #define HNS_ROCE_V2_UAR_NUM			256
 #define HNS_ROCE_V2_PHY_UAR_NUM			1
@@ -1126,6 +1128,18 @@ struct hns_roce_v2_ud_send_wqe {
 
 #define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
 
+#define V2_UD_SEND_WQE_BYTE_4_INL_S 12
+#define V2_UD_SEND_WQE_BYTE_20_INL_TYPE_S 31
+
+#define V2_UD_SEND_WQE_BYTE_8_INL_DATA_15_0_S 16
+#define V2_UD_SEND_WQE_BYTE_8_INL_DATA_15_0_M GENMASK(31, 16)
+#define V2_UD_SEND_WQE_BYTE_16_INL_DATA_23_16_S 24
+#define V2_UD_SEND_WQE_BYTE_16_INL_DATA_23_16_M GENMASK(31, 24)
+#define V2_UD_SEND_WQE_BYTE_20_INL_DATA_47_24_S 0
+#define V2_UD_SEND_WQE_BYTE_20_INL_DATA_47_24_M GENMASK(23, 0)
+#define V2_UD_SEND_WQE_BYTE_24_INL_DATA_63_48_S 0
+#define V2_UD_SEND_WQE_BYTE_24_INL_DATA_63_48_M GENMASK(15, 0)
+
 struct hns_roce_v2_rc_send_wqe {
 	__le32 byte_4;
 	__le32 msg_len;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 5e505a3..1210061 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -862,6 +862,9 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 			return ret;
 	}
 
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_UD_SQ_INL)
+		hr_qp->en_flags |= HNS_ROCE_QP_CAP_UD_SQ_INL;
+
 	if (udata) {
 		if (ib_copy_from_udata(ucmd, udata, sizeof(*ucmd))) {
 			ibdev_err(ibdev, "Failed to copy QP ucmd\n");
@@ -946,6 +949,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 	}
 
 	if (udata) {
+		if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_UD_SQ_INL)
+			resp.cap_flags |= HNS_ROCE_QP_CAP_UD_SQ_INL;
+
 		ret = ib_copy_to_udata(udata, &resp,
 				       min(udata->outlen, sizeof(resp)));
 		if (ret) {
-- 
2.8.1
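[Illustration for reviewers, not part of the patch: the <= 8-byte
"inner" inline path scatters the payload across four WQE dwords. The
standalone helper below mirrors the arithmetic that
fill_ud_inn_inl_data() performs through roce_set_field(), assuming a
little-endian host as in the driver's u32 view of the buffer; the
function and parameter names are only suggestive of the WQE fields
being set.]

	#include <stdint.h>
	#include <string.h>

	static void pack_ud_inn_inl(const void *src, size_t len,
				    uint32_t *byte_8, uint32_t *byte_16,
				    uint32_t *byte_20, uint32_t *byte_24)
	{
		uint8_t data[8] = { 0 };
		uint64_t v;

		memcpy(data, src, len);		/* len <= 8, tail stays zero */
		memcpy(&v, data, sizeof(v));	/* payload as one 64-bit value */

		*byte_8  |= (uint32_t)(v & 0xffff) << 16;	/* bits 15:0  -> byte_8[31:16]  */
		*byte_16 |= (uint32_t)((v >> 16) & 0xff) << 24;	/* bits 23:16 -> byte_16[31:24] */
		*byte_20 |= (uint32_t)((v >> 24) & 0xffffff);	/* bits 47:24 -> byte_20[23:0]  */
		*byte_24 |= (uint32_t)((v >> 48) & 0xffff);	/* bits 63:48 -> byte_24[15:0]  */
	}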