From: Jiaran Zhang <zhangjiaran@xxxxxxxxxx>

Support extended atomic operations, including cmp & swap and fetch & add
of 8 bytes, 16 bytes, 32 bytes and 64 bytes, on hip08.

Signed-off-by: Jiaran Zhang <zhangjiaran@xxxxxxxxxx>
Signed-off-by: Weihang Li <liweihang@xxxxxxxxxxxxx>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 100 ++++++++++++++++++++++++-----
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h |   8 +++
 2 files changed, 93 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 907c951..74ccb08 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -97,18 +97,68 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
 		      V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
 }
 
-static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
-			   const struct ib_atomic_wr *wr)
+static void set_extend_atomic_seg(struct hns_roce_qp *qp,
+				  u32 ex_sge_num, unsigned int *sge_idx,
+				  u64 *data_addr)
 {
-	if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-		aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
-		aseg->cmp_data = cpu_to_le64(wr->compare_add);
-	} else {
-		aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
-		aseg->cmp_data = 0;
+	__le64 *ext_seg;
+	int i;
+
+	for (i = 0; i < ex_sge_num; i += EXT_SGE_BYTE_8_NUM, (*sge_idx)++) {
+		ext_seg = get_send_extend_sge(qp, ((*sge_idx) &
+						   (qp->sge.sge_cnt - 1)));
+		/* In the extended atomic scenario, the data_addr parameter
+		 * passes the address where the extended atomic data is stored.
+		 */
+		*ext_seg = data_addr ? cpu_to_le64(*(data_addr + i)) : 0;
+		*(ext_seg + 1) = data_addr ?
+				 cpu_to_le64(*(data_addr + (i + 1))) : 0;
 	}
 }
 
+static int set_atomic_seg(struct hns_roce_qp *qp,
+			  const struct ib_send_wr *wr, unsigned int msg_len,
+			  void *dseg, unsigned int *sge_idx)
+{
+	struct hns_roce_wqe_atomic_seg *aseg;
+	u32 ex_sge_num;
+
+	dseg += sizeof(struct hns_roce_v2_wqe_data_seg);
+	aseg = dseg;
+
+	if (msg_len == STANDARD_ATOMIC_BYTE_8) {
+		if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+			aseg->fetchadd_swap_data =
+				cpu_to_le64(atomic_wr(wr)->swap);
+			aseg->cmp_data =
+				cpu_to_le64(atomic_wr(wr)->compare_add);
+		} else {
+			aseg->fetchadd_swap_data =
+				cpu_to_le64(atomic_wr(wr)->compare_add);
+			aseg->cmp_data = 0;
+		}
+	} else if (msg_len == EXTEND_ATOMIC_BYTE_16 ||
+		   msg_len == EXTEND_ATOMIC_BYTE_32 ||
+		   msg_len == EXTEND_ATOMIC_BYTE_64) {
+		ex_sge_num = msg_len >> 3;
+		aseg->fetchadd_swap_data = 0;
+		aseg->cmp_data = 0;
+		if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+			set_extend_atomic_seg(qp, ex_sge_num, sge_idx,
+					      (u64 *)atomic_wr(wr)->swap);
+			set_extend_atomic_seg(qp, ex_sge_num, sge_idx,
+					      (u64 *)atomic_wr(wr)->compare_add);
+		} else {
+			set_extend_atomic_seg(qp, ex_sge_num, sge_idx,
+					      (u64 *)atomic_wr(wr)->compare_add);
+			set_extend_atomic_seg(qp, ex_sge_num, sge_idx, 0);
+		}
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
 static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
 			   unsigned int *sge_ind)
 {
@@ -545,8 +595,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 			dseg = wqe;
 			set_data_seg_v2(dseg, wr->sg_list);
 
-			wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
-			set_atomic_seg(wqe, atomic_wr(wr));
+			ret = set_atomic_seg(qp, wr, rc_sq_wqe->msg_len,
+					     dseg, &sge_idx);
+			if (ret) {
+				*bad_wr = wr;
+				goto out;
+			}
 			roce_set_field(rc_sq_wqe->byte_16,
 				       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
 				       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
@@ -1668,7 +1722,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 	caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
 	caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ;
 	caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ;
-	caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ;
+	caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ;
 	caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ;
 	caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ;
 	caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ;
@@ -2860,19 +2914,19 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 			break;
 		case HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP:
 			wc->opcode = IB_WC_COMP_SWAP;
-			wc->byte_len = 8;
+			wc->byte_len = le32_to_cpu(cqe->byte_cnt);
 			break;
 		case HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD:
 			wc->opcode = IB_WC_FETCH_ADD;
-			wc->byte_len = 8;
+			wc->byte_len = le32_to_cpu(cqe->byte_cnt);
 			break;
 		case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP:
 			wc->opcode = IB_WC_MASKED_COMP_SWAP;
-			wc->byte_len = 8;
+			wc->byte_len = le32_to_cpu(cqe->byte_cnt);
 			break;
 		case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD:
 			wc->opcode = IB_WC_MASKED_FETCH_ADD;
-			wc->byte_len = 8;
+			wc->byte_len = le32_to_cpu(cqe->byte_cnt);
 			break;
 		case HNS_ROCE_SQ_OPCODE_FAST_REG_WR:
 			wc->opcode = IB_WC_REG_MR;
@@ -3211,6 +3265,9 @@ static void set_access_flags(struct hns_roce_qp *hr_qp,
 	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
 	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
+	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S,
+		     !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
+	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_EXT_ATE_S, 0);
 }
 
 static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
@@ -3578,6 +3635,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
 			     IB_ACCESS_REMOTE_ATOMIC));
 		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 			     0);
+		roce_set_bit(context->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S,
+			     !!(attr->qp_access_flags &
+			     IB_ACCESS_REMOTE_ATOMIC));
+		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
 	} else {
 		roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
 			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ));
@@ -3593,6 +3656,13 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
 			     !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
 		roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
 			     0);
+
+		roce_set_bit(context->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S,
+			     !!(hr_qp->access_flags &
+			     IB_ACCESS_REMOTE_ATOMIC));
+		roce_set_bit(qpc_mask->byte_76_srqn_op_en,
+			     V2_QPC_BYTE_76_EXT_ATE_S, 0);
 	}
 
 	roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 76a14db..0a9d1e5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -81,6 +81,7 @@
 #define HNS_ROCE_V2_QPC_ENTRY_SZ		256
 #define HNS_ROCE_V2_IRRL_ENTRY_SZ		64
 #define HNS_ROCE_V2_TRRL_ENTRY_SZ		48
+#define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ	100
 #define HNS_ROCE_V2_CQC_ENTRY_SZ		64
 #define HNS_ROCE_V2_SRQC_ENTRY_SZ		64
 #define HNS_ROCE_V2_MTPT_ENTRY_SZ		64
@@ -158,6 +159,12 @@ enum {
 
 #define HNS_ROCE_V2_CQE_QPN_MASK		0x3ffff
 
+#define EXT_SGE_BYTE_8_NUM			2
+#define STANDARD_ATOMIC_BYTE_8			0x8
+#define EXTEND_ATOMIC_BYTE_16			0x10
+#define EXTEND_ATOMIC_BYTE_32			0x20
+#define EXTEND_ATOMIC_BYTE_64			0x40
+
 enum {
 	HNS_ROCE_V2_WQE_OP_SEND				= 0x0,
 	HNS_ROCE_V2_WQE_OP_SEND_WITH_INV		= 0x1,
@@ -644,6 +651,7 @@ struct hns_roce_v2_qp_context {
 
 #define V2_QPC_BYTE_76_RQIE_S 28
+#define V2_QPC_BYTE_76_EXT_ATE_S 29
 #define V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
 
 #define V2_QPC_BYTE_80_RX_CQN_S 0
 #define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)
-- 
2.8.1
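
Note (illustrative only, not part of the patch): the sketch below shows the
word-splitting that set_extend_atomic_seg() performs. An extended atomic
operand of 16, 32 or 64 bytes is treated as msg_len >> 3 eight-byte words and
written two words per extended SGE slot (EXT_SGE_BYTE_8_NUM). The helper name,
buffer contents and printf output here are invented for demonstration; it is a
plain user-space C program, not driver code.

/* Sketch of the extended SGE layout used for extended atomics. */
#include <stdint.h>
#include <stdio.h>

#define EXT_SGE_BYTE_8_NUM 2	/* 8-byte words carried by one extended SGE slot */

/* Hypothetical helper: print which words of the operand land in which slot. */
static void show_ext_sge_layout(const uint64_t *data, unsigned int msg_len)
{
	unsigned int ex_sge_num = msg_len >> 3;	/* number of 8-byte words */
	unsigned int sge_idx = 0;
	unsigned int i;

	for (i = 0; i < ex_sge_num; i += EXT_SGE_BYTE_8_NUM, sge_idx++)
		printf("SGE %u: word[%u]=0x%llx word[%u]=0x%llx\n",
		       sge_idx, i, (unsigned long long)data[i],
		       i + 1, (unsigned long long)data[i + 1]);
}

int main(void)
{
	/* e.g. the swap operand of a 32-byte extended cmp & swap */
	uint64_t swap_data[4] = { 0x1111, 0x2222, 0x3333, 0x4444 };

	show_ext_sge_layout(swap_data, sizeof(swap_data));
	return 0;
}

For a cmp & swap of more than 8 bytes the same slicing happens twice, once for
the swap operand and once for the compare operand, which is why
set_atomic_seg() calls set_extend_atomic_seg() twice in that branch.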