On 2018/9/8 1:49, Leon Romanovsky wrote: > On Thu, Sep 06, 2018 at 04:49:40PM +0800, Yixian Liu wrote: >> This patch adds memory window (mw) allocation support in >> the kernel space. >> >> Signed-off-by: Yixian Liu <liuyixian@xxxxxxxxxx> >> --- >> drivers/infiniband/hw/hns/hns_roce_device.h | 21 +++++ >> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 48 +++++++++++ >> drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 10 +++ >> drivers/infiniband/hw/hns/hns_roce_main.c | 6 ++ >> drivers/infiniband/hw/hns/hns_roce_mr.c | 120 ++++++++++++++++++++++++++++ >> 5 files changed, 205 insertions(+) >> >> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h >> index cfb88a4..4fe4a3b 100644 >> --- a/drivers/infiniband/hw/hns/hns_roce_device.h >> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h >> @@ -193,6 +193,7 @@ enum { >> HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), >> HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3), >> HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4), >> + HNS_ROCE_CAP_FLAG_MW = BIT(7), >> }; >> >> enum hns_roce_mtt_type { >> @@ -293,6 +294,16 @@ struct hns_roce_mtt { >> enum hns_roce_mtt_type mtt_type; >> }; >> >> +struct hns_roce_mw { >> + struct ib_mw ibmw; >> + u32 pdn; >> + u32 rkey; >> + int enabled; /* MW's active status */ >> + u32 pbl_hop_num; >> + u32 pbl_ba_pg_sz; >> + u32 pbl_buf_pg_sz; >> +}; >> + >> /* Only support 4K page size for mr register */ >> #define MR_SIZE_4K 0 >> >> @@ -765,6 +776,8 @@ struct hns_roce_hw { >> struct hns_roce_mr *mr, int flags, u32 pdn, >> int mr_access_flags, u64 iova, u64 size, >> void *mb_buf); >> + int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw, >> + unsigned long mtpt_idx); >> void (*write_cqc)(struct hns_roce_dev *hr_dev, >> struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, >> dma_addr_t dma_handle, int nent, u32 vector); >> @@ -864,6 +877,11 @@ static inline struct hns_roce_mr *to_hr_mr(struct ib_mr *ibmr) >> return container_of(ibmr, struct hns_roce_mr, ibmr); >> } >> >> 
+static inline struct hns_roce_mw *to_hr_mw(struct ib_mw *ibmw) >> +{ >> + return container_of(ibmw, struct hns_roce_mw, ibmw); >> +} >> + >> static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp) >> { >> return container_of(ibqp, struct hns_roce_qp, ibqp); >> @@ -975,6 +993,9 @@ int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, >> unsigned long mpt_index); >> unsigned long key_to_hw_index(u32 key); >> >> +struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type, >> + struct ib_udata *udata); >> + >> void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, >> struct hns_roce_buf *buf); >> int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, >> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c >> index b16ad95..5b65f2b 100644 >> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c >> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c >> @@ -1255,6 +1255,10 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) >> HNS_ROCE_CAP_FLAG_RQ_INLINE | >> HNS_ROCE_CAP_FLAG_RECORD_DB | >> HNS_ROCE_CAP_FLAG_SQ_RECORD_DB; >> + >> + if (hr_dev->pci_dev->revision == 0x21) >> + caps->flags |= HNS_ROCE_CAP_FLAG_MW; >> + >> caps->pkey_table_len[0] = 1; >> caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; >> caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM; >> @@ -1817,6 +1821,49 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, >> return 0; >> } >> >> +static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw, >> + unsigned long mtpt_idx) >> +{ >> + struct hns_roce_v2_mpt_entry *mpt_entry; >> + >> + mpt_entry = mb_buf; >> + memset(mpt_entry, 0, sizeof(*mpt_entry)); >> + >> + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, >> + V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE); >> + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, >> + V2_MPT_BYTE_4_PD_S, mw->pdn); >> + roce_set_field(mpt_entry->byte_4_pd_hop_st, 
V2_MPT_BYTE_4_PBL_HOP_NUM_M, >> + V2_MPT_BYTE_4_PBL_HOP_NUM_S, mw->pbl_hop_num == >> + HNS_ROCE_HOP_NUM_0 ? 0 : mw->pbl_hop_num); >> + roce_set_field(mpt_entry->byte_4_pd_hop_st, >> + V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, >> + V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, >> + mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET); >> + mpt_entry->byte_4_pd_hop_st = cpu_to_le32(mpt_entry->byte_4_pd_hop_st); >> + >> + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); >> + roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); >> + mpt_entry->byte_8_mw_cnt_en = cpu_to_le32(mpt_entry->byte_8_mw_cnt_en); >> + >> + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0); >> + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1); >> + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1); >> + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S, >> + mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1); >> + mpt_entry->byte_12_mw_pa = cpu_to_le32(mpt_entry->byte_12_mw_pa); >> + >> + roce_set_field(mpt_entry->byte_64_buf_pa1, >> + V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, >> + V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, >> + mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET); >> + mpt_entry->byte_64_buf_pa1 = cpu_to_le32(mpt_entry->byte_64_buf_pa1); >> + >> + mpt_entry->lkey = cpu_to_le32(mw->rkey); >> + >> + return 0; >> +} >> + >> static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) >> { >> return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, >> @@ -5159,6 +5206,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { >> .set_mac = hns_roce_v2_set_mac, >> .write_mtpt = hns_roce_v2_write_mtpt, >> .rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt, >> + .mw_write_mtpt = hns_roce_v2_mw_write_mtpt, >> .write_cqc = hns_roce_v2_write_cqc, >> .set_hem = hns_roce_v2_set_hem, >> .clear_hem = hns_roce_v2_clear_hem, >> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h >> index 14aa308..746fe80 100644 >> --- 
a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h >> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h >> @@ -324,6 +324,7 @@ struct hns_roce_v2_cq_context { >> >> enum{ >> V2_MPT_ST_VALID = 0x1, >> + V2_MPT_ST_FREE = 0x2, >> }; >> >> enum hns_roce_v2_qp_state { >> @@ -878,8 +879,17 @@ struct hns_roce_v2_mpt_entry { >> >> #define V2_MPT_BYTE_8_LW_EN_S 7 >> >> +#define V2_MPT_BYTE_8_MW_CNT_S 8 >> +#define V2_MPT_BYTE_8_MW_CNT_M GENMASK(31, 8) >> + >> #define V2_MPT_BYTE_12_PA_S 1 >> >> +#define V2_MPT_BYTE_12_MR_MW_S 4 >> + >> +#define V2_MPT_BYTE_12_BPD_S 5 >> + >> +#define V2_MPT_BYTE_12_BQP_S 6 >> + >> #define V2_MPT_BYTE_12_INNER_PA_VLD_S 7 >> >> #define V2_MPT_BYTE_12_MW_BIND_QPN_S 8 >> diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c >> index c5cae9a..ff71d91 100644 >> --- a/drivers/infiniband/hw/hns/hns_roce_main.c >> +++ b/drivers/infiniband/hw/hns/hns_roce_main.c >> @@ -584,6 +584,12 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) >> ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); >> } >> >> + /* MW */ >> + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) { >> + ib_dev->alloc_mw = hns_roce_alloc_mw; >> + ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_ALLOC_MW); >> + } >> + >> /* OTHERS */ >> ib_dev->get_port_immutable = hns_roce_port_immutable; >> ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext; >> diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c >> index eb26a5f..84779dd 100644 >> --- a/drivers/infiniband/hw/hns/hns_roce_mr.c >> +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c >> @@ -1201,3 +1201,123 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr) >> >> return ret; >> } >> + >> +static int hns_roce_mw_free(struct hns_roce_dev *hr_dev, >> + struct hns_roce_mw *mw) >> +{ >> + struct device *dev = hr_dev->dev; >> + int ret = 0; >> + >> + if (mw->enabled) { >> + ret = hns_roce_hw2sw_mpt(hr_dev, NULL, 
key_to_hw_index(mw->rkey) >> + & (hr_dev->caps.num_mtpts - 1)); >> + if (ret) { >> + dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret); >> + return ret; >> + } >> + } >> + >> + if (mw->enabled) > > I see two "if (mw->enabled)", one after another. > They should be combined. Thanks, I will fix it in the next version. > >> + hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, >> + key_to_hw_index(mw->rkey)); >> + >> + hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, >> + key_to_hw_index(mw->rkey), BITMAP_NO_RR); >> + >> + return ret; >> +}