On Wed, Feb 05, 2020 at 02:04:09PM +0800, Weihang Li wrote: > > > On 2020/1/27 13:52, Leon Romanovsky wrote: > > On Sun, Jan 26, 2020 at 10:58:35PM +0800, Weihang Li wrote: > >> From: Xi Wang <wangxi11@xxxxxxxxxx> > >> > >> The eqe has a private multi-hop addressing implementation, but there is > >> already a set of interfaces in the hns driver that can achieve this. > >> > >> So, simplify the eqe buffer allocation process by using the mtr interface > >> and remove large amount of repeated logic. > >> > >> Signed-off-by: Xi Wang <wangxi11@xxxxxxxxxx> > >> Signed-off-by: Weihang Li <liweihang@xxxxxxxxxx> > >> --- > >> drivers/infiniband/hw/hns/hns_roce_device.h | 10 +- > >> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 481 ++++++---------------------- > >> 2 files changed, 108 insertions(+), 383 deletions(-) > >> > >> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h > >> index a4c45bf..dab3f3c 100644 > >> --- a/drivers/infiniband/hw/hns/hns_roce_device.h > >> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h > >> @@ -757,14 +757,8 @@ struct hns_roce_eq { > >> int eqe_ba_pg_sz; > >> int eqe_buf_pg_sz; > >> int hop_num; > >> - u64 *bt_l0; /* Base address table for L0 */ > >> - u64 **bt_l1; /* Base address table for L1 */ > >> - u64 **buf; > >> - dma_addr_t l0_dma; > >> - dma_addr_t *l1_dma; > >> - dma_addr_t *buf_dma; > >> - u32 l0_last_num; /* L0 last chunk num */ > >> - u32 l1_last_num; /* L1 last chunk num */ > >> + struct hns_roce_mtr mtr; > >> + struct hns_roce_buf buf; > >> int eq_max_cnt; > >> int eq_period; > >> int shift; > >> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > >> index c462b19..88f2e76 100644 > >> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > >> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > >> @@ -5287,44 +5287,24 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq) > >> hns_roce_write64(hr_dev, doorbell, eq->doorbell); > 
>> } > >> > >> -static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry) > >> +static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset) > >> { > >> u32 buf_chk_sz; > >> - unsigned long off; > >> > >> buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT); > >> - off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE; > >> - > >> - return (struct hns_roce_aeqe *)((char *)(eq->buf_list->buf) + > >> - off % buf_chk_sz); > >> -} > >> - > >> -static struct hns_roce_aeqe *mhop_get_aeqe(struct hns_roce_eq *eq, u32 entry) > >> -{ > >> - u32 buf_chk_sz; > >> - unsigned long off; > >> - > >> - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT); > >> - > >> - off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE; > >> - > >> - if (eq->hop_num == HNS_ROCE_HOP_NUM_0) > >> - return (struct hns_roce_aeqe *)((u8 *)(eq->bt_l0) + > >> - off % buf_chk_sz); > >> + if (eq->buf.nbufs == 1) > >> + return eq->buf.direct.buf + offset % buf_chk_sz; > >> else > >> - return (struct hns_roce_aeqe *)((u8 *) > >> - (eq->buf[off / buf_chk_sz]) + off % buf_chk_sz); > >> + return eq->buf.page_list[offset / buf_chk_sz].buf + > >> + offset % buf_chk_sz; > >> } > >> > >> static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) > >> { > >> struct hns_roce_aeqe *aeqe; > >> > >> - if (!eq->hop_num) > >> - aeqe = get_aeqe_v2(eq, eq->cons_index); > >> - else > >> - aeqe = mhop_get_aeqe(eq, eq->cons_index); > >> - > >> + aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) * > >> + HNS_ROCE_AEQ_ENTRY_SIZE); > >> return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^ > >> !!(eq->cons_index & eq->entries)) ? 
aeqe : NULL; > >> } > >> @@ -5417,44 +5397,12 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, > >> return aeqe_found; > >> } > >> > >> -static struct hns_roce_ceqe *get_ceqe_v2(struct hns_roce_eq *eq, u32 entry) > >> -{ > >> - u32 buf_chk_sz; > >> - unsigned long off; > >> - > >> - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT); > >> - off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE; > >> - > >> - return (struct hns_roce_ceqe *)((char *)(eq->buf_list->buf) + > >> - off % buf_chk_sz); > >> -} > >> - > >> -static struct hns_roce_ceqe *mhop_get_ceqe(struct hns_roce_eq *eq, u32 entry) > >> -{ > >> - u32 buf_chk_sz; > >> - unsigned long off; > >> - > >> - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT); > >> - > >> - off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE; > >> - > >> - if (eq->hop_num == HNS_ROCE_HOP_NUM_0) > >> - return (struct hns_roce_ceqe *)((u8 *)(eq->bt_l0) + > >> - off % buf_chk_sz); > >> - else > >> - return (struct hns_roce_ceqe *)((u8 *)(eq->buf[off / > >> - buf_chk_sz]) + off % buf_chk_sz); > >> -} > >> - > >> static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) > >> { > >> struct hns_roce_ceqe *ceqe; > >> > >> - if (!eq->hop_num) > >> - ceqe = get_ceqe_v2(eq, eq->cons_index); > >> - else > >> - ceqe = mhop_get_ceqe(eq, eq->cons_index); > >> - > >> + ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) * > >> + HNS_ROCE_CEQ_ENTRY_SIZE); > >> return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^ > >> (!!(eq->cons_index & eq->entries)) ? 
ceqe : NULL; > >> } > >> @@ -5614,90 +5562,11 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn) > >> dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn); > >> } > >> > >> -static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev, > >> - struct hns_roce_eq *eq) > >> +static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) > >> { > >> - struct device *dev = hr_dev->dev; > >> - u64 idx; > >> - u64 size; > >> - u32 buf_chk_sz; > >> - u32 bt_chk_sz; > >> - u32 mhop_num; > >> - int eqe_alloc; > >> - int i = 0; > >> - int j = 0; > >> - > >> - mhop_num = hr_dev->caps.eqe_hop_num; > >> - buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT); > >> - bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT); > >> - > >> - if (mhop_num == HNS_ROCE_HOP_NUM_0) { > >> - dma_free_coherent(dev, (unsigned int)(eq->entries * > >> - eq->eqe_size), eq->bt_l0, eq->l0_dma); > >> - return; > >> - } > >> - > >> - dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma); > >> - if (mhop_num == 1) { > >> - for (i = 0; i < eq->l0_last_num; i++) { > >> - if (i == eq->l0_last_num - 1) { > >> - eqe_alloc = i * (buf_chk_sz / eq->eqe_size); > >> - size = (eq->entries - eqe_alloc) * eq->eqe_size; > >> - dma_free_coherent(dev, size, eq->buf[i], > >> - eq->buf_dma[i]); > >> - break; > >> - } > >> - dma_free_coherent(dev, buf_chk_sz, eq->buf[i], > >> - eq->buf_dma[i]); > >> - } > >> - } else if (mhop_num == 2) { > >> - for (i = 0; i < eq->l0_last_num; i++) { > >> - dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i], > >> - eq->l1_dma[i]); > >> - > >> - for (j = 0; j < bt_chk_sz / BA_BYTE_LEN; j++) { > >> - idx = i * (bt_chk_sz / BA_BYTE_LEN) + j; > >> - if ((i == eq->l0_last_num - 1) > >> - && j == eq->l1_last_num - 1) { > >> - eqe_alloc = (buf_chk_sz / eq->eqe_size) > >> - * idx; > >> - size = (eq->entries - eqe_alloc) > >> - * eq->eqe_size; > >> - dma_free_coherent(dev, size, > >> - eq->buf[idx], > >> - eq->buf_dma[idx]); > >> - break; 
> >> - } > >> - dma_free_coherent(dev, buf_chk_sz, eq->buf[idx], > >> - eq->buf_dma[idx]); > >> - } > >> - } > >> - } > >> - kfree(eq->buf_dma); > >> - kfree(eq->buf); > >> - kfree(eq->l1_dma); > >> - kfree(eq->bt_l1); > >> - eq->buf_dma = NULL; > >> - eq->buf = NULL; > >> - eq->l1_dma = NULL; > >> - eq->bt_l1 = NULL; > >> -} > >> - > >> -static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev, > >> - struct hns_roce_eq *eq) > >> -{ > >> - u32 buf_chk_sz; > >> - > >> - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT); > >> - > >> - if (hr_dev->caps.eqe_hop_num) { > >> - hns_roce_mhop_free_eq(hr_dev, eq); > >> - return; > >> - } > >> - > >> - dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf, > >> - eq->buf_list->map); > >> - kfree(eq->buf_list); > >> + if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) > >> + hns_roce_mtr_cleanup(hr_dev, &eq->mtr); > >> + hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf); > >> } > >> > >> static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, > >> @@ -5705,6 +5574,8 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, > >> void *mb_buf) > >> { > >> struct hns_roce_eq_context *eqc; > >> + u64 ba[MTT_MIN_COUNT] = { 0 }; > >> + int count; > >> > >> eqc = mb_buf; > >> memset(eqc, 0, sizeof(struct hns_roce_eq_context)); > >> @@ -5720,10 +5591,23 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, > >> eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz; > >> eq->shift = ilog2((unsigned int)eq->entries); > >> > >> - if (!eq->hop_num) > >> - eq->eqe_ba = eq->buf_list->map; > >> - else > >> - eq->eqe_ba = eq->l0_dma; > >> + /* if not muti-hop, eqe buffer only use one trunk */ > >> + if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) { > >> + eq->eqe_ba = eq->buf.direct.map; > >> + eq->cur_eqe_ba = eq->eqe_ba; > >> + if (eq->buf.npages > 1) > >> + eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz); > >> + else > >> + eq->nxt_eqe_ba = eq->eqe_ba; > >> + } else { > >> + count = 
hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba, > >> + MTT_MIN_COUNT, &eq->eqe_ba); > >> + eq->cur_eqe_ba = ba[0]; > >> + if (count > 1) > >> + eq->nxt_eqe_ba = ba[1]; > >> + else > >> + eq->nxt_eqe_ba = ba[0]; > >> + } > >> > >> /* set eqc state */ > >> roce_set_field(eqc->byte_4, HNS_ROCE_EQC_EQ_ST_M, HNS_ROCE_EQC_EQ_ST_S, > >> @@ -5821,220 +5705,97 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, > >> HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44); > >> } > >> > >> -static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, > >> - struct hns_roce_eq *eq) > >> +static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, > >> + u32 page_shift) > >> { > >> - struct device *dev = hr_dev->dev; > >> - int eq_alloc_done = 0; > >> - int eq_buf_cnt = 0; > >> - int eqe_alloc; > >> - u32 buf_chk_sz; > >> - u32 bt_chk_sz; > >> - u32 mhop_num; > >> - u64 size; > >> - u64 idx; > >> + struct hns_roce_buf_region region = {}; > >> + dma_addr_t *buf_list = NULL; > >> int ba_num; > >> - int bt_num; > >> - int record_i; > >> - int record_j; > >> - int i = 0; > >> - int j = 0; > >> - > >> - mhop_num = hr_dev->caps.eqe_hop_num; > >> - buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT); > >> - bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT); > >> + int ret; > >> > >> ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size), > >> - buf_chk_sz); > >> - bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN); > >> + 1 << page_shift); > >> + hns_roce_init_buf_region(&region, hr_dev->caps.eqe_hop_num, 0, ba_num); > >> > >> - if (mhop_num == HNS_ROCE_HOP_NUM_0) { > >> - if (eq->entries > buf_chk_sz / eq->eqe_size) { > >> - dev_err(dev, "eq entries %d is larger than buf_pg_sz!", > >> - eq->entries); > >> - return -EINVAL; > >> - } > >> - eq->bt_l0 = dma_alloc_coherent(dev, eq->entries * eq->eqe_size, > >> - &(eq->l0_dma), GFP_KERNEL); > >> - if (!eq->bt_l0) > >> - return -ENOMEM; > >> - > >> - eq->cur_eqe_ba = eq->l0_dma; > >> - 
eq->nxt_eqe_ba = 0; > >> + /* alloc a tmp list for storing eq buf address */ > >> + ret = hns_roce_alloc_buf_list(&region, &buf_list, 1); > >> + if (ret) { > >> + dev_err(hr_dev->dev, "alloc eq buf_list error\n"); > > > > The same comment as we gave for the bnxt driver: no dev_* prints inside the > > driver, use ibdev_*. > > > > Thanks > > > > Hi Leon, > > map_eq_buf() is called before ib_register_device(), so we can't use > ibdev_* here. As long as map_eq_buf() is called after ib_alloc_device(), you will be fine. Thanks > > Thanks for your reminder; another patch that replaces the other dev_* calls in the > hns driver with ibdev_* is being prepared. > > Weihang > > > . > > >