Re: [PATCH for-next 1/6] RDMA/hns: Add rq inline data support for hip08 RoCE

On Sat, Dec 23, 2017 at 04:22:17PM +0800, Lijun Ou wrote:
> This patch mainly implement rq inline data feature for hip08
> RoCE in kernel mode.
>
> Signed-off-by: Lijun Ou <oulijun@xxxxxxxxxx>
> ---
>  drivers/infiniband/hw/hns/hns_roce_device.h | 18 +++++++++
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 63 ++++++++++++++++++++++++++++-
>  drivers/infiniband/hw/hns/hns_roce_qp.c     | 52 ++++++++++++++++++++----
>  3 files changed, 125 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
> index dcfd209..8d123d3 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
> @@ -178,6 +178,7 @@ enum {
>  enum {
>  	HNS_ROCE_CAP_FLAG_REREG_MR		= BIT(0),
>  	HNS_ROCE_CAP_FLAG_ROCE_V1_V2		= BIT(1),
> +	HNS_ROCE_CAP_FLAG_RQ_INLINE		= BIT(2)
>  };
>
>  enum hns_roce_mtt_type {
> @@ -446,6 +447,21 @@ struct hns_roce_cmd_mailbox {
>
>  struct hns_roce_dev;
>
> +struct hns_roce_rinl_sge {
> +	void			*addr;
> +	u32			len;
> +};
> +
> +struct hns_roce_rinl_wqe {
> +	struct hns_roce_rinl_sge *sg_list;
> +	u32			 sge_cnt;
> +};
> +
> +struct hns_roce_rinl_buf {
> +	struct hns_roce_rinl_wqe *wqe_list;
> +	u32			 wqe_cnt;
> +};
> +
>  struct hns_roce_qp {
>  	struct ib_qp		ibqp;
>  	struct hns_roce_buf	hr_buf;
> @@ -477,6 +493,8 @@ struct hns_roce_qp {
>
>  	struct hns_roce_sge	sge;
>  	u32			next_sge;
> +
> +	struct hns_roce_rinl_buf rq_inl_buf;
>  };
>
>  struct hns_roce_sqp {
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index 4d3e976..b17dcfa 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -299,6 +299,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
>  	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
>  	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
>  	struct hns_roce_v2_wqe_data_seg *dseg;
> +	struct hns_roce_rinl_sge *sge_list;
>  	struct device *dev = hr_dev->dev;
>  	struct hns_roce_v2_db rq_db;
>  	unsigned long flags;
> @@ -347,6 +348,14 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
>  			dseg[i].addr = 0;
>  		}
>
> +		/* rq support inline data */
> +		sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
> +		hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = (u32)wr->num_sge;
> +		for (i = 0; i < wr->num_sge; i++) {
> +			sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr;
> +			sge_list[i].len = wr->sg_list[i].length;
> +		}
> +
>  		hr_qp->rq.wrid[ind] = wr->wr_id;
>
>  		ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1);
> @@ -961,7 +970,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
>  	caps->chunk_sz		= HNS_ROCE_V2_TABLE_CHUNK_SIZE;
>
>  	caps->flags		= HNS_ROCE_CAP_FLAG_REREG_MR |
> -				  HNS_ROCE_CAP_FLAG_ROCE_V1_V2;
> +				  HNS_ROCE_CAP_FLAG_ROCE_V1_V2 |
> +				  HNS_ROCE_CAP_FLAG_RQ_INLINE;
>  	caps->pkey_table_len[0] = 1;
>  	caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
>  	caps->ceqe_depth	= HNS_ROCE_V2_COMP_EQE_NUM;
> @@ -1476,6 +1486,7 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
>  static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
>  				struct hns_roce_qp **cur_qp, struct ib_wc *wc)
>  {
> +	struct hns_roce_rinl_sge *sge_list;
>  	struct hns_roce_dev *hr_dev;
>  	struct hns_roce_v2_cqe *cqe;
>  	struct hns_roce_qp *hr_qp;
> @@ -1673,6 +1684,46 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
>  			break;
>  		}
>
> +		if ((wc->qp->qp_type == IB_QPT_RC ||
> +		     wc->qp->qp_type == IB_QPT_UC) &&
> +		    (opcode == HNS_ROCE_V2_OPCODE_SEND ||
> +		    opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_IMM ||
> +		    opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_INV) &&
> +		    (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_RQ_INLINE_S))) {

It would be better to put this in a separate function, or an inline
function if you are really worried about performance.
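
For example, the body of this if () could then shrink to a single call;
the helper name below is only something I'm making up for illustration
(a sketch of its body follows further down), and I'm assuming a local
int ret is available:

			/* helper copies the inline payload and sets
			 * wc->status on overflow
			 */
			ret = hns_roce_handle_recv_inl_wqe(cqe, cur_qp, wc);
			if (ret)
				return ret;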

> +			u32 wr_num, wr_cnt, sge_num, sge_cnt;
> +			u32 data_len, size;
> +			u8 *wqe_buf;
> +
> +			wr_num = (u16)roce_get_field(cqe->byte_4,
> +					V2_CQE_BYTE_4_WQE_INDX_M,
> +					V2_CQE_BYTE_4_WQE_INDX_S) & 0xffff;
> +			wr_cnt = wr_num & ((*cur_qp)->rq.wqe_cnt - 1);
> +
> +			sge_list =
> +				(*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sg_list;
> +			sge_num =
> +				(*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sge_cnt;

This level of indirection means it is time to introduce temporary variables.
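
For instance, even a single local pointer to the per-WQE entry would
already help (a sketch):

	struct hns_roce_rinl_wqe *rinl_wqe =
			&(*cur_qp)->rq_inl_buf.wqe_list[wr_cnt];

	sge_list = rinl_wqe->sg_list;
	sge_num = rinl_wqe->sge_cnt;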

> +			wqe_buf = (u8 *)get_recv_wqe(*cur_qp, wr_cnt);
> +			data_len = wc->byte_len;
> +
> +			for (sge_cnt = 0; (sge_cnt < sge_num) && (data_len);
> +			     sge_cnt++) {
> +				size = sge_list[sge_cnt].len < data_len ?
> +				       sge_list[sge_cnt].len : data_len;

Use the min(a, b) macro available in the kernel headers.
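
Both operands are u32 here, so plain min() works, i.e.:

	size = min(sge_list[sge_cnt].len, data_len);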

> +
> +				memcpy((void *)sge_list[sge_cnt].addr,
> +					(void *)wqe_buf, size);
> +
> +				data_len -= size;
> +				wqe_buf += size;
> +			}
> +
> +			if (data_len) {
> +				wc->status = IB_WC_LOC_LEN_ERR;
> +				return -EAGAIN;
> +			}
> +		}

To be honest, the whole new chunk is very strange: it has a cast on
almost every line and looks very suspicious. Can you clean up the
variable types and function return values to avoid the casts?
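
For instance, the whole chunk could be pulled out into something like
the helper below. This is only a rough sketch (the name matches the
hypothetical call suggested earlier, nothing more), but note how the
temporary variables, min() and the void * return of get_recv_wqe()
make the casts disappear:

static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
					struct hns_roce_qp **cur_qp,
					struct ib_wc *wc)
{
	struct hns_roce_qp *qp = *cur_qp;
	struct hns_roce_rinl_wqe *rinl_wqe;
	struct hns_roce_rinl_sge *sge_list;
	u32 wr_num, wr_cnt, sge_num, sge_cnt;
	u32 data_len, size;
	u8 *wqe_buf;

	wr_num = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_WQE_INDX_M,
				V2_CQE_BYTE_4_WQE_INDX_S) & 0xffff;
	wr_cnt = wr_num & (qp->rq.wqe_cnt - 1);

	rinl_wqe = &qp->rq_inl_buf.wqe_list[wr_cnt];
	sge_list = rinl_wqe->sg_list;
	sge_num = rinl_wqe->sge_cnt;

	/* get_recv_wqe() returns void *, so no cast is needed */
	wqe_buf = get_recv_wqe(qp, wr_cnt);
	data_len = wc->byte_len;

	/* scatter the inline payload from the WQE buffer into the SGEs */
	for (sge_cnt = 0; sge_cnt < sge_num && data_len; sge_cnt++) {
		size = min(sge_list[sge_cnt].len, data_len);

		/* .addr is already a void *, memcpy() takes it as-is */
		memcpy(sge_list[sge_cnt].addr, wqe_buf, size);

		data_len -= size;
		wqe_buf += size;
	}

	/* the inline payload did not fit into the posted receive SGEs */
	if (data_len) {
		wc->status = IB_WC_LOC_LEN_ERR;
		return -EAGAIN;
	}

	return 0;
}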

> +
>  		/* Update tail pointer, record wr_id */
>  		wq = &(*cur_qp)->rq;
>  		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
> @@ -1972,6 +2023,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
>  		     !!(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC));
>  	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
>
> +	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 1);
>  	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0);
>
>  	roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
> @@ -3114,6 +3166,15 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
>  		hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
>  	}
>
> +	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
> +		if (hr_qp->rq_inl_buf.wqe_list) {
> +			kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
> +			kfree(hr_qp->rq_inl_buf.wqe_list);
> +			hr_qp->rq_inl_buf.wqe_list[0].sg_list = NULL;

Did it even work? You kfree() hr_qp->rq_inl_buf.wqe_list on the line
above, and then write NULL into wqe_list[0].sg_list through the pointer
you just freed.
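
And since wqe_list itself is freed anyway, clearing wqe_list[0].sg_list
serves no purpose at all; the whole thing can collapse to something like
this (sketch only):

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE &&
	    hr_qp->rq_inl_buf.wqe_list) {
		/* free the flat SGE array first, then the list that owns it */
		kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
		kfree(hr_qp->rq_inl_buf.wqe_list);
		hr_qp->rq_inl_buf.wqe_list = NULL;
	}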

> +			hr_qp->rq_inl_buf.wqe_list = NULL;
> +		}
> +	}
> +
>  	return 0;
>  }
>
> diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
> index 69e2584..7c75f21b 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_qp.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
> @@ -494,6 +494,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>  	int ret = 0;
>  	u32 page_shift;
>  	u32 npages;
> +	int i;
>
>  	mutex_init(&hr_qp->mutex);
>  	spin_lock_init(&hr_qp->sq.lock);
> @@ -513,18 +514,42 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>  		goto err_out;
>  	}
>
> +	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
> +		/* allocate recv inline buf */
> +		hr_qp->rq_inl_buf.wqe_list = kmalloc_array(hr_qp->rq.wqe_cnt,
> +					       sizeof(struct hns_roce_rinl_wqe),
> +					       GFP_KERNEL);
> +		if (!hr_qp->rq_inl_buf.wqe_list)
> +			goto err_out;
> +
> +		hr_qp->rq_inl_buf.wqe_cnt = hr_qp->rq.wqe_cnt;
> +
> +		hr_qp->rq_inl_buf.wqe_list[0].sg_list =
> +				kmalloc_array(hr_qp->rq_inl_buf.wqe_cnt *
> +					      init_attr->cap.max_recv_sge,
> +					      sizeof(struct hns_roce_rinl_sge),
> +					      GFP_KERNEL);
> +		if (!hr_qp->rq_inl_buf.wqe_list[0].sg_list)
> +			goto err_wqe_list;
> +
> +		for (i = 1; i < hr_qp->rq_inl_buf.wqe_cnt; i++)
> +			hr_qp->rq_inl_buf.wqe_list[i].sg_list =
> +				&hr_qp->rq_inl_buf.wqe_list[0].sg_list[i *
> +				init_attr->cap.max_recv_sge];
> +	}

I'm not an expert in this area of the code, but calling kmalloc_array()
on the first element of an array that was itself just allocated with
kmalloc_array() looks strange.

I don't know what you can do about that; maybe other reviewers can
suggest something.
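
For what it's worth, my reading is that wqe_list[0].sg_list is used to
hold one flat allocation of wqe_cnt * max_recv_sge SGEs, and the loop
then points every wqe_list[i].sg_list at its own slice of it. If that is
the intent, a local pointer would make the layout explicit; a sketch
only, keeping the same ownership (wqe_list[0].sg_list still ends up at
the start of the buffer, so the kfree() on destroy keeps working):

		/* declared with the other locals */
		struct hns_roce_rinl_sge *sges;

		/* one flat array of wqe_cnt * max_recv_sge inline SGEs */
		sges = kmalloc_array(hr_qp->rq_inl_buf.wqe_cnt *
				     init_attr->cap.max_recv_sge,
				     sizeof(*sges), GFP_KERNEL);
		if (!sges)
			goto err_wqe_list;

		/* each WQE gets its own max_recv_sge-sized slice */
		for (i = 0; i < hr_qp->rq_inl_buf.wqe_cnt; i++)
			hr_qp->rq_inl_buf.wqe_list[i].sg_list =
				&sges[i * init_attr->cap.max_recv_sge];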

> +
>  	if (ib_pd->uobject) {
>  		if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
>  			dev_err(dev, "ib_copy_from_udata error for create qp\n");
>  			ret = -EFAULT;
> -			goto err_out;
> +			goto err_rq_sge_list;
>  		}
>
>  		ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp,
>  						&ucmd);
>  		if (ret) {
>  			dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n");
> -			goto err_out;
> +			goto err_rq_sge_list;
>  		}
>
>  		hr_qp->umem = ib_umem_get(ib_pd->uobject->context,
> @@ -533,7 +558,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>  		if (IS_ERR(hr_qp->umem)) {
>  			dev_err(dev, "ib_umem_get error for create qp\n");
>  			ret = PTR_ERR(hr_qp->umem);
> -			goto err_out;
> +			goto err_rq_sge_list;
>  		}
>
>  		hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
> @@ -567,13 +592,13 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>  		    IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
>  			dev_err(dev, "init_attr->create_flags error!\n");
>  			ret = -EINVAL;
> -			goto err_out;
> +			goto err_rq_sge_list;
>  		}
>
>  		if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) {
>  			dev_err(dev, "init_attr->create_flags error!\n");
>  			ret = -EINVAL;
> -			goto err_out;
> +			goto err_rq_sge_list;
>  		}
>
>  		/* Set SQ size */
> @@ -581,7 +606,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>  						  hr_qp);
>  		if (ret) {
>  			dev_err(dev, "hns_roce_set_kernel_sq_size error!\n");
> -			goto err_out;
> +			goto err_rq_sge_list;
>  		}
>
>  		/* QP doorbell register address */
> @@ -597,7 +622,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>  				       &hr_qp->hr_buf, page_shift)) {
>  			dev_err(dev, "hns_roce_buf_alloc error!\n");
>  			ret = -ENOMEM;
> -			goto err_out;
> +			goto err_rq_sge_list;
>  		}
>
>  		hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
> @@ -679,6 +704,19 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>  	else
>  		hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
>
> +err_rq_sge_list:
> +	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
> +		if (hr_qp->rq_inl_buf.wqe_list) {
> +			kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
> +			hr_qp->rq_inl_buf.wqe_list[0].sg_list = NULL;

IMHO, in the general case it is preferable to avoid setting pointers to
NULL after kfree().
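
Once the NULL stores go away, the unwind can shrink to plain kfree()
calls; a sketch (by the time these labels are reachable with the
RQ-inline flag set, both buffers are known to be allocated):

err_rq_sge_list:
	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
		kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);

err_wqe_list:
	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
		kfree(hr_qp->rq_inl_buf.wqe_list);

err_out:
	return ret;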

> +		}
> +
> +err_wqe_list:
> +	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
> +		kfree(hr_qp->rq_inl_buf.wqe_list);
> +		hr_qp->rq_inl_buf.wqe_list = NULL;
> +	}
> +
>  err_out:
>  	return ret;
>  }
> --
> 1.9.1
>
