Re: [PATCH for-next 1/6] RDMA/hns: Add rq inline data support for hip08 RoCE

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



在 2017/12/25 17:34, Leon Romanovsky 写道:
> On Sat, Dec 23, 2017 at 04:22:17PM +0800, Lijun Ou wrote:
>> This patch mainly implement rq inline data feature for hip08
>> RoCE in kernel mode.
>>
>> Signed-off-by: Lijun Ou <oulijun@xxxxxxxxxx>
>> ---
>>  drivers/infiniband/hw/hns/hns_roce_device.h | 18 +++++++++
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 63 ++++++++++++++++++++++++++++-
>>  drivers/infiniband/hw/hns/hns_roce_qp.c     | 52 ++++++++++++++++++++----
>>  3 files changed, 125 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
>> index dcfd209..8d123d3 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
>> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
>> @@ -178,6 +178,7 @@ enum {
>>  enum {
>>  	HNS_ROCE_CAP_FLAG_REREG_MR		= BIT(0),
>>  	HNS_ROCE_CAP_FLAG_ROCE_V1_V2		= BIT(1),
>> +	HNS_ROCE_CAP_FLAG_RQ_INLINE		= BIT(2)
>>  };
>>
>>  enum hns_roce_mtt_type {
>> @@ -446,6 +447,21 @@ struct hns_roce_cmd_mailbox {
>>
>>  struct hns_roce_dev;
>>
>> +struct hns_roce_rinl_sge {
>> +	void			*addr;
>> +	u32			len;
>> +};
>> +
>> +struct hns_roce_rinl_wqe {
>> +	struct hns_roce_rinl_sge *sg_list;
>> +	u32			 sge_cnt;
>> +};
>> +
>> +struct hns_roce_rinl_buf {
>> +	struct hns_roce_rinl_wqe *wqe_list;
>> +	u32			 wqe_cnt;
>> +};
>> +
>>  struct hns_roce_qp {
>>  	struct ib_qp		ibqp;
>>  	struct hns_roce_buf	hr_buf;
>> @@ -477,6 +493,8 @@ struct hns_roce_qp {
>>
>>  	struct hns_roce_sge	sge;
>>  	u32			next_sge;
>> +
>> +	struct hns_roce_rinl_buf rq_inl_buf;
>>  };
>>
>>  struct hns_roce_sqp {
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> index 4d3e976..b17dcfa 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> @@ -299,6 +299,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
>>  	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
>>  	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
>>  	struct hns_roce_v2_wqe_data_seg *dseg;
>> +	struct hns_roce_rinl_sge *sge_list;
>>  	struct device *dev = hr_dev->dev;
>>  	struct hns_roce_v2_db rq_db;
>>  	unsigned long flags;
>> @@ -347,6 +348,14 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
>>  			dseg[i].addr = 0;
>>  		}
>>
>> +		/* rq support inline data */
>> +		sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
>> +		hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = (u32)wr->num_sge;
>> +		for (i = 0; i < wr->num_sge; i++) {
>> +			sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr;
>> +			sge_list[i].len = wr->sg_list[i].length;
>> +		}
>> +
>>  		hr_qp->rq.wrid[ind] = wr->wr_id;
>>
>>  		ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1);
>> @@ -961,7 +970,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
>>  	caps->chunk_sz		= HNS_ROCE_V2_TABLE_CHUNK_SIZE;
>>
>>  	caps->flags		= HNS_ROCE_CAP_FLAG_REREG_MR |
>> -				  HNS_ROCE_CAP_FLAG_ROCE_V1_V2;
>> +				  HNS_ROCE_CAP_FLAG_ROCE_V1_V2 |
>> +				  HNS_ROCE_CAP_FLAG_RQ_INLINE;
>>  	caps->pkey_table_len[0] = 1;
>>  	caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
>>  	caps->ceqe_depth	= HNS_ROCE_V2_COMP_EQE_NUM;
>> @@ -1476,6 +1486,7 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
>>  static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
>>  				struct hns_roce_qp **cur_qp, struct ib_wc *wc)
>>  {
>> +	struct hns_roce_rinl_sge *sge_list;
>>  	struct hns_roce_dev *hr_dev;
>>  	struct hns_roce_v2_cqe *cqe;
>>  	struct hns_roce_qp *hr_qp;
>> @@ -1673,6 +1684,46 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
>>  			break;
>>  		}
>>
>> +		if ((wc->qp->qp_type == IB_QPT_RC ||
>> +		     wc->qp->qp_type == IB_QPT_UC) &&
>> +		    (opcode == HNS_ROCE_V2_OPCODE_SEND ||
>> +		    opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_IMM ||
>> +		    opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_INV) &&
>> +		    (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_RQ_INLINE_S))) {
> 
> It is better to put in separate function or inline function if you very worried about performance issues.
> 
Yes, I will refactor it as function.
>> +			u32 wr_num, wr_cnt, sge_num, sge_cnt;
>> +			u32 data_len, size;
>> +			u8 *wqe_buf;
>> +
>> +			wr_num = (u16)roce_get_field(cqe->byte_4,
>> +					V2_CQE_BYTE_4_WQE_INDX_M,
>> +					V2_CQE_BYTE_4_WQE_INDX_S) & 0xffff;
>> +			wr_cnt = wr_num & ((*cur_qp)->rq.wqe_cnt - 1);
>> +
>> +			sge_list =
>> +				(*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sg_list;
>> +			sge_num =
>> +				(*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sge_cnt;
> 
> Such level of indirection means that it is time to introduce temp variables.
> 
Maybe, it is ok after refactoring it.
>> +			wqe_buf = (u8 *)get_recv_wqe(*cur_qp, wr_cnt);
>> +			data_len = wc->byte_len;
>> +
>> +			for (sge_cnt = 0; (sge_cnt < sge_num) && (data_len);
>> +			     sge_cnt++) {
>> +				size = sge_list[sge_cnt].len < data_len ?
>> +				       sge_list[sge_cnt].len : data_len;
> 
> Use min(a, b) macro available in kernel headers.
yes, thanks.
> 
>> +
>> +				memcpy((void *)sge_list[sge_cnt].addr,
>> +					(void *)wqe_buf, size);
>> +
>> +				data_len -= size;
>> +				wqe_buf += size;
>> +			}
>> +
>> +			if (data_len) {
>> +				wc->status = IB_WC_LOC_LEN_ERR;
>> +				return -EAGAIN;
>> +			}
>> +		}
> 
> To be honest the whole new chunk is very strange, it has casting in
> almost every line and looks very suspicious. Can you clean the
> variables/function return values to avoid castings?
> 
>> +
>>  		/* Update tail pointer, record wr_id */
>>  		wq = &(*cur_qp)->rq;
>>  		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
>> @@ -1972,6 +2023,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
>>  		     !!(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC));
>>  	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
>>
>> +	roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 1);
>>  	roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0);
>>
>>  	roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
>> @@ -3114,6 +3166,15 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
>>  		hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
>>  	}
>>
>> +	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
>> +		if (hr_qp->rq_inl_buf.wqe_list) {
>> +			kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
>> +			kfree(hr_qp->rq_inl_buf.wqe_list);
>> +			hr_qp->rq_inl_buf.wqe_list[0].sg_list = NULL;
> 
> Did it work? You called to kfree the hr_qp->rq_inl_buf.wqe_list a line above?
> 
>> +			hr_qp->rq_inl_buf.wqe_list = NULL;
>> +		}
>> +	}
>> +
>>  	return 0;
>>  }
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
>> index 69e2584..7c75f21b 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_qp.c
>> +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
>> @@ -494,6 +494,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>>  	int ret = 0;
>>  	u32 page_shift;
>>  	u32 npages;
>> +	int i;
>>
>>  	mutex_init(&hr_qp->mutex);
>>  	spin_lock_init(&hr_qp->sq.lock);
>> @@ -513,18 +514,42 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>>  		goto err_out;
>>  	}
>>
>> +	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
>> +		/* allocate recv inline buf */
>> +		hr_qp->rq_inl_buf.wqe_list = kmalloc_array(hr_qp->rq.wqe_cnt,
>> +					       sizeof(struct hns_roce_rinl_wqe),
>> +					       GFP_KERNEL);
>> +		if (!hr_qp->rq_inl_buf.wqe_list)
>> +			goto err_out;
>> +
>> +		hr_qp->rq_inl_buf.wqe_cnt = hr_qp->rq.wqe_cnt;
>> +
>> +		hr_qp->rq_inl_buf.wqe_list[0].sg_list =
>> +				kmalloc_array(hr_qp->rq_inl_buf.wqe_cnt *
>> +					      init_attr->cap.max_recv_sge,
>> +					      sizeof(struct hns_roce_rinl_sge),
>> +					      GFP_KERNEL);
>> +		if (!hr_qp->rq_inl_buf.wqe_list[0].sg_list)
>> +			goto err_wqe_list;
>> +
>> +		for (i = 1; i < hr_qp->rq_inl_buf.wqe_cnt; i++)
>> +			hr_qp->rq_inl_buf.wqe_list[i].sg_list =
>> +				&hr_qp->rq_inl_buf.wqe_list[0].sg_list[i *
>> +				init_attr->cap.max_recv_sge];
>> +	}
> 
> I'm not an expert in this area of code, but the call to kmalloc_array on
> the first field of already kmalloc_array created pointer looks strange.
> 
> Don't know what you can do with that comment, maybe other reviewers can suggest.
> 
>> +
>>  	if (ib_pd->uobject) {
>>  		if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
>>  			dev_err(dev, "ib_copy_from_udata error for create qp\n");
>>  			ret = -EFAULT;
>> -			goto err_out;
>> +			goto err_rq_sge_list;
>>  		}
>>
>>  		ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp,
>>  						&ucmd);
>>  		if (ret) {
>>  			dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n");
>> -			goto err_out;
>> +			goto err_rq_sge_list;
>>  		}
>>
>>  		hr_qp->umem = ib_umem_get(ib_pd->uobject->context,
>> @@ -533,7 +558,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>>  		if (IS_ERR(hr_qp->umem)) {
>>  			dev_err(dev, "ib_umem_get error for create qp\n");
>>  			ret = PTR_ERR(hr_qp->umem);
>> -			goto err_out;
>> +			goto err_rq_sge_list;
>>  		}
>>
>>  		hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
>> @@ -567,13 +592,13 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>>  		    IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
>>  			dev_err(dev, "init_attr->create_flags error!\n");
>>  			ret = -EINVAL;
>> -			goto err_out;
>> +			goto err_rq_sge_list;
>>  		}
>>
>>  		if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) {
>>  			dev_err(dev, "init_attr->create_flags error!\n");
>>  			ret = -EINVAL;
>> -			goto err_out;
>> +			goto err_rq_sge_list;
>>  		}
>>
>>  		/* Set SQ size */
>> @@ -581,7 +606,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>>  						  hr_qp);
>>  		if (ret) {
>>  			dev_err(dev, "hns_roce_set_kernel_sq_size error!\n");
>> -			goto err_out;
>> +			goto err_rq_sge_list;
>>  		}
>>
>>  		/* QP doorbell register address */
>> @@ -597,7 +622,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>>  				       &hr_qp->hr_buf, page_shift)) {
>>  			dev_err(dev, "hns_roce_buf_alloc error!\n");
>>  			ret = -ENOMEM;
>> -			goto err_out;
>> +			goto err_rq_sge_list;
>>  		}
>>
>>  		hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
>> @@ -679,6 +704,19 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
>>  	else
>>  		hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
>>
>> +err_rq_sge_list:
>> +	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
>> +		if (hr_qp->rq_inl_buf.wqe_list) {
>> +			kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
>> +			hr_qp->rq_inl_buf.wqe_list[0].sg_list = NULL;
> 
> IMHO, in general case, it is preferable to avoid setting NULL after kfree.
> 
>> +		}
>> +
>> +err_wqe_list:
>> +	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
>> +		kfree(hr_qp->rq_inl_buf.wqe_list);
>> +		hr_qp->rq_inl_buf.wqe_list = NULL;
>> +	}
>> +
>>  err_out:
>>  	return ret;
>>  }
>> --
>> 1.9.1
>>
Thanks your reviewing. I will send the next patch for fix them.
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo@xxxxxxxxxxxxxxx
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux