Re: [PATCH for-next] RDMA/hns: Fix mbox timing out by adding retry mechanism

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jan 23, 2025 at 09:29:30AM +0800, Junxian Huang wrote:
> If a QP is modified to error state and a flush CQE process is triggered,
> the subsequent QP destruction mbox can still be successfully posted but
> will be blocked in HW until the flush CQE process finishes. This causes
> further mbox posting timeouts in driver. The blocking time is related
> to QP depth. Considering an extreme case where SQ depth and RQ depth
> are both 32K, the blocking time can reach about 135ms.
> 
> This patch adds a retry mechanism for mbox posting. For each try, FW
> waits 15ms for HW to complete the previous mbox, otherwise return a
> timeout error code to driver. Counting other time consumption in FW,
> set 8 tries for mbox posting and a 5ms time gap before each retry to
> increase to a sufficient timeout limit.
> 
> Fixes: 0425e3e6e0c7 ("RDMA/hns: Support flush cqe for hip08 in kernel space")
> Signed-off-by: Junxian Huang <huangjunxian6@xxxxxxxxxxxxx>
> ---
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 93 ++++++++++++++++------
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h |  6 +-
>  2 files changed, 74 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index 5c911d1def03..512866324f59 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -1268,24 +1268,27 @@ static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
>  	return -EIO;
>  }
>  
> -static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout)
> +static void hns_roce_get_cmdq_param(u16 opcode, u32 *tx_timeout, u8 *try_cnt,
> +				    u8 *retry_gap_msec)
>  {
> -	static const struct hns_roce_cmdq_tx_timeout_map cmdq_tx_timeout[] = {
> -		{HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT},
> +	static const struct hns_roce_cmdq_param_map param[] = {
> +		{HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT,
> +		 HNS_ROCE_OPC_POST_MB_TRY_CNT,
> +		 HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC},
>  	};
>  	int i;
>  
> -	for (i = 0; i < ARRAY_SIZE(cmdq_tx_timeout); i++)
> -		if (cmdq_tx_timeout[i].opcode == opcode)
> -			return cmdq_tx_timeout[i].tx_timeout;
> -
> -	return tx_timeout;
> +	for (i = 0; i < ARRAY_SIZE(param); i++)
> +		if (param[i].opcode == opcode) {
> +			*tx_timeout = param[i].tx_timeout;
> +			*try_cnt = param[i].try_cnt;
> +			*retry_gap_msec = param[i].retry_gap_msec;
> +			return;
> +		}

The param is one size array with constant values. There is no need in
any loop and pointer complexity here.

>  }
>  
> -static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode)
> +static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout)
>  {
> -	struct hns_roce_v2_priv *priv = hr_dev->priv;
> -	u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout);
>  	u32 timeout = 0;
>  
>  	do {
> @@ -1295,8 +1298,9 @@ static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode)
>  	} while (++timeout < tx_timeout);
>  }
>  
> -static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
> -			       struct hns_roce_cmq_desc *desc, int num)
> +static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev,
> +				   struct hns_roce_cmq_desc *desc,
> +				   int num, u32 tx_timeout)
>  {
>  	struct hns_roce_v2_priv *priv = hr_dev->priv;
>  	struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
> @@ -1305,8 +1309,6 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
>  	int ret;
>  	int i;
>  
> -	spin_lock_bh(&csq->lock);
> -
>  	tail = csq->head;
>  
>  	for (i = 0; i < num; i++) {
> @@ -1320,22 +1322,17 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
>  
>  	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]);
>  
> -	hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode));
> +	hns_roce_wait_csq_done(hr_dev, tx_timeout);
>  	if (hns_roce_cmq_csq_done(hr_dev)) {
>  		ret = 0;
>  		for (i = 0; i < num; i++) {
>  			/* check the result of hardware write back */
> -			desc[i] = csq->desc[tail++];
> +			desc_ret = le16_to_cpu(csq->desc[tail++].retval);
>  			if (tail == csq->desc_num)
>  				tail = 0;
> -
> -			desc_ret = le16_to_cpu(desc[i].retval);
>  			if (likely(desc_ret == CMD_EXEC_SUCCESS))
>  				continue;
>  
> -			dev_err_ratelimited(hr_dev->dev,
> -					    "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n",
> -					    desc->opcode, desc_ret);
>  			ret = hns_roce_cmd_err_convert_errno(desc_ret);
>  		}
>  	} else {
> @@ -1350,14 +1347,62 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
>  		ret = -EAGAIN;
>  	}
>  
> -	spin_unlock_bh(&csq->lock);
> -
>  	if (ret)
>  		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]);
>  
>  	return ret;
>  }
>  
> +static bool check_cmq_retry(u16 opcode, int ret)
> +{
> +	return opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME;
> +}
> +
> +static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
> +			       struct hns_roce_cmq_desc *desc, int num)
> +{
> +	struct hns_roce_v2_priv *priv = hr_dev->priv;
> +	struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
> +	u16 opcode = le16_to_cpu(desc->opcode);
> +	u32 tx_timeout = priv->cmq.tx_timeout;
> +	u8 retry_gap_msec = 0;
> +	u8 try_cnt = 1;
> +	u32 rsv_tail;
> +	int ret;
> +	int i;
> +
> +	hns_roce_get_cmdq_param(opcode, &tx_timeout,
> +				&try_cnt, &retry_gap_msec);

You are using constant values, please simplify this patch.

Thanks

> +
> +	while (try_cnt) {
> +		try_cnt--;
> +
> +		spin_lock_bh(&csq->lock);
> +		rsv_tail = csq->head;
> +		ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout);
> +		if (check_cmq_retry(opcode, ret) && try_cnt) {
> +			spin_unlock_bh(&csq->lock);
> +			mdelay(retry_gap_msec);
> +			continue;
> +		}
> +
> +		for (i = 0; i < num; i++) {
> +			desc[i] = csq->desc[rsv_tail++];
> +			if (rsv_tail == csq->desc_num)
> +				rsv_tail = 0;
> +		}
> +		spin_unlock_bh(&csq->lock);
> +		break;
> +	}
> +
> +	if (ret)
> +		dev_err_ratelimited(hr_dev->dev,
> +				    "Cmdq IO error, opcode = 0x%x, return = %d.\n",
> +				    opcode, ret);
> +
> +	return ret;
> +}
> +
>  static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
>  			     struct hns_roce_cmq_desc *desc, int num)
>  {
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> index cbdbc9edbce6..2e91babf333c 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> @@ -230,9 +230,13 @@ enum hns_roce_opcode_type {
>  };
>  
>  #define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000
> -struct hns_roce_cmdq_tx_timeout_map {
> +#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8
> +#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5
> +struct hns_roce_cmdq_param_map {
>  	u16 opcode;
>  	u32 tx_timeout;
> +	u8 try_cnt;
> +	u8 retry_gap_msec;
>  };
>  
>  enum {
> -- 
> 2.33.0
> 
> 




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux