On Thu, Jan 23, 2025 at 09:29:30AM +0800, Junxian Huang wrote: > If a QP is modified to error state and a flush CQE process is triggered, > the subsequent QP destruction mbox can still be successfully posted but > will be blocked in HW until the flush CQE process finishes. This causes > further mbox posting timeouts in driver. The blocking time is related > to QP depth. Considering an extreme case where SQ depth and RQ depth > are both 32K, the blocking time can reach about 135ms. > > This patch adds a retry mechanism for mbox posting. For each try, FW > waits 15ms for HW to complete the previous mbox, otherwise return a > timeout error code to driver. Counting other time consumption in FW, > set 8 tries for mbox posting and a 5ms time gap before each retry to > increase to a sufficient timeout limit. > > Fixes: 0425e3e6e0c7 ("RDMA/hns: Support flush cqe for hip08 in kernel space") > Signed-off-by: Junxian Huang <huangjunxian6@xxxxxxxxxxxxx> > --- > drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 93 ++++++++++++++++------ > drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 6 +- > 2 files changed, 74 insertions(+), 25 deletions(-) > > diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > index 5c911d1def03..512866324f59 100644 > --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > @@ -1268,24 +1268,27 @@ static int hns_roce_cmd_err_convert_errno(u16 desc_ret) > return -EIO; > } > > -static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout) > +static void hns_roce_get_cmdq_param(u16 opcode, u32 *tx_timeout, u8 *try_cnt, > + u8 *retry_gap_msec) > { > - static const struct hns_roce_cmdq_tx_timeout_map cmdq_tx_timeout[] = { > - {HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT}, > + static const struct hns_roce_cmdq_param_map param[] = { > + {HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT, > + HNS_ROCE_OPC_POST_MB_TRY_CNT, > + HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC}, > }; > int i; > > - for (i = 0; i < ARRAY_SIZE(cmdq_tx_timeout); i++) > - if (cmdq_tx_timeout[i].opcode == opcode) > - return cmdq_tx_timeout[i].tx_timeout; > - > - return tx_timeout; > + for (i = 0; i < ARRAY_SIZE(param); i++) > + if (param[i].opcode == opcode) { > + *tx_timeout = param[i].tx_timeout; > + *try_cnt = param[i].try_cnt; > + *retry_gap_msec = param[i].retry_gap_msec; > + return; > + } The param is one size array with constant values. There is no need in any loop and pointer complexity here. > } > > -static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode) > +static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout) > { > - struct hns_roce_v2_priv *priv = hr_dev->priv; > - u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout); > u32 timeout = 0; > > do { > @@ -1295,8 +1298,9 @@ static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode) > } while (++timeout < tx_timeout); > } > > -static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, > - struct hns_roce_cmq_desc *desc, int num) > +static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev, > + struct hns_roce_cmq_desc *desc, > + int num, u32 tx_timeout) > { > struct hns_roce_v2_priv *priv = hr_dev->priv; > struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; > @@ -1305,8 +1309,6 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, > int ret; > int i; > > - spin_lock_bh(&csq->lock); > - > tail = csq->head; > > for (i = 0; i < num; i++) { > @@ -1320,22 +1322,17 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, > > atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]); > > - hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode)); > + hns_roce_wait_csq_done(hr_dev, tx_timeout); > if (hns_roce_cmq_csq_done(hr_dev)) { > ret = 0; > for (i = 0; i < num; i++) { > /* check the result of hardware write back */ > - desc[i] = csq->desc[tail++]; > + desc_ret = le16_to_cpu(csq->desc[tail++].retval); > if (tail == csq->desc_num) > tail = 0; > - > - desc_ret = le16_to_cpu(desc[i].retval); > if (likely(desc_ret == CMD_EXEC_SUCCESS)) > continue; > > - dev_err_ratelimited(hr_dev->dev, > - "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n", > - desc->opcode, desc_ret); > ret = hns_roce_cmd_err_convert_errno(desc_ret); > } > } else { > @@ -1350,14 +1347,62 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, > ret = -EAGAIN; > } > > - spin_unlock_bh(&csq->lock); > - > if (ret) > atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]); > > return ret; > } > > +static bool check_cmq_retry(u16 opcode, int ret) > +{ > + return opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME; > +} > + > +static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, > + struct hns_roce_cmq_desc *desc, int num) > +{ > + struct hns_roce_v2_priv *priv = hr_dev->priv; > + struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; > + u16 opcode = le16_to_cpu(desc->opcode); > + u32 tx_timeout = priv->cmq.tx_timeout; > + u8 retry_gap_msec = 0; > + u8 try_cnt = 1; > + u32 rsv_tail; > + int ret; > + int i; > + > + hns_roce_get_cmdq_param(opcode, &tx_timeout, > + &try_cnt, &retry_gap_msec); You are using constant values, please simplify this patch. Thanks > + > + while (try_cnt) { > + try_cnt--; > + > + spin_lock_bh(&csq->lock); > + rsv_tail = csq->head; > + ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout); > + if (check_cmq_retry(opcode, ret) && try_cnt) { > + spin_unlock_bh(&csq->lock); > + mdelay(retry_gap_msec); > + continue; > + } > + > + for (i = 0; i < num; i++) { > + desc[i] = csq->desc[rsv_tail++]; > + if (rsv_tail == csq->desc_num) > + rsv_tail = 0; > + } > + spin_unlock_bh(&csq->lock); > + break; > + } > + > + if (ret) > + dev_err_ratelimited(hr_dev->dev, > + "Cmdq IO error, opcode = 0x%x, return = %d.\n", > + opcode, ret); > + > + return ret; > +} > + > static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, > struct hns_roce_cmq_desc *desc, int num) > { > diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h > index cbdbc9edbce6..2e91babf333c 100644 > --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h > +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h > @@ -230,9 +230,13 @@ enum hns_roce_opcode_type { > }; > > #define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000 > -struct hns_roce_cmdq_tx_timeout_map { > +#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8 > +#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5 > +struct hns_roce_cmdq_param_map { > u16 opcode; > u32 tx_timeout; > + u8 try_cnt; > + u8 retry_gap_msec; > }; > > enum { > -- > 2.33.0 > >