From: Lang Cheng <chenglang@xxxxxxxxxx> If the hardware is resetting, the driver should not perform mailbox operations. The function-clear flow needs to check the reset state accordingly. Signed-off-by: Lang Cheng <chenglang@xxxxxxxxxx> --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 98 +++++++++++++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 2 + 2 files changed, 98 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7730983..ecd0283 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1125,26 +1125,118 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev) return 0; } +static bool hns_roce_func_clr_chk_rst(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + unsigned long reset_cnt; + bool sw_resetting; + bool hw_resetting; + + reset_cnt = ops->ae_dev_reset_cnt(handle); + hw_resetting = ops->get_hw_reset_stat(handle); + sw_resetting = ops->ae_dev_resetting(handle); + + if (reset_cnt != hr_dev->reset_cnt || hw_resetting || sw_resetting) + return true; + + return false; +} + +static void hns_roce_func_clr_rst_prc(struct hns_roce_dev *hr_dev, int retval, + int flag) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + unsigned long instance_stage; + unsigned long reset_cnt; + unsigned long end; + bool sw_resetting; + bool hw_resetting; + + instance_stage = handle->rinfo.instance_state; + reset_cnt = ops->ae_dev_reset_cnt(handle); + hw_resetting = ops->get_hw_reset_stat(handle); + sw_resetting = ops->ae_dev_resetting(handle); + + if (reset_cnt != hr_dev->reset_cnt) { + hr_dev->dis_db = true; + hr_dev->is_reset = true; + 
dev_info(hr_dev->dev, "Func clear success after reset.\n"); + } else if (hw_resetting) { + hr_dev->dis_db = true; + + dev_warn(hr_dev->dev, + "Func clear is pending, device in resetting state.\n"); + end = HNS_ROCE_V2_HW_RST_TIMEOUT; + while (end) { + if (!ops->get_hw_reset_stat(handle)) { + hr_dev->is_reset = true; + dev_info(hr_dev->dev, + "Func clear success after reset.\n"); + return; + } + msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT); + end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT; + } + + dev_warn(hr_dev->dev, "Func clear failed.\n"); + } else if (sw_resetting && instance_stage == HNS_ROCE_STATE_INIT) { + hr_dev->dis_db = true; + + dev_warn(hr_dev->dev, + "Func clear is pending, device in resetting state.\n"); + end = HNS_ROCE_V2_HW_RST_TIMEOUT; + while (end) { + if (ops->ae_dev_reset_cnt(handle) != + hr_dev->reset_cnt) { + hr_dev->is_reset = true; + dev_info(hr_dev->dev, + "Func clear success after sw reset\n"); + return; + } + msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT); + end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT; + } + + dev_warn(hr_dev->dev, "Func clear failed because of unfinished sw reset\n"); + } else { + if (retval && !flag) + dev_warn(hr_dev->dev, + "Func clear read failed, ret = %d.\n", retval); + + dev_warn(hr_dev->dev, "Func clear failed.\n"); + } +} static void hns_roce_function_clear(struct hns_roce_dev *hr_dev) { + bool fclr_write_fail_flag = false; struct hns_roce_func_clear *resp; struct hns_roce_cmq_desc desc; unsigned long end; - int ret; + int ret = 0; + + if (hns_roce_func_clr_chk_rst(hr_dev)) + goto out; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR, false); resp = (struct hns_roce_func_clear *)desc.data; ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { + fclr_write_fail_flag = true; dev_err(hr_dev->dev, "Func clear write failed, ret = %d.\n", ret); - return; + goto out; } msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL); end = HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS; while (end) { + if (hns_roce_func_clr_chk_rst(hr_dev)) + 
goto out; msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT); end -= HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT; @@ -1161,7 +1253,9 @@ static void hns_roce_function_clear(struct hns_roce_dev *hr_dev) } } +out: dev_err(hr_dev->dev, "Func clear fail.\n"); + hns_roce_func_clr_rst_prc(hr_dev, ret, fclr_write_fail_flag); } static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 1301629..43219d2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -99,6 +99,8 @@ #define HNS_ROCE_V2_HW_RST_TIMEOUT 1000 #define HNS_ROCE_V2_HW_RST_UNINT_DELAY 100 +#define HNS_ROCE_V2_HW_RST_COMPLETION_WAIT 20 + #define HNS_ROCE_CONTEXT_HOP_NUM 1 #define HNS_ROCE_SCCC_HOP_NUM 1 #define HNS_ROCE_MTT_HOP_NUM 1 -- 2.8.1