On Mon, Jul 08, 2024 at 03:46:26PM +0800, Junxian Huang wrote: > > > On 2024/7/8 15:33, Leon Romanovsky wrote: > > On Mon, Jul 08, 2024 at 02:50:50PM +0800, Junxian Huang wrote: > >> > >> > >> On 2024/7/8 13:38, Leon Romanovsky wrote: > >>> On Mon, Jul 08, 2024 at 10:29:54AM +0800, Junxian Huang wrote: > >>>> > >>>> > >>>> On 2024/7/7 16:30, Leon Romanovsky wrote: > >>>>> On Fri, Jul 05, 2024 at 04:59:30PM +0800, Junxian Huang wrote: > >>>>>> From: wenglianfa <wenglianfa@xxxxxxxxxx> > >>>>>> > >>>>>> During reset, cmdq events won't be reported, leading to a long and > >>>>>> unnecessary wait. Notify all the cmdqs to stop waiting at the beginning > >>>>>> of reset. > >>>>>> > >>>>>> Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") > >>>>>> Signed-off-by: wenglianfa <wenglianfa@xxxxxxxxxx> > >>>>>> Signed-off-by: Junxian Huang <huangjunxian6@xxxxxxxxxxxxx> > >>>>>> --- > >>>>>> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 18 ++++++++++++++++++ > >>>>>> 1 file changed, 18 insertions(+) > >>>>>> > >>>>>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > >>>>>> index a5d746a5cc68..ff135df1a761 100644 > >>>>>> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > >>>>>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c > >>>>>> @@ -6977,6 +6977,21 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, > >>>>>> > >>>>>> handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT; > >>>>>> } > >>>>>> + > >>>>>> +static void hns_roce_v2_reset_notify_cmd(struct hns_roce_dev *hr_dev) > >>>>>> +{ > >>>>>> + struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd; > >>>>>> + int i; > >>>>>> + > >>>>>> + if (!hr_dev->cmd_mod) > >>>>> > >>>>> What prevents cmd_mod from being changed? > >>>>> > >>>> > >>>> It's set when the device is being initialized, and won't be changed after that. > >>> > >>> This is exactly the point, you are assuming that the device is already > >>> ininitialized or not initialized at all. What prevents hns_roce_v2_reset_notify_cmd() > >>> from being called in the middle of initialization? > >>> > >>> Thanks > >>> > >> > >> This is ensured by hns3 NIC driver. > >> > >> Initialization and reset of hns RoCE are both called by hns3. It will check the state > >> of RoCE device (see line 3798), and notify RoCE device to reset (hns_roce_v2_reset_notify_cmd() > >> is called) only if the RoCE device has been already initialized: > > > > So why do you have "if (!hr_dev->cmd_mod)" check in the code? > > > > Thanks > > > > cmd_mod indicates the mode of cmdq (0: poll mode, 1: event mode). > This patch only affects event mode because HW won't report events during reset. You set cmd_mod to 1 in hns_roce_hw_v2_init_instance() without any condition, I don't see when hns v2 IB device is created and continue to operate in polling mode. Thanks > > Junxian > > >> > >> 3791 static int hclge_notify_roce_client(struct hclge_dev *hdev, > >> 3792 enum hnae3_reset_notify_type type) > >> 3793 { > >> 3794 struct hnae3_handle *handle = &hdev->vport[0].roce; > >> 3795 struct hnae3_client *client = hdev->roce_client; > >> 3796 int ret; > >> 3797 > >> 3798 if (!test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state) || !client) > >> 3799 return 0; > >> 3800 > >> 3801 if (!client->ops->reset_notify) > >> 3802 return -EOPNOTSUPP; > >> 3803 > >> 3804 ret = client->ops->reset_notify(handle, type); > >> 3805 if (ret) > >> 3806 dev_err(&hdev->pdev->dev, "notify roce client failed %d(%d)", > >> 3807 type, ret); > >> 3808 > >> 3809 return ret; > >> 3810 } > >> > >> And the bit is set (see line 11246) after the initialization has been done (line 11242): > >> > >> 11224 static int hclge_init_roce_client_instance(struct hnae3_ae_dev *ae_dev, > >> 11225 struct hclge_vport *vport) > >> 11226 { > >> 11227 struct hclge_dev *hdev = ae_dev->priv; > >> 11228 struct hnae3_client *client; > >> 11229 int rst_cnt; > >> 11230 int ret; > >> 11231 > >> 11232 if (!hnae3_dev_roce_supported(hdev) || !hdev->roce_client || > >> 11233 !hdev->nic_client) > >> 11234 return 0; > >> 11235 > >> 11236 client = hdev->roce_client; > >> 11237 ret = hclge_init_roce_base_info(vport); > >> 11238 if (ret) > >> 11239 return ret; > >> 11240 > >> 11241 rst_cnt = hdev->rst_stats.reset_cnt; > >> 11242 ret = client->ops->init_instance(&vport->roce); > >> 11243 if (ret) > >> 11244 return ret; > >> 11245 > >> 11246 set_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); > >> 11247 if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || > >> 11248 rst_cnt != hdev->rst_stats.reset_cnt) { > >> 11249 ret = -EBUSY; > >> 11250 goto init_roce_err; > >> 11251 } > >> > >> Junxian > >> > >>>> > >>>> Junxian > >>>> > >>>>>> + return; > >>>>>> + > >>>>>> + for (i = 0; i < hr_cmd->max_cmds; i++) { > >>>>>> + hr_cmd->context[i].result = -EBUSY; > >>>>>> + complete(&hr_cmd->context[i].done); > >>>>>> + } > >>>>>> +} > >>>>>> + > >>>>>> static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) > >>>>>> { > >>>>>> struct hns_roce_dev *hr_dev; > >>>>>> @@ -6997,6 +7012,9 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) > >>>>>> hr_dev->dis_db = true; > >>>>>> hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN; > >>>>>> > >>>>>> + /* Complete the CMDQ event in advance during the reset. */ > >>>>>> + hns_roce_v2_reset_notify_cmd(hr_dev); > >>>>>> + > >>>>>> return 0; > >>>>>> } > >>>>>> > >>>>>> -- > >>>>>> 2.33.0 > >>>>>> > >>>> > >>