On Wed, May 3, 2017 at 5:05 AM, Christian König <deathsimple at vodafone.de> wrote: > Am 03.05.2017 um 05:48 schrieb Monk Liu: >> >> if sriov gpu reset is invoked by job timeout, it is run >> in a global work-queue which is very slow and better not call >> msleep otherwise it takes a long time to get back CPU. >> >> so make below changes: >> >> 1: Change msleep 1 to mdelay 5 >> 2: Ignore the ack fail from pf after time out, >> because VF FLR will clear ack, sometimes VF FLR is done >> prior to the beginning of poll_ack so we can ignore this ack >> >> TODO: >> Put job_timedout (and the following gpu reset) in a driver thread, >> instead of the global work_struct. >> >> Change-Id: I4608c67b55c67c88597e03eee35a126d7e850839 >> Signed-off-by: Monk Liu <Monk.Liu at amd.com> >> --- >> drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 18 ++++++++++-------- >> drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 10 +++++----- >> 2 files changed, 15 insertions(+), 13 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c >> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c >> index 712f36e..e967a7b 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c >> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c >> @@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device >> *adev) >> r = -ETIME; >> break; >> } >> - msleep(1); >> - timeout -= 1; >> + mdelay(5); >> + timeout -= 5; >> reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, >> >> mmBIF_BX_PF0_MAILBOX_CONTROL)); >> @@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device >> *adev, enum idh_event event) >> r = xgpu_ai_mailbox_rcv_msg(adev, event); >> while (r) { >> if (timeout <= 0) { >> - pr_err("Doesn't get ack from pf.\n"); >> + pr_err("Doesn't get msg:%d from pf.\n", event); >> r = -ETIME; >> break; >> } >> - msleep(1); >> - timeout -= 1; >> + mdelay(5); >> + timeout -= 5; >> r = xgpu_ai_mailbox_rcv_msg(adev, event); >> } >> @@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct >> amdgpu_device *adev, >> /* start to 
poll ack */ >> r = xgpu_ai_poll_ack(adev); >> if (r) >> - return r; >> + pr_err("Doesn't get ack from pf, continue\n"); >> xgpu_ai_mailbox_set_valid(adev, false); >> @@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct >> amdgpu_device *adev, >> req == IDH_REQ_GPU_FINI_ACCESS || >> req == IDH_REQ_GPU_RESET_ACCESS) { >> r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); >> - if (r) >> + if (r) { >> + pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, >> give up\n"); >> return r; >> + } >> } >> return 0; >> @@ -211,7 +213,7 @@ static int xgpu_ai_mailbox_ack_irq(struct >> amdgpu_device *adev, >> struct amdgpu_irq_src *source, >> struct amdgpu_iv_entry *entry) >> { >> - DRM_DEBUG("get ack intr and do nothing.\n"); >> + printk("get ack intr and do nothing.\n"); > > > Changing a DRM_DEBUG to a printk looks odd. How about using pr_warn or > pr_info instead? > > Apart from that, the patch looks good to me, but I'm not deeply into that stuff. > > So with the printk fixed feel free to add an Acked-by: Christian König > <christian.koenig at amd.com> to it. Same here. With that fixed: Acked-by: Alex Deucher <alexander.deucher at amd.com> > > Regards, > Christian. 
> > >> return 0; >> } >> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c >> b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c >> index 7bdc51b..f0d64f1 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c >> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c >> @@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device >> *adev) >> r = -ETIME; >> break; >> } >> - msleep(1); >> - timeout -= 1; >> + mdelay(5); >> + timeout -= 5; >> reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); >> } >> @@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device >> *adev, enum idh_event event) >> r = -ETIME; >> break; >> } >> - msleep(1); >> - timeout -= 1; >> + mdelay(5); >> + timeout -= 5; >> r = xgpu_vi_mailbox_rcv_msg(adev, event); >> } >> @@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct >> amdgpu_device *adev, >> request == IDH_REQ_GPU_RESET_ACCESS) { >> r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); >> if (r) >> - return r; >> + pr_err("Doesn't get ack from pf, continue\n"); >> } >> return 0; > > > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx