if sriov gpu reset is invoked by job timeout, it is run in a global work-queue which is very slow and better not call msleep ortherwise it takes long time to get back CPU. so make below changes: 1: Change msleep 1 to mdelay 5 2: Ignore the ack fail from pf after time out, because VF FLR will clear ack, sometime VF FLR is done prior to the beginning of poll_ack so we can ignore this ack TODO: Put job_timedout (and the following gpu reset) in a driver thread, instead of the global work_struct. Change-Id: I4608c67b55c67c88597e03eee35a126d7e850839 Signed-off-by: Monk Liu <Monk.Liu at amd.com> --- drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 18 ++++++++++-------- drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 10 +++++----- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 712f36e..e967a7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev) r = -ETIME; break; } - msleep(1); - timeout -= 1; + mdelay(5); + timeout -= 5; reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL)); @@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event) r = xgpu_ai_mailbox_rcv_msg(adev, event); while (r) { if (timeout <= 0) { - pr_err("Doesn't get ack from pf.\n"); + pr_err("Doesn't get msg:%d from pf.\n", event); r = -ETIME; break; } - msleep(1); - timeout -= 1; + mdelay(5); + timeout -= 5; r = xgpu_ai_mailbox_rcv_msg(adev, event); } @@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, /* start to poll ack */ r = xgpu_ai_poll_ack(adev); if (r) - return r; + pr_err("Doesn't get ack from pf, continue\n"); xgpu_ai_mailbox_set_valid(adev, false); @@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, req == IDH_REQ_GPU_FINI_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) { r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); - if (r) + if (r) { + pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n"); return r; + } } return 0; @@ -211,7 +213,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - DRM_DEBUG("get ack intr and do nothing.\n"); + printk("get ack intr and do nothing.\n"); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 7bdc51b..f0d64f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev) r = -ETIME; break; } - msleep(1); - timeout -= 1; + mdelay(5); + timeout -= 5; reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); } @@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event) r = -ETIME; break; } - msleep(1); - timeout -= 1; + mdelay(5); + timeout -= 5; r = xgpu_vi_mailbox_rcv_msg(adev, event); } @@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev, request == IDH_REQ_GPU_RESET_ACCESS) { r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); if (r) - return r; + pr_err("Doesn't get ack from pf, continue\n"); } return 0; -- 2.7.4