Am 03.05.2017 um 05:48 schrieb Monk Liu: > if sriov gpu reset is invoked by job timeout, it is run > in a global work-queue which is very slow and better not call > msleep ortherwise it takes long time to get back CPU. > > so make below changes: > > 1: Change msleep 1 to mdelay 5 > 2: Ignore the ack fail from pf after time out, > because VF FLR will clear ack, sometime VF FLR is done > prior to the beginning of poll_ack so we can ignore this ack > > TODO: > Put job_timedout (and the following gpu reset) in a driver thread, > instead of the global work_struct. > > Change-Id: I4608c67b55c67c88597e03eee35a126d7e850839 > Signed-off-by: Monk Liu <Monk.Liu at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 18 ++++++++++-------- > drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 10 +++++----- > 2 files changed, 15 insertions(+), 13 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c > index 712f36e..e967a7b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c > +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c > @@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev) > r = -ETIME; > break; > } > - msleep(1); > - timeout -= 1; > + mdelay(5); > + timeout -= 5; > > reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, > mmBIF_BX_PF0_MAILBOX_CONTROL)); > @@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event) > r = xgpu_ai_mailbox_rcv_msg(adev, event); > while (r) { > if (timeout <= 0) { > - pr_err("Doesn't get ack from pf.\n"); > + pr_err("Doesn't get msg:%d from pf.\n", event); > r = -ETIME; > break; > } > - msleep(1); > - timeout -= 1; > + mdelay(5); > + timeout -= 5; > > r = xgpu_ai_mailbox_rcv_msg(adev, event); > } > @@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, > /* start to poll ack */ > r = xgpu_ai_poll_ack(adev); > if (r) > - return r; > + pr_err("Doesn't get ack from pf, continue\n"); > > xgpu_ai_mailbox_set_valid(adev, 
false); > > @@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, > req == IDH_REQ_GPU_FINI_ACCESS || > req == IDH_REQ_GPU_RESET_ACCESS) { > r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); > - if (r) > + if (r) { > + pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n"); > return r; > + } > } > > return 0; > @@ -211,7 +213,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev, > struct amdgpu_irq_src *source, > struct amdgpu_iv_entry *entry) > { > - DRM_DEBUG("get ack intr and do nothing.\n"); > + printk("get ack intr and do nothing.\n"); Changing a DRM_DEBUG to a printk looks odd. How about using pr_warn or pr_info instead? Apart from that, the patch looks good to me, but I'm not deeply into that stuff. So with the printk fixed, feel free to add an Acked-by: Christian König <christian.koenig at amd.com> to it. Regards, Christian. > return 0; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c > index 7bdc51b..f0d64f1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c > +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c > @@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev) > r = -ETIME; > break; > } > - msleep(1); > - timeout -= 1; > + mdelay(5); > + timeout -= 5; > > reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); > } > @@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event) > r = -ETIME; > break; > } > - msleep(1); > - timeout -= 1; > + mdelay(5); > + timeout -= 5; > > r = xgpu_vi_mailbox_rcv_msg(adev, event); > } > @@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev, > request == IDH_REQ_GPU_RESET_ACCESS) { > r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); > if (r) > - return r; > + pr_err("Doesn't get ack from pf, continue\n"); > } > > return 0;