Am 08.05.2017 um 08:51 schrieb Monk Liu: > that way we can know which job caused the hang and > can do per-sched reset/recovery instead of resetting all > scheds. > > Change-Id: Ifc98cd74b2d93823c489de6a89087ba188957eff > Signed-off-by: Monk Liu <Monk.Liu at amd.com> Reviewed-by: Christian König <christian.koenig at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++---- > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 +- > drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 2 +- > drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 2 +- > 5 files changed, 7 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 4985a7e..0e5f314 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -2529,14 +2529,13 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, > * amdgpu_sriov_gpu_reset - reset the asic > * > * @adev: amdgpu device pointer > - * @voluntary: if this reset is requested by guest. > - * (true means by guest and false means by HYPERVISOR ) > + * @job: which job trigger hang > * > * Attempt the reset the GPU if it has hung (all asics). > * for SRIOV case. > * Returns 0 for success or an error on failure. 
> */ > -int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) > +int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job) > { > int i, r = 0; > int resched; > @@ -2566,7 +2565,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) > amdgpu_fence_driver_force_completion(adev); > > /* request to take full control of GPU before re-initialization */ > - if (voluntary) > + if (job) > amdgpu_virt_reset_gpu(adev); > else > amdgpu_virt_request_full_gpu(adev, true); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > index c7718af..3c6fb6e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > @@ -38,7 +38,7 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job) > job->ring->fence_drv.sync_seq); > > if (amdgpu_sriov_vf(job->adev)) > - amdgpu_sriov_gpu_reset(job->adev, true); > + amdgpu_sriov_gpu_reset(job->adev, job); > else > amdgpu_gpu_reset(job->adev); > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > index 6f2b7df..9e1062e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > @@ -96,7 +96,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); > int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); > int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); > int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); > -int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary); > +int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job); > int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); > void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); > > diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c > index 96139ec..69da52d 100644 > --- 
a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c > +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c > @@ -243,7 +243,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) > } > > /* Trigger recovery due to world switch failure */ > - amdgpu_sriov_gpu_reset(adev, false); > + amdgpu_sriov_gpu_reset(adev, NULL); > } > > static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, > diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c > index f0d64f1..1cdf5cc 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c > +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c > @@ -514,7 +514,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) > } > > /* Trigger recovery due to world switch failure */ > - amdgpu_sriov_gpu_reset(adev, false); > + amdgpu_sriov_gpu_reset(adev, NULL); > } > > static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,