instead of reset/recovery all rings, we can only work on the perticular ring if detects it hang. Change-Id: Ie9de78819e1567e9f001d3593c9c52f749137c32 Signed-off-by: Monk Liu <Monk.Liu at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 35 ++++++++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 6 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 157d023..4dbd121 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2551,19 +2551,26 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job, b /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); - /* block scheduler */ - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - ring = adev->rings[i]; + /* we start from the ring trigger GPU hang */ + j = job ? job->ring->idx : 0; + /* block scheduler */ + for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) { + ring = adev->rings[i % AMDGPU_MAX_RINGS]; if (!ring || !ring->sched.thread) continue; kthread_park(ring->sched.thread); + + if (job && job->ring->idx != i) + continue; + + /* only do job_reset on the hang ring if @job not NULL */ amd_sched_hw_job_reset(&ring->sched); - } - /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ - amdgpu_fence_driver_force_completion(adev); + /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ + amdgpu_fence_driver_force_completion_ring(ring); + } /* request to take full control of GPU before re-initialization */ if (voluntary) @@ -2615,12 +2622,26 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job, b } fence_put(fence); + /* before recovery and unpark, kickout guilty for every rings */ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; + ring = adev->rings[i]; + if (!ring || !ring->sched.thread) continue; amd_sched_job_kickout_guilty(&ring->sched); + } + + for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) { + ring = adev->rings[i % AMDGPU_MAX_RINGS]; + if (!ring || !ring->sched.thread) + continue; + + if (job && job->ring->idx != i) { + kthread_unpark(ring->sched.thread); + continue; + } + amd_sched_job_recovery(&ring->sched); kthread_unpark(ring->sched.thread); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 5772ef2..de4c851 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -541,6 +541,12 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev) } } +void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring) +{ + if (ring) + amdgpu_fence_write(ring, ring->fence_drv.sync_seq); +} + /* * Common fence implementation */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 5786cc3..2acaac6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -76,6 +76,7 @@ struct amdgpu_fence_driver { int amdgpu_fence_driver_init(struct amdgpu_device *adev); void amdgpu_fence_driver_fini(struct amdgpu_device *adev); void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); +void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, unsigned num_hw_submission); -- 2.7.4