If GPU begin to do recovery, skip scheduling IBs. Otherwise GPU recovery randomly fail. Signed-off-by: Dennis Li <Dennis.Li@xxxxxxx> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index dcfe8a3b03ff..054d7b0357fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -212,6 +212,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) struct dma_fence *fence = NULL, *finished; struct amdgpu_job *job; int r = 0; + int locked; job = to_amdgpu_job(sched_job); finished = &job->base.s_fence->finished; @@ -220,6 +221,10 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) trace_amdgpu_sched_run_job(job); + locked = down_read_trylock(&ring->adev->reset_sem); + if (!locked) + dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if GPU recovery */ + if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter)) dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */ @@ -231,6 +236,10 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) if (r) DRM_ERROR("Error scheduling IBs (%d)\n", r); } + + if (locked) + up_read(&ring->adev->reset_sem); + /* if gpu reset, hw fence will be replaced here */ dma_fence_put(job->fence); job->fence = dma_fence_get(fence); -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx