When the job is already signaled, the s_fence is freed. Then it will has null pointer in amdgpu_device_gpu_recover. Signed-off-by: Emily Deng <Emily.Deng@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/scheduler/sched_main.c | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e6ce949..5a8f08e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4075,7 +4075,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && job->base.s_fence->parent && + if (job && job->base.s_fence && job->base.s_fence->parent && dma_fence_is_signaled(job->base.s_fence->parent)) job_signaled = true; diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 31809ca..56cc10e 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -334,8 +334,8 @@ void drm_sched_increase_karma(struct drm_sched_job *bad) spin_lock(&rq->lock); list_for_each_entry_safe(entity, tmp, &rq->entities, list) { - if (bad->s_fence->scheduled.context == - entity->fence_context) { + if (bad->s_fence && (bad->s_fence->scheduled.context == + entity->fence_context)) { if (atomic_read(&bad->karma) > bad->sched->hang_limit) if (entity->guilty) @@ -376,7 +376,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) * This iteration is thread safe as sched thread is stopped. */ list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) { - if (s_job->s_fence->parent && + if (s_job->s_fence && s_job->s_fence->parent && dma_fence_remove_callback(s_job->s_fence->parent, &s_job->cb)) { atomic_dec(&sched->hw_rq_count); @@ -395,7 +395,8 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) * * Job is still alive so fence refcount at least 1 */ - dma_fence_wait(&s_job->s_fence->finished, false); + if (s_job->s_fence) + dma_fence_wait(&s_job->s_fence->finished, false); /* * We must keep bad job alive for later use during @@ -438,7 +439,7 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery) * GPU recovers can't run in parallel. */ list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { - struct dma_fence *fence = s_job->s_fence->parent; + struct dma_fence *fence = s_job->s_fence ? s_job->s_fence->parent : NULL; atomic_inc(&sched->hw_rq_count); -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx