Change-Id: I8e554d34c9e477ea255e0ed2a936397aa5f665e7
Signed-off-by: Chunming Zhou <David1.Zhou at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c     | 23 ++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    | 22 ++++++++++++++++++++++
 5 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 163429c8..03b0fe7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -763,6 +763,7 @@ void amdgpu_job_free(struct amdgpu_job *job);
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
 		      struct fence **f);
+void amdgpu_job_recovery(struct amd_gpu_scheduler *sched);
 
 struct amdgpu_ring {
 	struct amdgpu_device		*adev;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5c4691c..2c8e7f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1994,13 +1994,14 @@ retry:
 	}
 	/* restore scratch */
 	amdgpu_atombios_scratch_regs_restore(adev);
-	if (0) {
+	if (!r) {
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = adev->rings[i];
 			if (!ring)
 				continue;
+			amdgpu_job_recovery(&ring->sched);
 			kthread_unpark(ring->sched.thread);
-			amdgpu_ring_restore(ring, ring_sizes[i], ring_data[i]);
+			kfree(ring_data[i]);
 			ring_sizes[i] = 0;
 			ring_data[i] = NULL;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 72bf9f8..8af9903 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -366,9 +366,9 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 	setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
 		    (unsigned long)ring);
 
-	ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1;
+	ring->fence_drv.num_fences_mask = num_hw_submission * 4 - 1;
 	spin_lock_init(&ring->fence_drv.lock);
-	ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
+	ring->fence_drv.fences = kcalloc(num_hw_submission * 4, sizeof(void *),
 				 GFP_KERNEL);
 	if (!ring->fence_drv.fences)
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 34e3542..702bd9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -193,14 +193,19 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (ring->funcs->emit_hdp_invalidate)
 		amdgpu_ring_emit_hdp_invalidate(ring);
 
-	r = amdgpu_fence_emit(ring, &hwf);
-	if (r) {
-		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
-		if (job && job->vm_id)
-			amdgpu_vm_reset_id(adev, job->vm_id);
-		amdgpu_ring_undo(ring);
-		return r;
-	}
+	if (!job || !job->fence) {
+		r = amdgpu_fence_emit(ring, &hwf);
+		if (r) {
+			dev_err(adev->dev, "failed to emit fence (%d)\n", r);
+			if (job && job->vm_id)
+				amdgpu_vm_reset_id(adev, job->vm_id);
+			amdgpu_ring_undo(ring);
+			return r;
+		}
+	} else
+		/* re-submit fence when gpu reset */
+		amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
+				       job->fence->seqno, AMDGPU_FENCE_FLAG_INT);
 
 	/* wrap the last IB with fence */
 	if (job && job->uf_bo) {
@@ -212,7 +217,7 @@
 	}
 
 	if (f)
-		*f = fence_get(hwf);
+		*f = (job && job->fence) ? job->fence : fence_get(hwf);
 
 	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
 		amdgpu_ring_patch_cond_exec(ring, patch_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 83771c1..32fad1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -39,6 +39,28 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
 	amdgpu_gpu_reset(job->adev);
 }
 
+void amdgpu_job_recovery(struct amd_gpu_scheduler *sched)
+{
+	struct amd_sched_job *s_job, *tmp;
+
+	spin_lock(&sched->job_list_lock);
+	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
+		struct amdgpu_job *job = to_amdgpu_job(s_job);
+		if (job->vm) {
+			struct amdgpu_vm_id *id = &job->adev->vm_manager.ids[job->vm_id];
+			job->vm_pd_addr = amdgpu_bo_gpu_offset(job->vm->page_directory);
+			id->pd_gpu_addr = job->vm_pd_addr;
+		}
+		sched->ops->run_job(s_job);
+	}
+	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
+					 struct amd_sched_job, node);
+	if (s_job)
+		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+
+	spin_unlock(&sched->job_list_lock);
+}
+
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 		     struct amdgpu_job **job, struct amdgpu_vm *vm)
 {
-- 
1.9.1