This is an improvement on the previous patch. sched_sync stores the fences whose wait could be skipped by the scheduler. When the job is executed, no pipeline_sync is needed if all fences in sched_sync have already signalled; otherwise a pipeline_sync is still inserted. Change-Id: I26d3a2794272ba94b25753d4bf367326d12f6939 Signed-off-by: Chunming Zhou <David1.Zhou at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 7 ++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 ++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 787acd7..ef018bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1162,6 +1162,7 @@ struct amdgpu_job { struct amdgpu_vm *vm; struct amdgpu_ring *ring; struct amdgpu_sync sync; + struct amdgpu_sync sched_sync; struct amdgpu_ib *ibs; struct fence *fence; /* the hw fence */ uint32_t preamble_status; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2c6624d..86ad507 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -121,6 +121,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; + struct fence *tmp; bool skip_preamble, need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; @@ -167,8 +168,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return r; } - if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync) + if (ring->funcs->emit_pipeline_sync && job && + (tmp = amdgpu_sync_get_fence(&job->sched_sync))) { + job->need_pipeline_sync = true; amdgpu_ring_emit_pipeline_sync(ring); + fence_put(tmp); + } if (vm) { amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go too fast than DE */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index cfa97ab..fa0c8b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -60,6 +60,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->need_pipeline_sync = false; amdgpu_sync_create(&(*job)->sync); + amdgpu_sync_create(&(*job)->sched_sync); return 0; } @@ -98,6 +99,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) fence_put(job->fence); amdgpu_sync_free(&job->sync); + amdgpu_sync_free(&job->sched_sync); kfree(job); } @@ -107,6 +109,7 @@ void amdgpu_job_free(struct amdgpu_job *job) fence_put(job->fence); amdgpu_sync_free(&job->sync); + amdgpu_sync_free(&job->sched_sync); kfree(job); } @@ -154,7 +157,7 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) } if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) - job->need_pipeline_sync = true; + amdgpu_sync_fence(job->adev, &job->sched_sync, fence); return fence; } -- 1.9.1