On 05/09/2017 04:14 PM, Chunming Zhou wrote:
> The problem is that executing the jobs in the right order doesn't give you
> the right result, because consecutive jobs executed on the same engine are
> pipelined. In other words, job B does its buffer read before job A has
> written its result.
>
> Change-Id: I9065abf5bafbda7a92702ca11477315d25c9a347
> Signed-off-by: Chunming Zhou <David1.Zhou at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c        |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  4 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  2 +-
>  drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 17 +++++++++++++++++
>  drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  2 ++
>  6 files changed, 27 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index dd2bd9a..787acd7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1169,6 +1169,7 @@ struct amdgpu_job {
>          void *owner;
>          uint64_t fence_ctx; /* the fence_context this job uses */
>          bool vm_needs_flush;
> +        bool need_pipeline_sync;
>          unsigned vm_id;
>          uint64_t vm_pd_addr;
>          uint32_t gds_base, gds_size;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 06202e2..2c6624d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -167,6 +167,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>                  return r;
>          }
>
> +        if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync)
> +                amdgpu_ring_emit_pipeline_sync(ring);
>          if (vm) {
>                  amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go too fast than DE */
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 690ef3d..cfa97ab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -57,6 +57,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
>          (*job)->vm = vm;
>          (*job)->ibs = (void *)&(*job)[1];
>          (*job)->num_ibs = num_ibs;
> +        (*job)->need_pipeline_sync = false;
>
>          amdgpu_sync_create(&(*job)->sync);
>
> @@ -152,6 +153,9 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
>                  fence = amdgpu_sync_get_fence(&job->sync);
>          }
>
> +        if (amd_sched_dependency_optimized(fence, sched_job->s_entity))
> +                job->need_pipeline_sync = true;
> +
>          return fence;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b35b853..b4f83fc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -738,7 +738,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
>          if (ring->funcs->init_cond_exec)
>                  patch_offset = amdgpu_ring_init_cond_exec(ring);
>
> -        if (ring->funcs->emit_pipeline_sync)
> +        if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync)

Perhaps we could emit the pipeline sync regardless of the need_pipeline_sync
flag. Could you explain the reasoning behind this change?
Jerry

>                  amdgpu_ring_emit_pipeline_sync(ring);
>
>          if (ring->funcs->emit_vm_flush && vm_flush_needed) {
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> index 3c258c3..fec870a 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> @@ -235,6 +235,23 @@ static void amd_sched_entity_clear_dep(struct fence *f, struct fence_cb *cb)
>          fence_put(f);
>  }
>
> +bool amd_sched_dependency_optimized(struct fence* fence,
> +                                    struct amd_sched_entity *entity)
> +{
> +        struct amd_gpu_scheduler *sched = entity->sched;
> +        struct amd_sched_fence *s_fence;
> +
> +        if (!fence || fence_is_signaled(fence))
> +                return false;
> +        if (fence->context == entity->fence_context)
> +                return true;
> +        s_fence = to_amd_sched_fence(fence);
> +        if (s_fence && s_fence->sched == sched)
> +                return true;
> +
> +        return false;
> +}
> +
>  static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
>  {
>          struct amd_gpu_scheduler *sched = entity->sched;
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> index 2531b41..bfda90f 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> @@ -162,4 +162,6 @@ int amd_sched_job_init(struct amd_sched_job *job,
>                         void *owner);
>  void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
>  void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
> +bool amd_sched_dependency_optimized(struct fence* fence,
> +                                    struct amd_sched_entity *entity);
>  #endif
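
For readers following the thread: the race described in the commit message, and
the check that amd_sched_dependency_optimized() performs, can be boiled down to
the standalone sketch below. All types, fields, and values here are simplified
stand-ins for the real amdgpu/scheduler structures, not driver code; only the
control flow mirrors the patch, and the sketch models just the same-fence-context
branch (the real function also treats a fence from the same amd_gpu_scheduler as
optimized).

    /* A minimal sketch of the decision this patch encodes, assuming
     * simplified stand-in types. Compile with: cc -std=c99 sketch.c */
    #include <stdbool.h>
    #include <stdio.h>

    struct fence {
            unsigned context;  /* fence context the producer job ran in */
            bool signaled;     /* has the producer already finished? */
    };

    struct entity {
            unsigned fence_context; /* context of jobs from this entity */
    };

    /* Mirrors the idea behind amd_sched_dependency_optimized(): the
     * scheduler does not block on a dependency that runs on the same
     * entity/ring, so the wait was "optimized" away on the CPU side and
     * must be re-established on the GPU with a pipeline sync. */
    static bool dependency_optimized(const struct fence *f,
                                     const struct entity *e)
    {
            if (!f || f->signaled)
                    return false; /* nothing left to wait for */
            return f->context == e->fence_context; /* same pipeline */
    }

    int main(void)
    {
            struct entity gfx = { .fence_context = 7 };
            struct fence job_a = { .context = 7, .signaled = false };

            /* Job B depends on job A on the same engine: the scheduler
             * skips the CPU wait, so job B must emit a pipeline sync
             * before reading A's buffer, or the pipelined read races
             * A's write. */
            bool need_pipeline_sync = dependency_optimized(&job_a, &gfx);
            printf("need_pipeline_sync = %s\n",
                   need_pipeline_sync ? "true" : "false");
            return 0;
    }

The key point is that the scheduler only CPU-waits on dependencies from other
rings; a dependency on the same ring is turned into back-to-back submission, so
the ordering guarantee has to be restored in hardware, which is why the patch
emits the sync in amdgpu_ib_schedule() when the flag is set.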