Please ignore this patch; there is a bug in it which I will fix, and I will resend the patch soon. Thanks, Andrey On 12/01/2017 01:59 PM, Andrey Grodzovsky wrote: > Instead mark fence as explicit in its amdgpu_sync_entry. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 14 +++++++------- > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 26 ++++++++++++-------------- > drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 27 +++++++++++++++++++++------ > drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 6 +++--- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 ++++++++-------- > 7 files changed, 52 insertions(+), 40 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index f8657c3..c56a986 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1121,7 +1121,6 @@ struct amdgpu_job { > struct amdgpu_vm *vm; > struct amdgpu_ring *ring; > struct amdgpu_sync sync; > - struct amdgpu_sync dep_sync; > struct amdgpu_sync sched_sync; > struct amdgpu_ib *ibs; > struct dma_fence *fence; /* the hw fence */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index d15836b..b694d35 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -788,7 +788,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) > return r; > > r = amdgpu_sync_fence(adev, &p->job->sync, > - fpriv->prt_va->last_pt_update); > + fpriv->prt_va->last_pt_update, false); > if (r) > return r; > > @@ -802,7 +802,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) > return r; > > f = bo_va->last_pt_update; > - r = amdgpu_sync_fence(adev, &p->job->sync, f); > + r = amdgpu_sync_fence(adev, &p->job->sync, f, false); > if (r) > return r; > } > @@ -825,7 +825,7 @@ static 
int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) > return r; > > f = bo_va->last_pt_update; > - r = amdgpu_sync_fence(adev, &p->job->sync, f); > + r = amdgpu_sync_fence(adev, &p->job->sync, f, false); > if (r) > return r; > } > @@ -836,7 +836,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) > if (r) > return r; > > - r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update); > + r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false); > if (r) > return r; > > @@ -1040,8 +1040,8 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, > amdgpu_ctx_put(ctx); > return r; > } else if (fence) { > - r = amdgpu_sync_fence(p->adev, &p->job->dep_sync, > - fence); > + r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, > + true); > dma_fence_put(fence); > amdgpu_ctx_put(ctx); > if (r) > @@ -1060,7 +1060,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, > if (r) > return r; > > - r = amdgpu_sync_fence(p->adev, &p->job->dep_sync, fence); > + r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); > dma_fence_put(fence); > > return r; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > index 659997b..21086bd 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > @@ -164,7 +164,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > } > > if (ring->funcs->emit_pipeline_sync && job && > - ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || > + ((tmp = amdgpu_sync_get_fence(&job->sched_sync, false)) || > amdgpu_vm_need_pipeline_sync(ring, job))) { > need_pipe_sync = true; > dma_fence_put(tmp); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > index 18770a8..1d65590 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > @@ -60,7 +60,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, 
unsigned num_ibs, > (*job)->num_ibs = num_ibs; > > amdgpu_sync_create(&(*job)->sync); > - amdgpu_sync_create(&(*job)->dep_sync); > amdgpu_sync_create(&(*job)->sched_sync); > (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); > > @@ -104,7 +103,6 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) > amdgpu_ring_priority_put(job->ring, s_job->s_priority); > dma_fence_put(job->fence); > amdgpu_sync_free(&job->sync); > - amdgpu_sync_free(&job->dep_sync); > amdgpu_sync_free(&job->sched_sync); > kfree(job); > } > @@ -115,7 +113,6 @@ void amdgpu_job_free(struct amdgpu_job *job) > > dma_fence_put(job->fence); > amdgpu_sync_free(&job->sync); > - amdgpu_sync_free(&job->dep_sync); > amdgpu_sync_free(&job->sched_sync); > kfree(job); > } > @@ -149,17 +146,18 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job, > { > struct amdgpu_job *job = to_amdgpu_job(sched_job); > struct amdgpu_vm *vm = job->vm; > - > - struct dma_fence *fence = amdgpu_sync_get_fence(&job->dep_sync); > + bool explicit = false; > int r; > - > - if (amd_sched_dependency_optimized(fence, s_entity)) { > - r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence); > - if (r) > - DRM_ERROR("Error adding fence to sync (%d)\n", r); > + struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync, &explicit); > + > + if (explicit) { > + if (amd_sched_dependency_optimized(fence, s_entity)) { > + r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence, false); > + if (r) > + DRM_ERROR("Error adding fence to sync (%d)\n", r); > + } > } > - if (!fence) > - fence = amdgpu_sync_get_fence(&job->sync); > + > while (fence == NULL && vm && !job->vm_id) { > struct amdgpu_ring *ring = job->ring; > > @@ -169,7 +167,7 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job, > if (r) > DRM_ERROR("Error getting VM ID (%d)\n", r); > > - fence = amdgpu_sync_get_fence(&job->sync); > + fence = amdgpu_sync_get_fence(&job->sync, NULL); > } > > return fence; 
> @@ -190,7 +188,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) > finished = &job->base.s_fence->finished; > adev = job->adev; > > - BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); > + BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL, NULL)); > > trace_amdgpu_sched_run_job(job); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c > index a4bf21f..2c71fec 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c > @@ -35,6 +35,7 @@ > struct amdgpu_sync_entry { > struct hlist_node node; > struct dma_fence *fence; > + bool explicit; > }; > > static struct kmem_cache *amdgpu_sync_slab; > @@ -141,7 +142,7 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) > * > */ > int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, > - struct dma_fence *f) > + struct dma_fence *f, bool explicit) > { > struct amdgpu_sync_entry *e; > > @@ -159,6 +160,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, > if (!e) > return -ENOMEM; > > + e->explicit = explicit; > + > hash_add(sync->fences, &e->node, f->context); > e->fence = dma_fence_get(f); > return 0; > @@ -189,7 +192,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, > > /* always sync to the exclusive fence */ > f = reservation_object_get_excl(resv); > - r = amdgpu_sync_fence(adev, sync, f); > + r = amdgpu_sync_fence(adev, sync, f, false); > > if (explicit_sync) > return r; > @@ -220,7 +223,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, > continue; > } > > - r = amdgpu_sync_fence(adev, sync, f); > + r = amdgpu_sync_fence(adev, sync, f, false); > if (r) > break; > } > @@ -237,7 +240,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, > * object. 
> */ > struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, > - struct amdgpu_ring *ring) > + struct amdgpu_ring *ring, > + bool *explicit) > { > struct amdgpu_sync_entry *e; > struct hlist_node *tmp; > @@ -261,10 +265,16 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, > if (dma_fence_is_signaled(&s_fence->scheduled)) > continue; > > + if (explicit) > + *explicit = e->explicit; > + > return &s_fence->scheduled; > } > } > > + if (explicit) > + *explicit = e->explicit; > + > return f; > } > > @@ -278,7 +288,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, > * > * Get and removes the next fence from the sync object not signaled yet. > */ > -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) > +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit) > { > struct amdgpu_sync_entry *e; > struct hlist_node *tmp; > @@ -292,8 +302,13 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) > hash_del(&e->node); > kmem_cache_free(amdgpu_sync_slab, e); > > - if (!dma_fence_is_signaled(f)) > + if (!dma_fence_is_signaled(f)) { > + > + if (explicit) > + *explicit = e->explicit; > + > return f; > + } > > dma_fence_put(f); > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h > index 70d7e3a..82c614d8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h > @@ -41,15 +41,15 @@ struct amdgpu_sync { > > void amdgpu_sync_create(struct amdgpu_sync *sync); > int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, > - struct dma_fence *f); > + struct dma_fence *f, bool explicit); > int amdgpu_sync_resv(struct amdgpu_device *adev, > struct amdgpu_sync *sync, > struct reservation_object *resv, > void *owner, > bool explicit_sync); > struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, > - struct amdgpu_ring *ring); > -struct dma_fence 
*amdgpu_sync_get_fence(struct amdgpu_sync *sync); > + struct amdgpu_ring *ring, bool *explicit); > +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit); > int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); > void amdgpu_sync_free(struct amdgpu_sync *sync); > int amdgpu_sync_init(void); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 7de519b..6a332f3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -486,9 +486,9 @@ static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, > needs_flush = true; > /* to prevent one context starved by another context */ > id->pd_gpu_addr = 0; > - tmp = amdgpu_sync_peek_fence(&id->active, ring); > + tmp = amdgpu_sync_peek_fence(&id->active, ring, NULL); > if (tmp) { > - r = amdgpu_sync_fence(adev, sync, tmp); > + r = amdgpu_sync_fence(adev, sync, tmp, false); > return r; > } > } > @@ -496,7 +496,7 @@ static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, > /* Good we can use this VMID. Remember this submission as > * user of the VMID. 
> */ > - r = amdgpu_sync_fence(ring->adev, &id->active, fence); > + r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); > if (r) > goto out; > > @@ -556,7 +556,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > /* Check if we have an idle VMID */ > i = 0; > list_for_each_entry(idle, &id_mgr->ids_lru, list) { > - fences[i] = amdgpu_sync_peek_fence(&idle->active, ring); > + fences[i] = amdgpu_sync_peek_fence(&idle->active, ring, NULL); > if (!fences[i]) > break; > ++i; > @@ -583,7 +583,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > } > > > - r = amdgpu_sync_fence(ring->adev, sync, &array->base); > + r = amdgpu_sync_fence(ring->adev, sync, &array->base, false); > dma_fence_put(&array->base); > if (r) > goto error; > @@ -626,7 +626,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > /* Good we can use this VMID. Remember this submission as > * user of the VMID. > */ > - r = amdgpu_sync_fence(ring->adev, &id->active, fence); > + r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); > if (r) > goto error; > > @@ -646,7 +646,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > id = idle; > > /* Remember this submission as user of the VMID */ > - r = amdgpu_sync_fence(ring->adev, &id->active, fence); > + r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); > if (r) > goto error; > > @@ -1657,7 +1657,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, > addr = 0; > } > > - r = amdgpu_sync_fence(adev, &job->sync, exclusive); > + r = amdgpu_sync_fence(adev, &job->sync, exclusive, false); > if (r) > goto error_free; >