On 12/01/2017 02:59 PM, Christian König wrote: > Am 01.12.2017 um 20:39 schrieb Andrey Grodzovsky: >> Instead mark fence as explicit in it's amdgpu_sync_entry. >> >> v2: >> Fix use after free bug and add new parameter description. >> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com> >> --- >>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 - >>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 14 +++++++------- >>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  | 2 +- >>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 26 >> ++++++++++++-------------- >>  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 23 +++++++++++++++++------ >>  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 6 +++--- >>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 16 ++++++++-------- >>  7 files changed, 48 insertions(+), 40 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index f8657c3..c56a986 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -1121,7 +1121,6 @@ struct amdgpu_job { >>      struct amdgpu_vm   *vm; >>      struct amdgpu_ring   *ring; >>      struct amdgpu_sync   sync; >> -   struct amdgpu_sync   dep_sync; >>      struct amdgpu_sync   sched_sync; >>      struct amdgpu_ib   *ibs; >>      struct dma_fence   *fence; /* the hw fence */ >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> index d15836b..b694d35 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> @@ -788,7 +788,7 @@ static int amdgpu_bo_vm_update_pte(struct >> amdgpu_cs_parser *p) >>          return r; >>       r = amdgpu_sync_fence(adev, &p->job->sync, >> -                 fpriv->prt_va->last_pt_update); >> +                 fpriv->prt_va->last_pt_update, false); >>      if (r) >>          return r; >>  @@ -802,7 +802,7 @@ static int amdgpu_bo_vm_update_pte(struct >> amdgpu_cs_parser *p) >>              return r; >>        
   f = bo_va->last_pt_update; >> -       r = amdgpu_sync_fence(adev, &p->job->sync, f); >> +       r = amdgpu_sync_fence(adev, &p->job->sync, f, false); >>          if (r) >>              return r; >>      } >> @@ -825,7 +825,7 @@ static int amdgpu_bo_vm_update_pte(struct >> amdgpu_cs_parser *p) >>                  return r; >>               f = bo_va->last_pt_update; >> -           r = amdgpu_sync_fence(adev, &p->job->sync, f); >> +           r = amdgpu_sync_fence(adev, &p->job->sync, f, false); >>              if (r) >>                  return r; >>          } >> @@ -836,7 +836,7 @@ static int amdgpu_bo_vm_update_pte(struct >> amdgpu_cs_parser *p) >>      if (r) >>          return r; >>  -   r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update); >> +   r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false); >>      if (r) >>          return r; >>  @@ -1040,8 +1040,8 @@ static int amdgpu_cs_process_fence_dep(struct >> amdgpu_cs_parser *p, >>              amdgpu_ctx_put(ctx); >>              return r; >>          } else if (fence) { >> -           r = amdgpu_sync_fence(p->adev, &p->job->dep_sync, >> -                         fence); >> +           r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, >> +                   true); >>              dma_fence_put(fence); >>              amdgpu_ctx_put(ctx); >>              if (r) >> @@ -1060,7 +1060,7 @@ static int >> amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, >>      if (r) >>          return r; >>  -   r = amdgpu_sync_fence(p->adev, &p->job->dep_sync, fence); >> +   r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); >>      dma_fence_put(fence); >>       return r; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c >> index 659997b..0cf86eb 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c >> @@ -164,7 +164,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, >> 
unsigned num_ibs, >>      } >>       if (ring->funcs->emit_pipeline_sync && job && >> -       ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || >> +       ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) || >>           amdgpu_vm_need_pipeline_sync(ring, job))) { >>          need_pipe_sync = true; >>          dma_fence_put(tmp); >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c >> index 18770a8..61fb934 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c >> @@ -60,7 +60,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, >> unsigned num_ibs, >>      (*job)->num_ibs = num_ibs; >>       amdgpu_sync_create(&(*job)->sync); >> -   amdgpu_sync_create(&(*job)->dep_sync); >>      amdgpu_sync_create(&(*job)->sched_sync); >>      (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); >>  @@ -104,7 +103,6 @@ static void amdgpu_job_free_cb(struct >> amd_sched_job *s_job) >>      amdgpu_ring_priority_put(job->ring, s_job->s_priority); >>      dma_fence_put(job->fence); >>      amdgpu_sync_free(&job->sync); >> -   amdgpu_sync_free(&job->dep_sync); >>      amdgpu_sync_free(&job->sched_sync); >>      kfree(job); >>  } >> @@ -115,7 +113,6 @@ void amdgpu_job_free(struct amdgpu_job *job) >>       dma_fence_put(job->fence); >>      amdgpu_sync_free(&job->sync); >> -   amdgpu_sync_free(&job->dep_sync); >>      amdgpu_sync_free(&job->sched_sync); >>      kfree(job); >>  } >> @@ -149,17 +146,18 @@ static struct dma_fence >> *amdgpu_job_dependency(struct amd_sched_job *sched_job, >>  { >>      struct amdgpu_job *job = to_amdgpu_job(sched_job); >>      struct amdgpu_vm *vm = job->vm; >> - >> -   struct dma_fence *fence = amdgpu_sync_get_fence(&job->dep_sync); >> +   bool explicit = false; >>      int r; >> - >> -   if (amd_sched_dependency_optimized(fence, s_entity)) { >> -       r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence); >> -       if (r) >> -       
    DRM_ERROR("Error adding fence to sync (%d)\n", r); >> +   struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync, >> &explicit); >> + >> +   if (fence && explicit) { >> +       if (amd_sched_dependency_optimized(fence, s_entity)) { >> +           r = amdgpu_sync_fence(job->adev, &job->sched_sync, >> fence, false); >> +           if (r) >> +               DRM_ERROR("Error adding fence to sync (%d)\n", r); >> +       } >>      } >> -   if (!fence) >> -       fence = amdgpu_sync_get_fence(&job->sync); >> + >>      while (fence == NULL && vm && !job->vm_id) { >>          struct amdgpu_ring *ring = job->ring; >>  @@ -169,7 +167,7 @@ static struct dma_fence >> *amdgpu_job_dependency(struct amd_sched_job *sched_job, >>          if (r) >>              DRM_ERROR("Error getting VM ID (%d)\n", r); >>  -       fence = amdgpu_sync_get_fence(&job->sync); >> +       fence = amdgpu_sync_get_fence(&job->sync, NULL); >>      } >>       return fence; >> @@ -190,7 +188,7 @@ static struct dma_fence *amdgpu_job_run(struct >> amd_sched_job *sched_job) >>      finished = &job->base.s_fence->finished; >>      adev = job->adev; >>  -   BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); >> +   BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL, NULL)); >>       trace_amdgpu_sched_run_job(job); >>  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c >> index a4bf21f..6c3dda1 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c >> @@ -35,6 +35,7 @@ >>  struct amdgpu_sync_entry { >>      struct hlist_node   node; >>      struct dma_fence   *fence; >> +   bool   explicit; >>  }; >>   static struct kmem_cache *amdgpu_sync_slab; >> @@ -141,7 +142,7 @@ static bool amdgpu_sync_add_later(struct >> amdgpu_sync *sync, struct dma_fence *f) >>   * >>   */ >>  int amdgpu_sync_fence(struct amdgpu_device *adev, struct >> amdgpu_sync *sync, >> -             struct dma_fence *f) >> +             struct 
dma_fence *f, bool explicit) >>  { >>      struct amdgpu_sync_entry *e; >>  @@ -159,6 +160,8 @@ int amdgpu_sync_fence(struct amdgpu_device >> *adev, struct amdgpu_sync *sync, >>      if (!e) >>          return -ENOMEM; >>  +   e->explicit = explicit; >> + >>      hash_add(sync->fences, &e->node, f->context); >>      e->fence = dma_fence_get(f); >>      return 0; >> @@ -189,7 +192,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, >>       /* always sync to the exclusive fence */ >>      f = reservation_object_get_excl(resv); >> -   r = amdgpu_sync_fence(adev, sync, f); >> +   r = amdgpu_sync_fence(adev, sync, f, false); >>       if (explicit_sync) >>          return r; >> @@ -220,7 +223,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, >>                  continue; >>          } >>  -       r = amdgpu_sync_fence(adev, sync, f); >> +       r = amdgpu_sync_fence(adev, sync, f, false); >>          if (r) >>              break; >>      } >> @@ -232,12 +235,14 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, >>   * >>   * @sync: the sync object >>   * @ring: optional ring to use for test >> + * @explicit: true if the next fence is explicit >>   * >>   * Returns the next fence not signaled yet without removing it from >> the sync >>   * object. >>   */ >>  struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, >> -                    struct amdgpu_ring *ring) >> +                    struct amdgpu_ring *ring, >> +                    bool *explicit) >>  { >>      struct amdgpu_sync_entry *e; >>      struct hlist_node *tmp; >> @@ -247,6 +252,10 @@ struct dma_fence *amdgpu_sync_peek_fence(struct >> amdgpu_sync *sync, >>          struct dma_fence *f = e->fence; >>          struct amd_sched_fence *s_fence = to_amd_sched_fence(f); >>  + >> +       if (explicit) >> +           *explicit = e->explicit; >> + > > We don't need this for _peek_fence, don't we? 
Currently not, I assumed there might be some future use and also to make amdgpu_sync_peek_fence and amdgpu_sync_get_fence interfaces look the same. But I guess if there is no use then no need for extra parameter. Already removed and pushed the change. Thanks, Andrey > > Apart from that the patch is Reviewed-by: Christian König > <christian.koenig at amd.com>. > > Now the next step for you is to think a bit about how we can further > optimize this. > > For example we don't need to keep the implicit fence around any more > when we add an explicit one which is newer. > > Regards, > Christian. > >>          if (dma_fence_is_signaled(f)) { >>              hash_del(&e->node); >>              dma_fence_put(f); >> @@ -275,19 +284,21 @@ struct dma_fence *amdgpu_sync_peek_fence(struct >> amdgpu_sync *sync, >>   * amdgpu_sync_get_fence - get the next fence from the sync object >>   * >>   * @sync: sync object to use >> + * @explicit: true if the next fence is explicit >>   * >>   * Get and removes the next fence from the sync object not signaled >> yet. 
>>   */ >> -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) >> +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, >> bool *explicit) >>  { >>      struct amdgpu_sync_entry *e; >>      struct hlist_node *tmp; >>      struct dma_fence *f; >>      int i; >> - >>      hash_for_each_safe(sync->fences, i, tmp, e, node) { >>           f = e->fence; >> +       if (explicit) >> +           *explicit = e->explicit; >>           hash_del(&e->node); >>          kmem_cache_free(amdgpu_sync_slab, e); >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h >> index 70d7e3a..82c614d8 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h >> @@ -41,15 +41,15 @@ struct amdgpu_sync { >>   void amdgpu_sync_create(struct amdgpu_sync *sync); >>  int amdgpu_sync_fence(struct amdgpu_device *adev, struct >> amdgpu_sync *sync, >> -             struct dma_fence *f); >> +             struct dma_fence *f, bool explicit); >>  int amdgpu_sync_resv(struct amdgpu_device *adev, >>               struct amdgpu_sync *sync, >>               struct reservation_object *resv, >>               void *owner, >>               bool explicit_sync); >>  struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, >> -                    struct amdgpu_ring *ring); >> -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); >> +                    struct amdgpu_ring *ring, bool *explicit); >> +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, >> bool *explicit); >>  int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); >>  void amdgpu_sync_free(struct amdgpu_sync *sync); >>  int amdgpu_sync_init(void); >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> index 7de519b..6a332f3 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> @@ -486,9 +486,9 
@@ static int >> amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, >>          needs_flush = true; >>          /* to prevent one context starved by another context */ >>          id->pd_gpu_addr = 0; >> -       tmp = amdgpu_sync_peek_fence(&id->active, ring); >> +       tmp = amdgpu_sync_peek_fence(&id->active, ring, NULL); >>          if (tmp) { >> -           r = amdgpu_sync_fence(adev, sync, tmp); >> +           r = amdgpu_sync_fence(adev, sync, tmp, false); >>              return r; >>          } >>      } >> @@ -496,7 +496,7 @@ static int >> amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, >>      /* Good we can use this VMID. Remember this submission as >>      * user of the VMID. >>      */ >> -   r = amdgpu_sync_fence(ring->adev, &id->active, fence); >> +   r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); >>      if (r) >>          goto out; >>  @@ -556,7 +556,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, >> struct amdgpu_ring *ring, >>      /* Check if we have an idle VMID */ >>      i = 0; >>      list_for_each_entry(idle, &id_mgr->ids_lru, list) { >> -       fences[i] = amdgpu_sync_peek_fence(&idle->active, ring); >> +       fences[i] = amdgpu_sync_peek_fence(&idle->active, ring, NULL); >>          if (!fences[i]) >>              break; >>          ++i; >> @@ -583,7 +583,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, >> struct amdgpu_ring *ring, >>          } >>   -       r = amdgpu_sync_fence(ring->adev, sync, &array->base); >> +       r = amdgpu_sync_fence(ring->adev, sync, &array->base, false); >>          dma_fence_put(&array->base); >>          if (r) >>              goto error; >> @@ -626,7 +626,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, >> struct amdgpu_ring *ring, >>          /* Good we can use this VMID. Remember this submission as >>           * user of the VMID. 
>>           */ >> -       r = amdgpu_sync_fence(ring->adev, &id->active, fence); >> +       r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); >>          if (r) >>              goto error; >>  @@ -646,7 +646,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, >> struct amdgpu_ring *ring, >>      id = idle; >>       /* Remember this submission as user of the VMID */ >> -   r = amdgpu_sync_fence(ring->adev, &id->active, fence); >> +   r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); >>      if (r) >>          goto error; >>  @@ -1657,7 +1657,7 @@ static int amdgpu_vm_bo_update_mapping(struct >> amdgpu_device *adev, >>          addr = 0; >>      } >>  -   r = amdgpu_sync_fence(adev, &job->sync, exclusive); >> +   r = amdgpu_sync_fence(adev, &job->sync, exclusive, false); >>      if (r) >>          goto error_free; >