On 2017-09-18 10:47 PM, zhoucm1 wrote: > > > On 2017å¹´09æ??19æ?¥ 07:16, Andres Rodriguez wrote: >> Introduce a flag to signal that access to a BO will be synchronized >> through an external mechanism. >> >> Currently all buffers shared between contexts are subject to implicit >> synchronization. However, this is only required for protocols that >> currently don't support an explicit synchronization mechanism (DRI2/3). >> >> This patch introduces the AMDGPU_GEM_CREATE_EXPLICIT_SYNC, so that >> users can specify when it is safe to disable implicit sync. >> >> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com> >> --- >>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c    | 4 +++- >>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 4 +++- >>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 ++++++++ >>  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c  | 8 ++++++-- >>  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h  | 3 ++- >>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   | 6 ++++-- >>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 10 ++++++---- >>  include/uapi/drm/amdgpu_drm.h             | 2 ++ >>  8 files changed, 34 insertions(+), 11 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> index db97e78..107533f 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> @@ -704,7 +704,9 @@ static int amdgpu_cs_sync_rings(struct >> amdgpu_cs_parser *p) >>      list_for_each_entry(e, &p->validated, tv.head) { >>          struct reservation_object *resv = e->robj->tbo.resv; >> -       r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp); >> +       r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, >> +                    p->filp, >> +                    amdgpu_bo_explicit_sync(e->robj)); >>          if (r) >>              return r; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >> index b0d45c8..21e9936 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >> @@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device >> *dev, void *data, >>                AMDGPU_GEM_CREATE_NO_CPU_ACCESS | >>                AMDGPU_GEM_CREATE_CPU_GTT_USWC | >>                AMDGPU_GEM_CREATE_VRAM_CLEARED | >> -             AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)) >> +             AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | >> +             AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) >> + >>          return -EINVAL; >>      /* reject invalid gem domains */ >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >> index c26ef53..428aae0 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >> @@ -193,6 +193,14 @@ static inline bool >> amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) >>      } >>  } >> +/** >> + * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced >> + */ >> +static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) >> +{ >> +   return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; >> +} >> + >>  int amdgpu_bo_create(struct amdgpu_device *adev, >>                  unsigned long size, int byte_align, >>                  bool kernel, u32 domain, u64 flags, >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c >> index c586f44..6bf4bed 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c >> @@ -169,14 +169,15 @@ int amdgpu_sync_fence(struct amdgpu_device >> *adev, struct amdgpu_sync *sync, >>   * >>   * @sync: sync object to add fences from reservation object to >>   * @resv: reservation object with embedded fence >> - * @shared: true if we should only sync to the exclusive fence >> + * @explicit_sync: true if we should only sync to the exclusive fence >>   * >>   * Sync to the fence >>   */ >>  int amdgpu_sync_resv(struct amdgpu_device *adev, >>               struct amdgpu_sync *sync, >>               struct reservation_object *resv, >> -            void *owner) >> +            void *owner, >> +            bool explicit_sync) > Could you move explicit_sync inside function? > like: > > bool explicit_sync = bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; > I was thinking of doing something like this originally. Extract the ttm bo from the resv object, and then get the abo from that. Would've been a pretty tiny and clean patch. However, the reservation object is a pointer instead of being embedded inside the ttm bo. So that path doesn't work. Passing in a pointer to the full bo is overkill I think. And the function might be used in cases where the reservation object is not associated with a specific bo (at least the current interface allows for that). That is why I ended up choosing this interface, even though it made the patch a lot more verbose. But if you, or anyone, has any suggestions on how to simplify this patch let me know. >>  { >>      struct reservation_object_list *flist; >>      struct dma_fence *f; >> @@ -191,6 +192,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, >>      f = reservation_object_get_excl(resv); >>      r = amdgpu_sync_fence(adev, sync, f); >> +   if (explicit_sync) >> +       return r; >> + > Do you need to move return before syncing excl? > I think the exclusive fence always needs to be syncd. My understanding of TTM is pretty basic, so I might be wrong here. Regards, Andres > Regards, > David Zhou >>      flist = reservation_object_get_list(resv); >>      if (!flist || r) >>          return r; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h >> index dc76879..70d7e3a 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h >> @@ -45,7 +45,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, >> struct amdgpu_sync *sync, >>  int amdgpu_sync_resv(struct amdgpu_device *adev, >>               struct amdgpu_sync *sync, >>               struct reservation_object *resv, >> -            void *owner); >> +            void *owner, >> +            bool explicit_sync); >>  struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, >>                       struct amdgpu_ring *ring); >>  struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> index c9c059d..18c5662 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> @@ -1510,7 +1510,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, >> uint64_t src_offset, >>      job->vm_needs_flush = vm_needs_flush; >>      if (resv) { >>          r = amdgpu_sync_resv(adev, &job->sync, resv, >> -                    AMDGPU_FENCE_OWNER_UNDEFINED); >> +                    AMDGPU_FENCE_OWNER_UNDEFINED, >> +                    false); >>          if (r) { >>              DRM_ERROR("sync failed (%d).\n", r); >>              goto error_free; >> @@ -1602,7 +1603,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, >>      if (resv) { >>          r = amdgpu_sync_resv(adev, &job->sync, resv, >> -                    AMDGPU_FENCE_OWNER_UNDEFINED); >> +                    AMDGPU_FENCE_OWNER_UNDEFINED, >> +                    amdgpu_bo_explicit_sync(bo)); >>          if (r) { >>              DRM_ERROR("sync failed (%d).\n", r); >>              goto error_free; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> index 2df254c..ca9a9b4 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> @@ -987,7 +987,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device >> *adev, struct amdgpu_vm *vm, >>      int r; >>      amdgpu_sync_create(&sync); >> -   amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner); >> +   amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, >> false); >>      r = amdgpu_sync_wait(&sync, true); >>      amdgpu_sync_free(&sync); >> @@ -1128,11 +1128,13 @@ static int amdgpu_vm_update_level(struct >> amdgpu_device *adev, >>              amdgpu_ring_pad_ib(ring, params.ib); >>              amdgpu_sync_resv(adev, &job->sync, >>                       parent->base.bo->tbo.resv, >> -                    AMDGPU_FENCE_OWNER_VM); >> +                    AMDGPU_FENCE_OWNER_VM, >> +                    amdgpu_bo_explicit_sync(parent->base.bo)); > >>              if (shadow) >>                  amdgpu_sync_resv(adev, &job->sync, >>                           shadow->tbo.resv, >> -                        AMDGPU_FENCE_OWNER_VM); >> +                        AMDGPU_FENCE_OWNER_VM, >> +                        amdgpu_bo_explicit_sync(shadow)); >>              WARN_ON(params.ib->length_dw > ndw); >>              r = amdgpu_job_submit(job, ring, &vm->entity, >> @@ -1595,7 +1597,7 @@ static int amdgpu_vm_bo_update_mapping(struct >> amdgpu_device *adev, >>          goto error_free; >>      r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, >> -                owner); >> +                owner, amdgpu_bo_explicit_sync(vm->root.base.bo)); >>      if (r) >>          goto error_free; >> diff --git a/include/uapi/drm/amdgpu_drm.h >> b/include/uapi/drm/amdgpu_drm.h >> index 69d64e5..96fcde8 100644 >> --- a/include/uapi/drm/amdgpu_drm.h >> +++ b/include/uapi/drm/amdgpu_drm.h >> @@ -91,6 +91,8 @@ extern "C" { >>  #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS   (1 << 5) >>  /* Flag that BO is always valid in this VM */ >>  #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID   (1 << 6) >> +/* Flag that BO sharing will be explicitly synchronized */ >> +#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC       (1 << 7) >>  struct drm_amdgpu_gem_create_in { >>      /** the requested memory size */ >