Am 19.09.2017 um 01:16 schrieb Andres Rodriguez: > Introduce a flag to signal that access to a BO will be synchronized > through an external mechanism. > > Currently all buffers shared between contexts are subject to implicit > synchronization. However, this is only required for protocols that > currently don't support an explicit synchronization mechanism (DRI2/3). > > This patch introduces the AMDGPU_GEM_CREATE_EXPLICIT_SYNC, so that > users can specify when it is safe to disable implicit sync. > > Signed-off-by: Andres Rodriguez <andresx7 at gmail.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +++- > drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 +++- > drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 ++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 8 ++++++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 3 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 ++++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++++++---- > include/uapi/drm/amdgpu_drm.h | 2 ++ > 8 files changed, 34 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index db97e78..107533f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -704,7 +704,9 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) > > list_for_each_entry(e, &p->validated, tv.head) { > struct reservation_object *resv = e->robj->tbo.resv; > - r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp); > + r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, > + p->filp, > + amdgpu_bo_explicit_sync(e->robj)); Doesn't "p->filp," still fits on the previous line? > > if (r) > return r; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > index b0d45c8..21e9936 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > @@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, > AMDGPU_GEM_CREATE_NO_CPU_ACCESS | > AMDGPU_GEM_CREATE_CPU_GTT_USWC | > AMDGPU_GEM_CREATE_VRAM_CLEARED | > - AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)) > + AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | > + AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) > + > return -EINVAL; > > /* reject invalid gem domains */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > index c26ef53..428aae0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > @@ -193,6 +193,14 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) > } > } > > +/** > + * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced > + */ > +static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) > +{ > + return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; > +} > + There is probably only one place where this is needed (the CS IOCTL), so an extra functions seems to be overkill. > int amdgpu_bo_create(struct amdgpu_device *adev, > unsigned long size, int byte_align, > bool kernel, u32 domain, u64 flags, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c > index c586f44..6bf4bed 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c > @@ -169,14 +169,15 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, > * > * @sync: sync object to add fences from reservation object to > * @resv: reservation object with embedded fence > - * @shared: true if we should only sync to the exclusive fence > + * @explicit_sync: true if we should only sync to the exclusive fence > * > * Sync to the fence > */ > int amdgpu_sync_resv(struct amdgpu_device *adev, > struct amdgpu_sync *sync, > struct reservation_object *resv, > - void *owner) > + void *owner, > + bool explicit_sync) The new parameter can be on the same line as "void *owner". > { > struct reservation_object_list *flist; > struct dma_fence *f; > @@ -191,6 +192,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, > f = reservation_object_get_excl(resv); > r = amdgpu_sync_fence(adev, sync, f); > > + if (explicit_sync) > + return r; > + > flist = reservation_object_get_list(resv); > if (!flist || r) > return r; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h > index dc76879..70d7e3a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h > @@ -45,7 +45,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, > int amdgpu_sync_resv(struct amdgpu_device *adev, > struct amdgpu_sync *sync, > struct reservation_object *resv, > - void *owner); > + void *owner, > + bool explicit_sync); > struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, > struct amdgpu_ring *ring); > struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index c9c059d..18c5662 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -1510,7 +1510,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, > job->vm_needs_flush = vm_needs_flush; > if (resv) { > r = amdgpu_sync_resv(adev, &job->sync, resv, > - AMDGPU_FENCE_OWNER_UNDEFINED); > + AMDGPU_FENCE_OWNER_UNDEFINED, > + false); > if (r) { > DRM_ERROR("sync failed (%d).\n", r); > goto error_free; > @@ -1602,7 +1603,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, > > if (resv) { > r = amdgpu_sync_resv(adev, &job->sync, resv, > - AMDGPU_FENCE_OWNER_UNDEFINED); > + AMDGPU_FENCE_OWNER_UNDEFINED, > + amdgpu_bo_explicit_sync(bo)); Copy & fill needs to be implicitly synced. > if (r) { > DRM_ERROR("sync failed (%d).\n", r); > goto error_free; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 2df254c..ca9a9b4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -987,7 +987,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, > int r; > > amdgpu_sync_create(&sync); > - amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner); > + amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false); > r = amdgpu_sync_wait(&sync, true); > amdgpu_sync_free(&sync); > > @@ -1128,11 +1128,13 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, > amdgpu_ring_pad_ib(ring, params.ib); > amdgpu_sync_resv(adev, &job->sync, > parent->base.bo->tbo.resv, > - AMDGPU_FENCE_OWNER_VM); > + AMDGPU_FENCE_OWNER_VM, > + amdgpu_bo_explicit_sync(parent->base.bo)); > if (shadow) > amdgpu_sync_resv(adev, &job->sync, > shadow->tbo.resv, > - AMDGPU_FENCE_OWNER_VM); > + AMDGPU_FENCE_OWNER_VM, > + amdgpu_bo_explicit_sync(shadow)); All VM updates need to be implicitly synced. > > WARN_ON(params.ib->length_dw > ndw); > r = amdgpu_job_submit(job, ring, &vm->entity, > @@ -1595,7 +1597,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, > goto error_free; > > r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, > - owner); > + owner, amdgpu_bo_explicit_sync(vm->root.base.bo)); > if (r) > goto error_free; > > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h > index 69d64e5..96fcde8 100644 > --- a/include/uapi/drm/amdgpu_drm.h > +++ b/include/uapi/drm/amdgpu_drm.h > @@ -91,6 +91,8 @@ extern "C" { > #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) > /* Flag that BO is always valid in this VM */ > #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) > +/* Flag that BO sharing will be explicitly synchronized */ > +#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) > > struct drm_amdgpu_gem_create_in { > /** the requested memory size */