On Tue, Apr 04, 2017 at 09:40:57AM +0200, Christian König wrote: > Am 04.04.2017 um 06:27 schrieb Dave Airlie: > > From: Dave Airlie <airlied@xxxxxxxxxx> > > > > This creates a new interface for amdgpu with ioctls to > > create/destroy/import and export shared semaphores using > > sem object backed by the sync_file code. The semaphores > > are not installed as fd (except for export), but rather > > like other driver internal objects in an idr. The idr > > holds the initial reference on the sync file. > > > > The command submission interface is enhanced with two new > > chunks, one for semaphore waiting, one for semaphore signalling > > and just takes a list of handles for each. > > > > This is based on work originally done by David Zhou at AMD, > > with input from Christian Konig on what things should look like. > > > > NOTE: this interface addition needs a version bump to expose > > it to userspace. > > > > v1.1: keep file reference on import. > > v2: move to using syncobjs > > > > Signed-off-by: Dave Airlie <airlied@xxxxxxxxxx> > > Looks good to me in general, but one note below. > > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 86 ++++++++++++++++++++++++++++++++- > > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- > > include/uapi/drm/amdgpu_drm.h | 6 +++ > > 3 files changed, 92 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > index 4671432..4dd210b 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > @@ -27,6 +27,7 @@ > > #include <linux/pagemap.h> > > #include <drm/drmP.h> > > #include <drm/amdgpu_drm.h> > > +#include <drm/drm_syncobj.h> > > #include "amdgpu.h" > > #include "amdgpu_trace.h" > > @@ -217,6 +218,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) > > break; > > case AMDGPU_CHUNK_ID_DEPENDENCIES: > > + case AMDGPU_CHUNK_ID_SEM_WAIT: > > + case AMDGPU_CHUNK_ID_SEM_SIGNAL: > > break; > > default: > > @@ -1009,6 +1012,44 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev, > > return 0; > > } > > +static int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev, > > + struct drm_file *file_private, > > + struct amdgpu_sync *sync, > > + uint32_t handle) > > +{ > > + int r; > > + struct dma_fence *old_fence; > > + r = drm_syncobj_swap_fences(file_private, handle, NULL, &old_fence); > > What happens when we the CS fails later on? Would the semaphore then not > contain the fence any more? Atm rules are that you're not allowed to publish a dma_fence before you're committed to executing whatever it represents (i.e. guaranteed to get signalled). Publish depends upon context, but can include: Install an fd for a type fence sync_file, swap the sync_file in a sema type, assign it to the reservation_object for implicit sync. I guess we need to have drm_syncobj_lookup exported (or whatever it was), keep a temporary pointer, and then swap it only at the end (with sync_file_swap_fence or whatever it was). -Daniel > > Christian. > > > + if (r) > > + return r; > > + > > + r = amdgpu_sync_fence(adev, sync, old_fence); > > + dma_fence_put(old_fence); > > + > > + return r; > > +} > > + > > +static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev, > > + struct amdgpu_cs_parser *p, > > + struct amdgpu_cs_chunk *chunk) > > +{ > > + unsigned num_deps; > > + int i, r; > > + struct drm_amdgpu_cs_chunk_sem *deps; > > + > > + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; > > + num_deps = chunk->length_dw * 4 / > > + sizeof(struct drm_amdgpu_cs_chunk_sem); > > + > > + for (i = 0; i < num_deps; ++i) { > > + r = amdgpu_sem_lookup_and_sync(adev, p->filp, &p->job->sync, > > + deps[i].handle); > > + if (r) > > + return r; > > + } > > + return 0; > > +} > > + > > static int amdgpu_cs_dependencies(struct amdgpu_device *adev, > > struct amdgpu_cs_parser *p) > > { > > @@ -1023,12 +1064,55 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, > > r = amdgpu_process_fence_dep(adev, p, chunk); > > if (r) > > return r; > > + } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) { > > + r = amdgpu_process_sem_wait_dep(adev, p, chunk); > > + if (r) > > + return r; > > } > > } > > return 0; > > } > > +static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p, > > + struct amdgpu_cs_chunk *chunk, > > + struct dma_fence *fence) > > +{ > > + unsigned num_deps; > > + int i, r; > > + struct drm_amdgpu_cs_chunk_sem *deps; > > + > > + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; > > + num_deps = chunk->length_dw * 4 / > > + sizeof(struct drm_amdgpu_cs_chunk_sem); > > + > > + for (i = 0; i < num_deps; ++i) { > > + r = drm_syncobj_replace_fence(p->filp, deps[i].handle, > > + fence); > > + if (r) > > + return r; > > + } > > + return 0; > > +} > > + > > +static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) > > +{ > > + int i, r; > > + > > + for (i = 0; i < p->nchunks; ++i) { > > + struct amdgpu_cs_chunk *chunk; > > + > > + chunk = &p->chunks[i]; > > + > > + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) { > > + r = amdgpu_process_sem_signal_dep(p, chunk, p->fence); > > + if (r) > > + return r; > > + } > > + } > > + return 0; > > +} > > + > > static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > > union drm_amdgpu_cs *cs) > > { > > @@ -1056,7 +1140,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > > trace_amdgpu_cs_ioctl(job); > > amd_sched_entity_push_job(&job->base); > > - return 0; > > + return amdgpu_cs_post_dependencies(p); > > } > > int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > index b76cd69..e95951e 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > @@ -683,7 +683,7 @@ static struct drm_driver kms_driver = { > > .driver_features = > > DRIVER_USE_AGP | > > DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | > > - DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET, > > + DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, > > .load = amdgpu_driver_load_kms, > > .open = amdgpu_driver_open_kms, > > .preclose = amdgpu_driver_preclose_kms, > > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h > > index 5797283..647c520 100644 > > --- a/include/uapi/drm/amdgpu_drm.h > > +++ b/include/uapi/drm/amdgpu_drm.h > > @@ -390,6 +390,8 @@ struct drm_amdgpu_gem_va { > > #define AMDGPU_CHUNK_ID_IB 0x01 > > #define AMDGPU_CHUNK_ID_FENCE 0x02 > > #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 > > +#define AMDGPU_CHUNK_ID_SEM_WAIT 0x04 > > +#define AMDGPU_CHUNK_ID_SEM_SIGNAL 0x05 > > struct drm_amdgpu_cs_chunk { > > __u32 chunk_id; > > @@ -454,6 +456,10 @@ struct drm_amdgpu_cs_chunk_fence { > > __u32 offset; > > }; > > +struct drm_amdgpu_cs_chunk_sem { > > + __u32 handle; > > +}; > > + > > struct drm_amdgpu_cs_chunk_data { > > union { > > struct drm_amdgpu_cs_chunk_ib ib_data; > > > _______________________________________________ > dri-devel mailing list > dri-devel@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/dri-devel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel