Am 12.04.2017 um 06:57 schrieb Dave Airlie: > From: Dave Airlie <airlied at redhat.com> > > This creates a new command submission chunk for amdgpu > to add wait and signal sync objects around the submission. > > Sync objects are managed via the drm syncobj ioctls. > > The command submission interface is enhanced with two new > chunks, one for semaphore waiting, one for semaphore signalling > and just takes a list of handles for each. > > This is based on work originally done by David Zhou at AMD, > with input from Christian Konig on what things should look like. > > NOTE: this interface addition needs a version bump to expose > it to userspace. > > v1.1: keep file reference on import. > v2: move to using syncobjs > v2.1: change some APIs to just use p pointer. > v3: make more robust against CS failures, we now add the > wait sems but only remove them once the CS job has been > submitted. > > Signed-off-by: Dave Airlie <airlied at redhat.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 120 +++++++++++++++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- > include/uapi/drm/amdgpu_drm.h | 6 ++ > 3 files changed, 126 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index df25b32..e9b5796 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -27,6 +27,7 @@ > #include <linux/pagemap.h> > #include <drm/drmP.h> > #include <drm/amdgpu_drm.h> > +#include <drm/drm_syncobj.h> > #include "amdgpu.h" > #include "amdgpu_trace.h" > > @@ -217,6 +218,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) > break; > > case AMDGPU_CHUNK_ID_DEPENDENCIES: > + case AMDGPU_CHUNK_ID_SEM_WAIT: > + case AMDGPU_CHUNK_ID_SEM_SIGNAL: > break; > > default: > @@ -1008,6 +1011,73 @@ static int amdgpu_process_fence_dep(struct amdgpu_cs_parser *p, > return 0; > } > > +static int amdgpu_sem_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, > + uint32_t handle) > +{ > + int r; > + struct dma_fence *fence; Missing new line between declaration and code. > + r = drm_syncobj_fence_get(p->filp, handle, &fence); > + if (r) > + return r; > + > + r = amdgpu_sync_fence(p->adev, &p->job->sync, fence); > + dma_fence_put(fence); > + > + return r; > +} > + > +static int amdgpu_sem_lookup_and_remove(struct amdgpu_cs_parser *p, > + uint32_t handle) > +{ > + int r; > + struct dma_fence *old_fence; > + > + r = drm_syncobj_replace_fence(p->filp, handle, NULL); > + if (r) > + return r; > + dma_fence_put(old_fence); Am I wrong or is old_fence not initialized here? Additional to that what happens when the fence in the sync object was changed while we do the CS? Or even worse the handle got assigned to a new sync object. Regards, Christian. > + > + return r; > +} > + > +static int amdgpu_process_sem_wait_dep(struct amdgpu_cs_parser *p, > + struct amdgpu_cs_chunk *chunk) > +{ > + unsigned num_deps; > + int i, r; > + struct drm_amdgpu_cs_chunk_sem *deps; > + > + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; > + num_deps = chunk->length_dw * 4 / > + sizeof(struct drm_amdgpu_cs_chunk_sem); > + > + for (i = 0; i < num_deps; ++i) { > + r = amdgpu_sem_lookup_and_add_to_sync(p, deps[i].handle); > + if (r) > + return r; > + } > + return 0; > +} > + > +static int amdgpu_process_sem_wait_post(struct amdgpu_cs_parser *p, > + struct amdgpu_cs_chunk *chunk) > +{ > + unsigned num_deps; > + int i, r; > + struct drm_amdgpu_cs_chunk_sem *deps; > + > + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; > + num_deps = chunk->length_dw * 4 / > + sizeof(struct drm_amdgpu_cs_chunk_sem); > + > + for (i = 0; i < num_deps; ++i) { > + r = amdgpu_sem_lookup_and_remove(p, deps[i].handle); > + if (r) > + return r; > + } > + return 0; > +} > + > static int amdgpu_cs_dependencies(struct amdgpu_device *adev, > struct amdgpu_cs_parser *p) > { > @@ -1022,12 +1092,60 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, > r = amdgpu_process_fence_dep(p, chunk); > if (r) > return r; > + } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) { > + r = amdgpu_process_sem_wait_dep(p, chunk); > + if (r) > + return r; > } > } > > return 0; > } > > +static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p, > + struct amdgpu_cs_chunk *chunk) > +{ > + unsigned num_deps; > + int i, r; > + struct drm_amdgpu_cs_chunk_sem *deps; > + > + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; > + num_deps = chunk->length_dw * 4 / > + sizeof(struct drm_amdgpu_cs_chunk_sem); > + > + for (i = 0; i < num_deps; ++i) { > + r = drm_syncobj_replace_fence(p->filp, deps[i].handle, > + p->fence); > + if (r) > + return r; > + } > + return 0; > +} > + > +static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) > +{ > + int i, r; > + > + for (i = 0; i < p->nchunks; ++i) { > + struct amdgpu_cs_chunk *chunk; > + > + chunk = &p->chunks[i]; > + > + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) { > + r = amdgpu_process_sem_wait_post(p, chunk); > + if (r) > + return r; > + } > + > + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) { > + r = amdgpu_process_sem_signal_dep(p, chunk); > + if (r) > + return r; > + } > + } > + return 0; > +} > + > static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > union drm_amdgpu_cs *cs) > { > @@ -1055,7 +1173,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > trace_amdgpu_cs_ioctl(job); > amd_sched_entity_push_job(&job->base); > > - return 0; > + return amdgpu_cs_post_dependencies(p); > } > > int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index b76cd69..e95951e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -683,7 +683,7 @@ static struct drm_driver kms_driver = { > .driver_features = > DRIVER_USE_AGP | > DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | > - DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET, > + DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, > .load = amdgpu_driver_load_kms, > .open = amdgpu_driver_open_kms, > .preclose = amdgpu_driver_preclose_kms, > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h > index 5797283..647c520 100644 > --- a/include/uapi/drm/amdgpu_drm.h > +++ b/include/uapi/drm/amdgpu_drm.h > @@ -390,6 +390,8 @@ struct drm_amdgpu_gem_va { > #define AMDGPU_CHUNK_ID_IB 0x01 > #define AMDGPU_CHUNK_ID_FENCE 0x02 > #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 > +#define AMDGPU_CHUNK_ID_SEM_WAIT 0x04 > +#define AMDGPU_CHUNK_ID_SEM_SIGNAL 0x05 > > struct drm_amdgpu_cs_chunk { > __u32 chunk_id; > @@ -454,6 +456,10 @@ struct drm_amdgpu_cs_chunk_fence { > __u32 offset; > }; > > +struct drm_amdgpu_cs_chunk_sem { > + __u32 handle; > +}; > + > struct drm_amdgpu_cs_chunk_data { > union { > struct drm_amdgpu_cs_chunk_ib ib_data;