On Wed, Apr 12, 2017 at 12:57 AM, Dave Airlie <airlied at gmail.com> wrote: > From: Dave Airlie <airlied at redhat.com> > > This creates a new command submission chunk for amdgpu > to add wait and signal sync objects around the submission. > > Sync objects are managed via the drm syncobj ioctls. > > The command submission interface is enhanced with two new > chunks, one for semaphore waiting, one for semaphore signalling > and just takes a list of handles for each. > > This is based on work originally done by David Zhou at AMD, > with input from Christian Konig on what things should look like. > > NOTE: this interface addition needs a version bump to expose > it to userspace. > > v1.1: keep file reference on import. > v2: move to using syncobjs > v2.1: change some APIs to just use p pointer. > v3: make more robust against CS failures, we now add the > wait sems but only remove them once the CS job has been > submitted. > > Signed-off-by: Dave Airlie <airlied at redhat.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 120 +++++++++++++++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- > include/uapi/drm/amdgpu_drm.h | 6 ++ > 3 files changed, 126 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index df25b32..e9b5796 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -27,6 +27,7 @@ > #include <linux/pagemap.h> > #include <drm/drmP.h> > #include <drm/amdgpu_drm.h> > +#include <drm/drm_syncobj.h> > #include "amdgpu.h" > #include "amdgpu_trace.h" > > @@ -217,6 +218,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) > break; > > case AMDGPU_CHUNK_ID_DEPENDENCIES: > + case AMDGPU_CHUNK_ID_SEM_WAIT: > + case AMDGPU_CHUNK_ID_SEM_SIGNAL: > break; > > default: > @@ -1008,6 +1011,73 @@ static int amdgpu_process_fence_dep(struct amdgpu_cs_parser *p, > return 0; > } > > +static int amdgpu_sem_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, > + uint32_t handle) > +{ > + int r; > + struct dma_fence *fence; > + r = drm_syncobj_fence_get(p->filp, handle, &fence); > + if (r) > + return r; > + > + r = amdgpu_sync_fence(p->adev, &p->job->sync, fence); > + dma_fence_put(fence); > + > + return r; > +} > + > +static int amdgpu_sem_lookup_and_remove(struct amdgpu_cs_parser *p, > + uint32_t handle) > +{ > + int r; > + struct dma_fence *old_fence; > + > + r = drm_syncobj_replace_fence(p->filp, handle, NULL); > + if (r) > + return r; > + dma_fence_put(old_fence); > + > + return r; > +} > + > +static int amdgpu_process_sem_wait_dep(struct amdgpu_cs_parser *p, > + struct amdgpu_cs_chunk *chunk) > +{ > + unsigned num_deps; > + int i, r; > + struct drm_amdgpu_cs_chunk_sem *deps; > + > + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; > + num_deps = chunk->length_dw * 4 / > + sizeof(struct drm_amdgpu_cs_chunk_sem); > + > + for (i = 0; i < num_deps; ++i) { > + r = amdgpu_sem_lookup_and_add_to_sync(p, deps[i].handle); > + if (r) > + return r; > + } > + return 0; > +} > + > +static int amdgpu_process_sem_wait_post(struct amdgpu_cs_parser *p, > + struct amdgpu_cs_chunk *chunk) > +{ > + unsigned num_deps; > + int i, r; > + struct drm_amdgpu_cs_chunk_sem *deps; > + > + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; > + num_deps = chunk->length_dw * 4 / > + sizeof(struct drm_amdgpu_cs_chunk_sem); > + > + for (i = 0; i < num_deps; ++i) { > + r = amdgpu_sem_lookup_and_remove(p, deps[i].handle); > + if (r) > + return r; > + } > + return 0; > +} > + > static int amdgpu_cs_dependencies(struct amdgpu_device *adev, > struct amdgpu_cs_parser *p) > { > @@ -1022,12 +1092,60 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, > r = amdgpu_process_fence_dep(p, chunk); > if (r) > return r; > + } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) { > + r = amdgpu_process_sem_wait_dep(p, chunk); > + if (r) > + return r; > } > } > > return 0; > } > > +static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p, > + struct amdgpu_cs_chunk *chunk) > +{ > + unsigned num_deps; > + int i, r; > + struct drm_amdgpu_cs_chunk_sem *deps; > + > + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; > + num_deps = chunk->length_dw * 4 / > + sizeof(struct drm_amdgpu_cs_chunk_sem); > + > + for (i = 0; i < num_deps; ++i) { > + r = drm_syncobj_replace_fence(p->filp, deps[i].handle, > + p->fence); > + if (r) > + return r; > + } > + return 0; > +} > + > +static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) > +{ > + int i, r; > + > + for (i = 0; i < p->nchunks; ++i) { > + struct amdgpu_cs_chunk *chunk; > + > + chunk = &p->chunks[i]; > + > + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) { > + r = amdgpu_process_sem_wait_post(p, chunk); > + if (r) > + return r; > + } > + > + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) { > + r = amdgpu_process_sem_signal_dep(p, chunk); > + if (r) > + return r; > + } > + } > + return 0; > +} > + > static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > union drm_amdgpu_cs *cs) > { > @@ -1055,7 +1173,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > trace_amdgpu_cs_ioctl(job); > amd_sched_entity_push_job(&job->base); > > - return 0; > + return amdgpu_cs_post_dependencies(p); > } > > int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index b76cd69..e95951e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -683,7 +683,7 @@ static struct drm_driver kms_driver = { > .driver_features = > DRIVER_USE_AGP | > DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | > - DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET, > + DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, > .load = amdgpu_driver_load_kms, > .open = amdgpu_driver_open_kms, > .preclose = amdgpu_driver_preclose_kms, > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h > index 5797283..647c520 100644 > --- a/include/uapi/drm/amdgpu_drm.h > +++ b/include/uapi/drm/amdgpu_drm.h > @@ -390,6 +390,8 @@ struct drm_amdgpu_gem_va { > #define AMDGPU_CHUNK_ID_IB 0x01 > #define AMDGPU_CHUNK_ID_FENCE 0x02 > #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 > +#define AMDGPU_CHUNK_ID_SEM_WAIT 0x04 > +#define AMDGPU_CHUNK_ID_SEM_SIGNAL 0x05 > > struct drm_amdgpu_cs_chunk { > __u32 chunk_id; > @@ -454,6 +456,10 @@ struct drm_amdgpu_cs_chunk_fence { > __u32 offset; > }; > > +struct drm_amdgpu_cs_chunk_sem { > + __u32 handle; Do we need a pad here? Alex > +}; > + > struct drm_amdgpu_cs_chunk_data { > union { > struct drm_amdgpu_cs_chunk_ib ib_data; > -- > 2.9.3 > > _______________________________________________ > dri-devel mailing list > dri-devel at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel