Am 29.05.2017 um 09:30 schrieb Dave Airlie: > From: Dave Airlie <airlied at redhat.com> > > This creates a new command submission chunk for amdgpu > to add in and out sync objects around the submission. > > Sync objects are managed via the drm syncobj ioctls. > > The command submission interface is enhanced with two new > chunks, one for syncobj pre submission dependencies, > and one for post submission sync obj signalling, > and just takes a list of handles for each. > > This is based on work originally done by David Zhou at AMD, > with input from Christian Konig on what things should look like. > > In theory VkFences could be backed with sync objects and > just get passed into the cs as syncobj handles as well. > > NOTE: this interface addition needs a version bump to expose > it to userspace. > > TODO: update to dep_sync when rebasing onto amdgpu master. > (with this - r-b from Christian) > > v1.1: keep file reference on import. > v2: move to using syncobjs > v2.1: change some APIs to just use p pointer. > v3: make more robust against CS failures, we now add the > wait sems but only remove them once the CS job has been > submitted. > v4: rewrite names of API and base on new syncobj code. > v5: move post deps earlier, rename some apis > > Signed-off-by: Dave Airlie <airlied at redhat.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 87 ++++++++++++++++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- > include/uapi/drm/amdgpu_drm.h | 6 +++ > 3 files changed, 93 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index 30225d7..3cbd547 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -27,6 +27,7 @@ > #include <linux/pagemap.h> > #include <drm/drmP.h> > #include <drm/amdgpu_drm.h> > +#include <drm/drm_syncobj.h> > #include "amdgpu.h" > #include "amdgpu_trace.h" > > @@ -226,6 +227,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) > break; > > case AMDGPU_CHUNK_ID_DEPENDENCIES: > + case AMDGPU_CHUNK_ID_SYNCOBJ_IN: > + case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: > break; > > default: > @@ -1040,6 +1043,40 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, > return 0; > } > > +static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, > + uint32_t handle) > +{ > + int r; > + struct dma_fence *fence; > + r = drm_syncobj_fence_get(p->filp, handle, &fence); > + if (r) > + return r; > + > + r = amdgpu_sync_fence(p->adev, &p->job->sync, fence); > + dma_fence_put(fence); > + > + return r; > +} > + > +static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, > + struct amdgpu_cs_chunk *chunk) > +{ > + unsigned num_deps; > + int i, r; > + struct drm_amdgpu_cs_chunk_sem *deps; > + > + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; > + num_deps = chunk->length_dw * 4 / > + sizeof(struct drm_amdgpu_cs_chunk_sem); > + > + for (i = 0; i < num_deps; ++i) { > + r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle); > + if (r) > + return r; > + } > + return 0; > +} > + > static int amdgpu_cs_dependencies(struct amdgpu_device *adev, > struct amdgpu_cs_parser *p) > { > @@ -1054,12 +1091,54 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, > r = amdgpu_cs_process_fence_dep(p, chunk); > if (r) > return r; > + } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) { > + r = amdgpu_cs_process_syncobj_in_dep(p, chunk); > + if (r) > + return r; > } > } > > return 0; > } > > +static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, > + struct amdgpu_cs_chunk *chunk) > +{ > + unsigned num_deps; > + int i, r; > + struct drm_amdgpu_cs_chunk_sem *deps; > + > + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; > + num_deps = chunk->length_dw * 4 / > + sizeof(struct drm_amdgpu_cs_chunk_sem); > + > + for (i = 0; i < num_deps; ++i) { > + r = drm_syncobj_replace_fence(p->filp, deps[i].handle, > + p->fence); > + if (r) > + return r; Thinking more about it, here is still a problem with the handling. Imagine you have multiple semaphores to signal and only the last handle is invalid. So if you have n handles then the sync objects 1..(n-1) will get a fence which is never signaled because n aborts the command submission. I think the only valid approach to handle this is to resolve the handles upfront during the initial parsing of the chunks. Christian. > + } > + return 0; > +} > + > +static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) > +{ > + int i, r; > + > + for (i = 0; i < p->nchunks; ++i) { > + struct amdgpu_cs_chunk *chunk; > + > + chunk = &p->chunks[i]; > + > + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) { > + r = amdgpu_cs_process_syncobj_out_dep(p, chunk); > + if (r) > + return r; > + } > + } > + return 0; > +} > + > static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > union drm_amdgpu_cs *cs) > { > @@ -1080,6 +1159,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > job->owner = p->filp; > job->fence_ctx = entity->fence_context; > p->fence = dma_fence_get(&job->base.s_fence->finished); > + > + r = amdgpu_cs_post_dependencies(p); > + if (r) { > + amdgpu_job_free(job); > + return r; > + } > + > cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); > job->uf_sequence = cs->out.handle; > amdgpu_job_free_resources(job); > @@ -1087,7 +1173,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > > trace_amdgpu_cs_ioctl(job); > amd_sched_entity_push_job(&job->base); > - > return 0; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index f2d705e..5329e7f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -719,7 +719,7 @@ static struct drm_driver kms_driver = { > .driver_features = > DRIVER_USE_AGP | > DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | > - DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET, > + DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, > .load = amdgpu_driver_load_kms, > .open = amdgpu_driver_open_kms, > .postclose = amdgpu_driver_postclose_kms, > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h > index 6c249e5..cc702ca 100644 > --- a/include/uapi/drm/amdgpu_drm.h > +++ b/include/uapi/drm/amdgpu_drm.h > @@ -416,6 +416,8 @@ struct drm_amdgpu_gem_va { > #define AMDGPU_CHUNK_ID_IB 0x01 > #define AMDGPU_CHUNK_ID_FENCE 0x02 > #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 > +#define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 > +#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 > > struct drm_amdgpu_cs_chunk { > __u32 chunk_id; > @@ -483,6 +485,10 @@ struct drm_amdgpu_cs_chunk_fence { > __u32 offset; > }; > > +struct drm_amdgpu_cs_chunk_sem { > + __u32 handle; > +}; > + > struct drm_amdgpu_cs_chunk_data { > union { > struct drm_amdgpu_cs_chunk_ib ib_data;