Am 15.03.2017 um 10:01 schrieb Daniel Vetter: > On Tue, Mar 14, 2017 at 10:50:54AM +1000, Dave Airlie wrote: >> From: Dave Airlie <airlied at redhat.com> >> >> This creates a new interface for amdgpu with ioctls to >> create/destroy/import and export shared semaphores using >> sem object backed by the sync_file code. The semaphores >> are not installed as fd (except for export), but rather >> like other driver internal objects in an idr. The idr >> holds the initial reference on the sync file. >> >> The command submission interface is enhanced with two new >> chunks, one for semaphore waiting, one for semaphore signalling >> and just takes a list of handles for each. >> >> This is based on work originally done by David Zhou at AMD, >> with input from Christian Konig on what things should look like. >> >> NOTE: this interface addition needs a version bump to expose >> it to userspace. >> >> Signed-off-by: Dave Airlie <airlied at redhat.com> > Another uapi corner case: Since you allow semaphores to be created before > they have a first fence attached, that essentially creates future fences. > I.e. sync_file fd with no fence yet attached. We want that anyway, but > maybe not together with semaphores (since there's some more implications). > > I think it'd be good to attach a dummy fence which already starts out as > signalled to sync_file->fence for semaphores. That way userspace can't go > evil, create a semaphore, then pass it to a driver which then promptly > oopses or worse because it assumes then when it has a sync_file, it also > has a fence. And the dummy fence could be globally shared, so not really > overhead. Sounds fishy to me: what happens in the case of a race condition where the receiver sometimes waits on the dummy and sometimes on the real fence? I would rather teach the users of sync_file to return a proper error code when you want to wait on a sync_file which doesn't have a fence yet. Christian. 
> -Daniel > >> --- >> drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 12 ++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 70 ++++++++++- >> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 8 ++ >> drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c | 204 ++++++++++++++++++++++++++++++++ >> include/uapi/drm/amdgpu_drm.h | 28 +++++ >> 6 files changed, 322 insertions(+), 2 deletions(-) >> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile >> index 2814aad..404bcba 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/Makefile >> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile >> @@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ >> atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ >> amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ >> amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ >> - amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o >> + amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_sem.o >> >> # add asic specific block >> amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> index c1b9135..4ed8811 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> @@ -702,6 +702,8 @@ struct amdgpu_fpriv { >> struct mutex bo_list_lock; >> struct idr bo_list_handles; >> struct amdgpu_ctx_mgr ctx_mgr; >> + spinlock_t sem_handles_lock; >> + struct idr sem_handles; >> }; >> >> /* >> @@ -1814,5 +1816,15 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, >> uint64_t addr, struct amdgpu_bo **bo); >> int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser); >> >> +int amdgpu_sem_ioctl(struct drm_device *dev, void *data, >> + struct drm_file *filp); >> +void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle); >> +int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv, >> 
+ uint32_t handle, >> + struct dma_fence *fence); >> +int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev, >> + struct amdgpu_fpriv *fpriv, >> + struct amdgpu_sync *sync, >> + uint32_t handle); >> #include "amdgpu_object.h" >> #endif >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> index 4671432..80fc94b 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> @@ -217,6 +217,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) >> break; >> >> case AMDGPU_CHUNK_ID_DEPENDENCIES: >> + case AMDGPU_CHUNK_ID_SEM_WAIT: >> + case AMDGPU_CHUNK_ID_SEM_SIGNAL: >> break; >> >> default: >> @@ -1009,6 +1011,28 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev, >> return 0; >> } >> >> +static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev, >> + struct amdgpu_cs_parser *p, >> + struct amdgpu_cs_chunk *chunk) >> +{ >> + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; >> + unsigned num_deps; >> + int i, r; >> + struct drm_amdgpu_cs_chunk_sem *deps; >> + >> + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; >> + num_deps = chunk->length_dw * 4 / >> + sizeof(struct drm_amdgpu_cs_chunk_sem); >> + >> + for (i = 0; i < num_deps; ++i) { >> + r = amdgpu_sem_lookup_and_sync(adev, fpriv, &p->job->sync, >> + deps[i].handle); >> + if (r) >> + return r; >> + } >> + return 0; >> +} >> + >> static int amdgpu_cs_dependencies(struct amdgpu_device *adev, >> struct amdgpu_cs_parser *p) >> { >> @@ -1023,12 +1047,56 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, >> r = amdgpu_process_fence_dep(adev, p, chunk); >> if (r) >> return r; >> + } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) { >> + r = amdgpu_process_sem_wait_dep(adev, p, chunk); >> + if (r) >> + return r; >> } >> } >> >> return 0; >> } >> >> +static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p, >> + struct amdgpu_cs_chunk *chunk, >> + struct 
dma_fence *fence) >> +{ >> + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; >> + unsigned num_deps; >> + int i, r; >> + struct drm_amdgpu_cs_chunk_sem *deps; >> + >> + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; >> + num_deps = chunk->length_dw * 4 / >> + sizeof(struct drm_amdgpu_cs_chunk_sem); >> + >> + for (i = 0; i < num_deps; ++i) { >> + r = amdgpu_sem_lookup_and_signal(fpriv, deps[i].handle, >> + fence); >> + if (r) >> + return r; >> + } >> + return 0; >> +} >> + >> +static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) >> +{ >> + int i, r; >> + >> + for (i = 0; i < p->nchunks; ++i) { >> + struct amdgpu_cs_chunk *chunk; >> + >> + chunk = &p->chunks[i]; >> + >> + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) { >> + r = amdgpu_process_sem_signal_dep(p, chunk, p->fence); >> + if (r) >> + return r; >> + } >> + } >> + return 0; >> +} >> + >> static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, >> union drm_amdgpu_cs *cs) >> { >> @@ -1056,7 +1124,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, >> trace_amdgpu_cs_ioctl(job); >> amd_sched_entity_push_job(&job->base); >> >> - return 0; >> + return amdgpu_cs_post_dependencies(p); >> } >> >> int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >> index 61d94c7..013aed1 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >> @@ -664,6 +664,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) >> mutex_init(&fpriv->bo_list_lock); >> idr_init(&fpriv->bo_list_handles); >> >> + spin_lock_init(&fpriv->sem_handles_lock); >> + idr_init(&fpriv->sem_handles); >> amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); >> >> file_priv->driver_priv = fpriv; >> @@ -689,6 +691,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, >> struct amdgpu_device *adev = dev->dev_private; >> struct 
amdgpu_fpriv *fpriv = file_priv->driver_priv; >> struct amdgpu_bo_list *list; >> + struct amdgpu_sem *sem; >> int handle; >> >> if (!fpriv) >> @@ -715,6 +718,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, >> idr_destroy(&fpriv->bo_list_handles); >> mutex_destroy(&fpriv->bo_list_lock); >> >> + idr_for_each_entry(&fpriv->sem_handles, sem, handle) >> + amdgpu_sem_destroy(fpriv, handle); >> + idr_destroy(&fpriv->sem_handles); >> + >> kfree(fpriv); >> file_priv->driver_priv = NULL; >> >> @@ -896,6 +903,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { >> DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), >> DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), >> DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), >> + DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), >> }; >> const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms); >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c >> new file mode 100644 >> index 0000000..066520a >> --- /dev/null >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c >> @@ -0,0 +1,204 @@ >> +/* >> + * Copyright 2016 Advanced Micro Devices, Inc. >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included in >> + * all copies or substantial portions of the Software. 
>> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> + * OTHER DEALINGS IN THE SOFTWARE. >> + * >> + * Authors: >> + * Chunming Zhou <david1.zhou at amd.com> >> + */ >> +#include <linux/file.h> >> +#include <linux/fs.h> >> +#include <linux/kernel.h> >> +#include <linux/poll.h> >> +#include <linux/seq_file.h> >> +#include <linux/export.h> >> +#include <linux/sched.h> >> +#include <linux/slab.h> >> +#include <linux/uaccess.h> >> +#include <linux/anon_inodes.h> >> +#include <linux/sync_file.h> >> +#include "amdgpu.h" >> +#include <drm/drmP.h> >> + >> +static inline struct sync_file *amdgpu_sync_file_lookup(struct amdgpu_fpriv *fpriv, u32 handle) >> +{ >> + struct sync_file *sync_file; >> + >> + spin_lock(&fpriv->sem_handles_lock); >> + >> + /* Check if we currently have a reference on the object */ >> + sync_file = idr_find(&fpriv->sem_handles, handle); >> + >> + spin_unlock(&fpriv->sem_handles_lock); >> + >> + return sync_file; >> +} >> + >> +static int amdgpu_sem_create(struct amdgpu_fpriv *fpriv, u32 *handle) >> +{ >> + struct sync_file *sync_file = sync_file_alloc(); >> + int ret; >> + >> + if (!sync_file) >> + return -ENOMEM; >> + >> + snprintf(sync_file->name, sizeof(sync_file->name), "sync_sem"); >> + >> + /* we get a file reference and we use that in the idr. 
*/ >> + idr_preload(GFP_KERNEL); >> + spin_lock(&fpriv->sem_handles_lock); >> + >> + ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT); >> + >> + spin_unlock(&fpriv->sem_handles_lock); >> + idr_preload_end(); >> + >> + if (ret < 0) >> + return ret; >> + >> + *handle = ret; >> + return 0; >> +} >> + >> +void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle) >> +{ >> + struct sync_file *sync_file = amdgpu_sync_file_lookup(fpriv, handle); >> + if (!sync_file) >> + return; >> + >> + spin_lock(&fpriv->sem_handles_lock); >> + idr_remove(&fpriv->sem_handles, handle); >> + spin_unlock(&fpriv->sem_handles_lock); >> + >> + fput(sync_file->file); >> +} >> + >> + >> +int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv, >> + uint32_t handle, >> + struct dma_fence *fence) >> +{ >> + struct sync_file *sync_file; >> + struct dma_fence *old_fence; >> + sync_file = amdgpu_sync_file_lookup(fpriv, handle); >> + if (!sync_file) >> + return -EINVAL; >> + >> + old_fence = sync_file_replace_fence(sync_file, fence); >> + dma_fence_put(old_fence); >> + return 0; >> +} >> + >> +static int amdgpu_sem_import(struct amdgpu_fpriv *fpriv, >> + int fd, u32 *handle) >> +{ >> + struct sync_file *sync_file = sync_file_fdget(fd); >> + int ret; >> + >> + if (!sync_file) >> + return -EINVAL; >> + >> + idr_preload(GFP_KERNEL); >> + spin_lock(&fpriv->sem_handles_lock); >> + >> + ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT); >> + >> + spin_unlock(&fpriv->sem_handles_lock); >> + idr_preload_end(); >> + >> + fput(sync_file->file); >> + if (ret < 0) >> + goto err_out; >> + >> + *handle = ret; >> + return 0; >> +err_out: >> + return ret; >> + >> +} >> + >> +static int amdgpu_sem_export(struct amdgpu_fpriv *fpriv, >> + u32 handle, int *fd) >> +{ >> + struct sync_file *sync_file; >> + int ret; >> + >> + sync_file = amdgpu_sync_file_lookup(fpriv, handle); >> + if (!sync_file) >> + return -EINVAL; >> + >> + ret = get_unused_fd_flags(O_CLOEXEC); >> + if (ret < 
0) >> + goto err_put_file; >> + >> + fd_install(ret, sync_file->file); >> + >> + *fd = ret; >> + return 0; >> +err_put_file: >> + return ret; >> +} >> + >> +int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev, >> + struct amdgpu_fpriv *fpriv, >> + struct amdgpu_sync *sync, >> + uint32_t handle) >> +{ >> + int r; >> + struct sync_file *sync_file; >> + struct dma_fence *fence; >> + >> + sync_file = amdgpu_sync_file_lookup(fpriv, handle); >> + if (!sync_file) >> + return -EINVAL; >> + >> + fence = sync_file_replace_fence(sync_file, NULL); >> + r = amdgpu_sync_fence(adev, sync, fence); >> + dma_fence_put(fence); >> + >> + return r; >> +} >> + >> +int amdgpu_sem_ioctl(struct drm_device *dev, void *data, >> + struct drm_file *filp) >> +{ >> + union drm_amdgpu_sem *args = data; >> + struct amdgpu_fpriv *fpriv = filp->driver_priv; >> + int r = 0; >> + >> + switch (args->in.op) { >> + case AMDGPU_SEM_OP_CREATE_SEM: >> + r = amdgpu_sem_create(fpriv, &args->out.handle); >> + break; >> + case AMDGPU_SEM_OP_IMPORT_SEM: >> + r = amdgpu_sem_import(fpriv, args->in.handle, &args->out.handle); >> + break; >> + case AMDGPU_SEM_OP_EXPORT_SEM: >> + r = amdgpu_sem_export(fpriv, args->in.handle, &args->out.fd); >> + break; >> + case AMDGPU_SEM_OP_DESTROY_SEM: >> + amdgpu_sem_destroy(fpriv, args->in.handle); >> + break; >> + default: >> + r = -EINVAL; >> + break; >> + } >> + >> + return r; >> +} >> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h >> index 5797283..646b103 100644 >> --- a/include/uapi/drm/amdgpu_drm.h >> +++ b/include/uapi/drm/amdgpu_drm.h >> @@ -51,6 +51,7 @@ extern "C" { >> #define DRM_AMDGPU_GEM_OP 0x10 >> #define DRM_AMDGPU_GEM_USERPTR 0x11 >> #define DRM_AMDGPU_WAIT_FENCES 0x12 >> +#define DRM_AMDGPU_SEM 0x13 >> >> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) >> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union 
drm_amdgpu_gem_mmap) >> @@ -65,6 +66,7 @@ extern "C" { >> #define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) >> #define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) >> #define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) >> +#define DRM_IOCTL_AMDGPU_SEM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem) >> >> #define AMDGPU_GEM_DOMAIN_CPU 0x1 >> #define AMDGPU_GEM_DOMAIN_GTT 0x2 >> @@ -335,6 +337,26 @@ union drm_amdgpu_wait_fences { >> struct drm_amdgpu_wait_fences_out out; >> }; >> >> +#define AMDGPU_SEM_OP_CREATE_SEM 0 >> +#define AMDGPU_SEM_OP_IMPORT_SEM 1 >> +#define AMDGPU_SEM_OP_EXPORT_SEM 2 >> +#define AMDGPU_SEM_OP_DESTROY_SEM 3 >> + >> +struct drm_amdgpu_sem_in { >> + __u32 op; >> + __u32 handle; >> +}; >> + >> +struct drm_amdgpu_sem_out { >> + __u32 fd; >> + __u32 handle; >> +}; >> + >> +union drm_amdgpu_sem { >> + struct drm_amdgpu_sem_in in; >> + struct drm_amdgpu_sem_out out; >> +}; >> + >> #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0 >> #define AMDGPU_GEM_OP_SET_PLACEMENT 1 >> >> @@ -390,6 +412,8 @@ struct drm_amdgpu_gem_va { >> #define AMDGPU_CHUNK_ID_IB 0x01 >> #define AMDGPU_CHUNK_ID_FENCE 0x02 >> #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 >> +#define AMDGPU_CHUNK_ID_SEM_WAIT 0x04 >> +#define AMDGPU_CHUNK_ID_SEM_SIGNAL 0x05 >> >> struct drm_amdgpu_cs_chunk { >> __u32 chunk_id; >> @@ -454,6 +478,10 @@ struct drm_amdgpu_cs_chunk_fence { >> __u32 offset; >> }; >> >> +struct drm_amdgpu_cs_chunk_sem { >> + __u32 handle; >> +}; >> + >> struct drm_amdgpu_cs_chunk_data { >> union { >> struct drm_amdgpu_cs_chunk_ib ib_data; >> -- >> 2.7.4 >> >> _______________________________________________ >> dri-devel mailing list >> dri-devel at lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/dri-devel