Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. Signed-off-by: Jason Ekstrand <jason@xxxxxxxxxxxxxx> --- drivers/gpu/drm/drm_syncobj.c | 319 ++++++++++++++++++++++++++++++++++++++---- include/uapi/drm/drm.h | 1 + 2 files changed, 293 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 510dfc2..ecfc43b 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -51,6 +51,7 @@ #include <linux/fs.h> #include <linux/anon_inodes.h> #include <linux/sync_file.h> +#include <linux/sched/signal.h> #include "drm_internal.h" #include <drm/drm_syncobj.h> @@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj, list_add_tail(&cb->node, &syncobj->cb_list); } +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, + struct dma_fence **fence, + struct drm_syncobj_cb *cb, + drm_syncobj_func_t func) +{ + int ret; + + spin_lock(&syncobj->lock); + if (syncobj->fence) { + *fence = dma_fence_get(syncobj->fence); + ret = 1; + } else { + *fence = NULL; + drm_syncobj_add_callback_locked(syncobj, cb, func); + ret = 0; + } + spin_unlock(&syncobj->lock); + + return ret; +} + /** * drm_syncobj_add_callback - adds a callback to syncobj::cb_list * @syncobj: Sync object to which to add the callback @@ -526,6 +548,255 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, &args->handle); } +static int drm_syncobj_signaled(struct drm_syncobj *syncobj, + uint32_t wait_flags) +{ + struct dma_fence *fence; + int ret; + + fence = drm_syncobj_fence_get(syncobj); + if (!fence) { + if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) + return 0; + else + return -EINVAL; + } + + ret = dma_fence_is_signaled(fence) ? 1 : 0; + + dma_fence_put(fence); + + return ret; +} + +struct syncobj_get_wait_cb { + struct drm_syncobj_cb base; + struct task_struct *task; + struct dma_fence **fence; +}; + +static void syncobj_get_wait_cb_func(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *wait_cb) +{ + struct syncobj_get_wait_cb *wait = + container_of(wait_cb, struct syncobj_get_wait_cb, base); + + /* This happens inside the syncobj lock */ + *wait->fence = dma_fence_get(syncobj->fence); + wake_up_process(wait->task); +} + +static signed long +drm_syncobj_get_fence_wait_timeout(struct drm_syncobj *syncobj, + struct dma_fence **fence, + signed long timeout) +{ + struct syncobj_get_wait_cb cb; + signed long ret; + + if (timeout == 0) { + *fence = drm_syncobj_fence_get(syncobj); + if (*fence) + return 1; + return 0; + } + + cb.task = current; + cb.fence = fence; + + if (drm_syncobj_fence_get_or_add_callback(syncobj, fence, &cb.base, + syncobj_get_wait_cb_func)) { + /* We got the fence immediately. No callback was installed. */ + return timeout; + } + + ret = timeout; + while (ret > 0) { + set_current_state(TASK_INTERRUPTIBLE); + + if (*fence) + break; + + ret = schedule_timeout(ret); + + if (ret > 0 && signal_pending(current)) + ret = -ERESTARTSYS; + } + + __set_current_state(TASK_RUNNING); + + drm_syncobj_remove_callback(syncobj, &cb.base); + + /* It's possible that we got the fence after timing out or getting + * interrupted but before removing the callback. + */ + if (ret <= 0 && *fence) + ret = 1; + + return ret; +} + +static signed long drm_syncobj_wait_timeout(struct drm_syncobj *syncobj, + uint32_t flags, + signed long timeout) +{ + struct dma_fence *fence; + signed long ret; + + if (timeout == 0) + return drm_syncobj_signaled(syncobj, flags); + + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + ret = drm_syncobj_get_fence_wait_timeout(syncobj, &fence, + timeout); + if (ret <= 0) + return ret; + timeout = ret; + } else { + fence = drm_syncobj_fence_get(syncobj); + } + + if (!fence) + return -EINVAL; + + ret = dma_fence_wait_timeout(fence, true, timeout); + + /* Various dma_fence wait callbacks will return ENOENT to indicate + * that the fence has already been signaled. We sanitize this as + * returning with all of the time left. + */ + if (ret == -ENOENT) + ret = timeout; + + dma_fence_put(fence); + + return ret; +} + +struct syncobj_wait_any_entry { + struct task_struct *task; + struct dma_fence *fence; + struct dma_fence_cb fence_cb; + struct drm_syncobj_cb syncobj_cb; +}; + +static void syncobj_wait_any_fence_func(struct dma_fence *fence, + struct dma_fence_cb *cb) +{ + struct syncobj_wait_any_entry *wait = + container_of(cb, struct syncobj_wait_any_entry, fence_cb); + + wake_up_process(wait->task); +} + +static void syncobj_wait_any_syncobj_func(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb) +{ + struct syncobj_wait_any_entry *wait = + container_of(cb, struct syncobj_wait_any_entry, syncobj_cb); + + /* This happens inside the syncobj lock */ + wait->fence = dma_fence_get(syncobj->fence); + wake_up_process(wait->task); +} + +static signed long drm_syncobj_wait_any_timeout(struct drm_syncobj **syncobjs, + uint32_t count, + uint32_t flags, + signed long timeout, + uint32_t *idx) +{ + struct syncobj_wait_any_entry *entries; + struct dma_fence *fence; + signed long ret; + uint32_t i; + + if (timeout == 0) { + for (i = 0; i < count; ++i) { + ret = drm_syncobj_signaled(syncobjs[i], flags); + if (ret < 0) + return ret; + if (ret == 0) + continue; + if (idx) + *idx = i; + return 1; + } + + return 0; + } + + entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + + for (i = 0; i < count; ++i) { + entries[i].task = current; + + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + drm_syncobj_fence_get_or_add_callback(syncobjs[i], + &entries[i].fence, + &entries[i].syncobj_cb, + syncobj_wait_any_syncobj_func); + } else { + entries[i].fence = drm_syncobj_fence_get(syncobjs[i]); + if (!entries[i].fence) { + ret = -EINVAL; + goto err_cleanup_entries; + } + } + } + + ret = timeout; + while (ret > 0) { + for (i = 0; i < count; ++i) { + fence = entries[i].fence; + if (!fence) + continue; + + if (dma_fence_is_signaled(fence)) { + if (idx) + *idx = i; + goto done_waiting; + } + + if (!entries[i].fence_cb.func && + dma_fence_add_callback(fence, + &entries[i].fence_cb, + syncobj_wait_any_fence_func)) { + /* The fence is already signaled */ + if (idx) + *idx = i; + goto done_waiting; + } + } + + set_current_state(TASK_INTERRUPTIBLE); + + ret = schedule_timeout(ret); + + if (ret > 0 && signal_pending(current)) + ret = -ERESTARTSYS; + } + +done_waiting: + __set_current_state(TASK_RUNNING); + +err_cleanup_entries: + for (i = 0; i < count; ++i) { + if (entries[i].syncobj_cb.func) + drm_syncobj_remove_callback(syncobjs[i], + &entries[i].syncobj_cb); + if (entries[i].fence_cb.func) + dma_fence_remove_callback(entries[i].fence, + &entries[i].fence_cb); + dma_fence_put(entries[i].fence); + } + kfree(entries); + + return ret; +} + /** * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute value * @@ -561,7 +832,7 @@ static signed long drm_timeout_abs_to_jiffies(int64_t timeout_nsec) static int drm_syncobj_wait_fences(struct drm_device *dev, struct drm_file *file_private, struct drm_syncobj_wait *wait, - struct dma_fence **fences) + struct drm_syncobj **syncobjs) { signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec); signed long ret = 0; @@ -570,17 +841,9 @@ static int drm_syncobj_wait_fences(struct drm_device *dev, if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { uint32_t i; for (i = 0; i < wait->count_handles; i++) { - ret = dma_fence_wait_timeout(fences[i], true, timeout); - - /* Various dma_fence wait callbacks will return - * ENOENT to indicate that the fence has already - * been signaled. We need to sanitize this to 0 so - * we don't return early and the client doesn't see - * an unexpected error. - */ - if (ret == -ENOENT) - ret = 0; - + ret = drm_syncobj_wait_timeout(syncobjs[i], + wait->flags, + timeout); if (ret < 0) return ret; if (ret == 0) @@ -589,10 +852,10 @@ static int drm_syncobj_wait_fences(struct drm_device *dev, } first = 0; } else { - ret = dma_fence_wait_any_timeout(fences, - wait->count_handles, - true, timeout, - &first); + ret = drm_syncobj_wait_any_timeout(syncobjs, + wait->count_handles, + wait->flags, + timeout, &first); } if (ret < 0) @@ -610,14 +873,15 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, { struct drm_syncobj_wait *args = data; uint32_t *handles; - struct dma_fence **fences; + struct drm_syncobj **syncobjs; int ret = 0; uint32_t i; if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) return -ENODEV; - if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) + if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)) return -EINVAL; if (args->count_handles == 0) @@ -636,27 +900,28 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, goto err_free_handles; } - fences = kcalloc(args->count_handles, - sizeof(struct dma_fence *), GFP_KERNEL); - if (!fences) { + syncobjs = kcalloc(args->count_handles, + sizeof(struct drm_syncobj *), GFP_KERNEL); + if (!syncobjs) { ret = -ENOMEM; goto err_free_handles; } for (i = 0; i < args->count_handles; i++) { - ret = drm_syncobj_find_fence(file_private, handles[i], - &fences[i]); - if (ret) + syncobjs[i] = drm_syncobj_find(file_private, handles[i]); + if (!syncobjs[i]) { + ret = -ENOENT; goto err_free_fence_array; + } } ret = drm_syncobj_wait_fences(dev, file_private, - args, fences); + args, syncobjs); err_free_fence_array: for (i = 0; i < args->count_handles; i++) - dma_fence_put(fences[i]); - kfree(fences); + drm_syncobj_put(syncobjs[i]); + kfree(syncobjs); err_free_handles: kfree(handles); diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 4b301b4..f8ec8fe 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -719,6 +719,7 @@ struct drm_syncobj_handle { }; #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) struct drm_syncobj_wait { __u64 handles; /* absolute timeout */ -- 2.5.0.400.gff86faf _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel