Quoting Jason Ekstrand (2017-08-09 18:00:54) > Vulkan VkFence semantics require that the application be able to perform > a CPU wait on work which may not yet have been submitted. This is > perfectly safe because the CPU wait has a timeout which will get > triggered eventually if no work is ever submitted. This behavior is > advantageous for multi-threaded workloads because, so long as all of the > threads agree on what fences to use up-front, you don't have the extra > cross-thread synchronization cost of thread A telling thread B that it > has submitted its dependent work and thread B is now free to wait. > > Within a single process, this can be implemented in the userspace driver > by doing exactly the same kind of tracking the app would have to do > using posix condition variables or similar. However, in order for this > to work cross-process (as is required by VK_KHR_external_fence), we need > to handle this in the kernel. > > This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which > instructs the IOCTL to wait for the syncobj to have a non-null fence and > then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can > easily get the Vulkan behavior. > > v2: > - Fix a bug in the invalid syncobj error path > - Unify the wait-all and wait-any cases > > Signed-off-by: Jason Ekstrand <jason@xxxxxxxxxxxxxx> > Cc: Dave Airlie <airlied@xxxxxxxxxx> > --- > > I realized today (this is what happens when you sleep) that it takes almost > no work to make the wait-any path also handle wait-all. By unifying the > two, I deleted over a hundred lines of code from the implementation. 
> > drivers/gpu/drm/drm_syncobj.c | 243 ++++++++++++++++++++++++++++++++++-------- > include/uapi/drm/drm.h | 1 + > 2 files changed, 199 insertions(+), 45 deletions(-) > > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > index 510dfc2..5e7f654 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -51,6 +51,7 @@ > #include <linux/fs.h> > #include <linux/anon_inodes.h> > #include <linux/sync_file.h> > +#include <linux/sched/signal.h> > > #include "drm_internal.h" > #include <drm/drm_syncobj.h> > @@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj, This is a bit of the puzzle that is missing. > list_add_tail(&cb->node, &syncobj->cb_list); > } > > +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, > + struct dma_fence **fence, > + struct drm_syncobj_cb *cb, > + drm_syncobj_func_t func) > +{ > + int ret; > + > + spin_lock(&syncobj->lock); > + if (syncobj->fence) { > + *fence = dma_fence_get(syncobj->fence); > + ret = 1; > + } else { > + *fence = NULL; > + drm_syncobj_add_callback_locked(syncobj, cb, func); > + ret = 0; > + } > + spin_unlock(&syncobj->lock); > + > + return ret; > +} > + > /** > * drm_syncobj_add_callback - adds a callback to syncobj::cb_list > * @syncobj: Sync object to which to add the callback > @@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, > &args->handle); > } > > +static int drm_syncobj_signaled(struct drm_syncobj *syncobj, > + uint32_t wait_flags) > +{ > + struct dma_fence *fence; > + int ret; > + > + fence = drm_syncobj_fence_get(syncobj); > + if (!fence) { > + if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) > + return 0; > + else > + return -EINVAL; > + } > + > + ret = dma_fence_is_signaled(fence) ? 
1 : 0; > + > + dma_fence_put(fence); > + > + return ret; > +} > + > +struct syncobj_wait_entry { > + struct task_struct *task; > + struct dma_fence *fence; > + struct dma_fence_cb fence_cb; > + struct drm_syncobj_cb syncobj_cb; > +}; > + > +static void syncobj_wait_fence_func(struct dma_fence *fence, > + struct dma_fence_cb *cb) > +{ > + struct syncobj_wait_entry *wait = > + container_of(cb, struct syncobj_wait_entry, fence_cb); > + > + wake_up_process(wait->task); > +} > + > +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, > + struct drm_syncobj_cb *cb) > +{ > + struct syncobj_wait_entry *wait = > + container_of(cb, struct syncobj_wait_entry, syncobj_cb); > + > + /* This happens inside the syncobj lock */ > + wait->fence = dma_fence_get(syncobj->fence); > + wake_up_process(wait->task); > +} > + > +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, > + uint32_t count, > + uint32_t flags, > + signed long timeout, > + uint32_t *idx) > +{ > + struct syncobj_wait_entry *entries; > + struct dma_fence *fence; > + signed long ret; > + uint32_t signaled_count, i; > + > + if (timeout == 0) { > + signaled_count = 0; > + for (i = 0; i < count; ++i) { > + ret = drm_syncobj_signaled(syncobjs[i], flags); > + if (ret < 0) > + return ret; > + if (ret == 0) > + continue; > + if (signaled_count == 0 && idx) > + *idx = i; > + signaled_count++; > + } > + > + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) > + return signaled_count == count ? 1 : 0; > + else > + return signaled_count > 0 ? 
1 : 0; > + } > + > + entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); > + if (!entries) > + return -ENOMEM; > + > + for (i = 0; i < count; ++i) { > + entries[i].task = current; > + > + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { > + drm_syncobj_fence_get_or_add_callback(syncobjs[i], > + &entries[i].fence, > + &entries[i].syncobj_cb, > + syncobj_wait_syncobj_func); > + } else { > + entries[i].fence = drm_syncobj_fence_get(syncobjs[i]); > + if (!entries[i].fence) { > + ret = -EINVAL; > + goto err_cleanup_entries; > + } So we are taking a snapshot here. It looks like this could have been done using a dma_fence_array + dma_fence_proxy for capturing the future fence. > + ret = timeout; > + while (ret > 0) { > + signaled_count = 0; > + for (i = 0; i < count; ++i) { > + fence = entries[i].fence; > + if (!fence) > + continue; > + > + if (dma_fence_is_signaled(fence) || > + (!entries[i].fence_cb.func && > + dma_fence_add_callback(fence, > + &entries[i].fence_cb, > + syncobj_wait_fence_func))) { > + /* The fence has been signaled */ > + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) { > + signaled_count++; > + } else { > + if (idx) > + *idx = i; > + goto done_waiting; > + } > + } > + } > + > + if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) && > + signaled_count == count) > + goto done_waiting; > + > + set_current_state(TASK_INTERRUPTIBLE); This is the first step in the wait-loop: you set TASK_INTERRUPTIBLE before doing any checks, so that if any state changes whilst you are in the middle of those checks, the schedule_timeout is a nop and you can check again. -Chris _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel