Re: [PATCH] drm/syncobj: Allow wait for submit and signal behavior (v2)

On 09.08.2017 at 19:57, Chris Wilson wrote:
Quoting Jason Ekstrand (2017-08-09 18:00:54)
Vulkan VkFence semantics require that the application be able to perform
a CPU wait on work which may not yet have been submitted.  This is
perfectly safe because the CPU wait has a timeout which will get
triggered eventually if no work is ever submitted.  This behavior is
advantageous for multi-threaded workloads because, so long as all of the
threads agree up-front on which fences to use, you avoid the extra
cross-thread synchronization cost of thread A telling thread B that its
dependent work has been submitted and that thread B is now free to wait.
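
(For illustration, not part of the patch: the application-side pattern this
enables; a minimal sketch assuming device and fence were created earlier.)

    #include <vulkan/vulkan.h>

    /* Thread B may block on a fence that thread A has not yet submitted
     * work against; the finite timeout guarantees the wait ends anyway. */
    VkResult res = vkWaitForFences(device, 1, &fence, VK_TRUE,
                                   1000000000ull /* 1 second, in ns */);
    if (res == VK_TIMEOUT) {
            /* nothing was submitted in time; the caller can retry or bail */
    }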

Within a single process, this can be implemented in the userspace driver
by doing exactly the same kind of tracking the app would have to do
using POSIX condition variables or similar.  However, in order for this
to work cross-process (as is required by VK_KHR_external_fence), we need
to handle this in the kernel.
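
(A rough sketch of that single-process tracking, with invented names; plain
POSIX, no kernel involvement:)

    #include <pthread.h>
    #include <stdbool.h>
    #include <time.h>

    struct ws_fence {                   /* hypothetical driver bookkeeping */
            pthread_mutex_t mutex;
            pthread_cond_t cond;
            bool submitted;             /* set when the work is queued */
    };

    /* Block until work is submitted or the absolute deadline passes. */
    static int ws_fence_wait_submit(struct ws_fence *f,
                                    const struct timespec *abstime)
    {
            int ret = 0;

            pthread_mutex_lock(&f->mutex);
            while (!f->submitted && ret == 0)
                    ret = pthread_cond_timedwait(&f->cond, &f->mutex, abstime);
            pthread_mutex_unlock(&f->mutex);

            return ret;                 /* 0 once submitted, else ETIMEDOUT */
    }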

This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
instructs the IOCTL to wait for the syncobj to have a non-null fence and
then wait on the fence.  Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
easily get the Vulkan behavior.
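
(A sketch of the intended userspace usage; the struct and flag names are the
ones this series adds, while fd, handle and deadline_ns are assumed:)

    #include <xf86drm.h>

    struct drm_syncobj_wait wait = {
            .handles = (uintptr_t)&handle,      /* array of one handle */
            .count_handles = 1,
            .timeout_nsec = deadline_ns,
            .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
    };

    /* Blocks until a fence shows up on the syncobj and signals, or the
     * timeout expires, even if nothing has been submitted yet. */
    int ret = drmIoctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);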

v2:
  - Fix a bug in the invalid syncobj error path
  - Unify the wait-all and wait-any cases

Signed-off-by: Jason Ekstrand <jason@xxxxxxxxxxxxxx>
Cc: Dave Airlie <airlied@xxxxxxxxxx>
---

I realized today (this is what happens when you sleep) that it takes almost
no work to make the wait-any path also handle wait-all.  By unifying the
two, I deleted over a hundred lines of code from the implementation.

  drivers/gpu/drm/drm_syncobj.c | 243 ++++++++++++++++++++++++++++++++++--------
  include/uapi/drm/drm.h        |   1 +
  2 files changed, 199 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 510dfc2..5e7f654 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -51,6 +51,7 @@
  #include <linux/fs.h>
  #include <linux/anon_inodes.h>
  #include <linux/sync_file.h>
+#include <linux/sched/signal.h>
#include "drm_internal.h"
  #include <drm/drm_syncobj.h>
@@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
This is a bit of the puzzle that is missing.

         list_add_tail(&cb->node, &syncobj->cb_list);
  }
+static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
+                                                struct dma_fence **fence,
+                                                struct drm_syncobj_cb *cb,
+                                                drm_syncobj_func_t func)
+{
+       int ret;
+
+       spin_lock(&syncobj->lock);
+       if (syncobj->fence) {
+               *fence = dma_fence_get(syncobj->fence);
+               ret = 1;
+       } else {
+               *fence = NULL;
+               drm_syncobj_add_callback_locked(syncobj, cb, func);
+               ret = 0;
+       }
+       spin_unlock(&syncobj->lock);
+
+       return ret;
+}
+
  /**
   * drm_syncobj_add_callback - adds a callback to syncobj::cb_list
   * @syncobj: Sync object to which to add the callback
@@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
                                         &args->handle);
  }
+static int drm_syncobj_signaled(struct drm_syncobj *syncobj,
+                               uint32_t wait_flags)
+{
+       struct dma_fence *fence;
+       int ret;
+
+       fence = drm_syncobj_fence_get(syncobj);
+       if (!fence) {
+               if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
+                       return 0;
+               else
+                       return -EINVAL;
+       }
+
+       ret = dma_fence_is_signaled(fence) ? 1 : 0;
+
+       dma_fence_put(fence);
+
+       return ret;
+}
+
+struct syncobj_wait_entry {
+       struct task_struct *task;
+       struct dma_fence *fence;
+       struct dma_fence_cb fence_cb;
+       struct drm_syncobj_cb syncobj_cb;
+};
+
+static void syncobj_wait_fence_func(struct dma_fence *fence,
+                                   struct dma_fence_cb *cb)
+{
+       struct syncobj_wait_entry *wait =
+               container_of(cb, struct syncobj_wait_entry, fence_cb);
+
+       wake_up_process(wait->task);
+}
+
+static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
+                                     struct drm_syncobj_cb *cb)
+{
+       struct syncobj_wait_entry *wait =
+               container_of(cb, struct syncobj_wait_entry, syncobj_cb);
+
+       /* This happens inside the syncobj lock */
+       wait->fence = dma_fence_get(syncobj->fence);
+       wake_up_process(wait->task);
+}
+
+static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
+                                                 uint32_t count,
+                                                 uint32_t flags,
+                                                 signed long timeout,
+                                                 uint32_t *idx)
+{
+       struct syncobj_wait_entry *entries;
+       struct dma_fence *fence;
+       signed long ret;
+       uint32_t signaled_count, i;
+
+       if (timeout == 0) {
+               signaled_count = 0;
+               for (i = 0; i < count; ++i) {
+                       ret = drm_syncobj_signaled(syncobjs[i], flags);
+                       if (ret < 0)
+                               return ret;
+                       if (ret == 0)
+                               continue;
+                       if (signaled_count == 0 && idx)
+                               *idx = i;
+                       signaled_count++;
+               }
+
+               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
+                       return signaled_count == count ? 1 : 0;
+               else
+                       return signaled_count > 0 ? 1 : 0;
+       }
+
+       entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
+       if (!entries)
+               return -ENOMEM;
+
+       for (i = 0; i < count; ++i) {
+               entries[i].task = current;
+
+               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
+                       drm_syncobj_fence_get_or_add_callback(syncobjs[i],
+                                                             &entries[i].fence,
+                                                             &entries[i].syncobj_cb,
+                                                             syncobj_wait_syncobj_func);
+               } else {
+                       entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
+                       if (!entries[i].fence) {
+                               ret = -EINVAL;
+                               goto err_cleanup_entries;
+                       }
So we are taking a snapshot here. It looks like this could have been
done using a dma_fence_array + dma_fence_proxy for capturing the future
fence.
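
Roughly, for the wait itself (a sketch only; dma_fence_array is in the tree
today, the proxy fence standing in for not-yet-submitted work is the
proposed, hypothetical piece):

    struct dma_fence_array *array;

    /* One fence per syncobj; a proxy fence would stand in for any
     * syncobj with nothing attached yet and be filled in on submit. */
    array = dma_fence_array_create(count, fences,
                                   dma_fence_context_alloc(1), 1,
                                   !(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL));
    if (!array)
            return -ENOMEM;

    ret = dma_fence_wait_timeout(&array->base, true, timeout);
    dma_fence_put(&array->base);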

+       ret = timeout;
+       while (ret > 0) {
+               signaled_count = 0;
+               for (i = 0; i < count; ++i) {
+                       fence = entries[i].fence;
+                       if (!fence)
+                               continue;
+
+                       if (dma_fence_is_signaled(fence) ||
+                           (!entries[i].fence_cb.func &&
+                            dma_fence_add_callback(fence,
+                                                   &entries[i].fence_cb,
+                                                   syncobj_wait_fence_func))) {
+                               /* The fence has been signaled */
+                               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
+                                       signaled_count++;
+                               } else {
+                                       if (idx)
+                                               *idx = i;
+                                       goto done_waiting;
+                               }
+                       }
+               }
+
+               if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) &&
+                   signaled_count == count)
+                       goto done_waiting;
+
+               set_current_state(TASK_INTERRUPTIBLE);
Setting TASK_INTERRUPTIBLE should be the first step in the wait-loop,
before doing any of the checks, so that if any state changes whilst you
are in the middle of those checks, the schedule_timeout() is a nop and
you can check again.
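
I.e. the canonical pattern, roughly (condition_met() is a stand-in):

    do {
            set_current_state(TASK_INTERRUPTIBLE);
            if (condition_met())
                    break;
            if (signal_pending(current)) {
                    timeout = -ERESTARTSYS;
                    break;
            }
            /* any wake-up since set_current_state() makes this a nop */
            timeout = schedule_timeout(timeout);
    } while (timeout > 0);
    __set_current_state(TASK_RUNNING);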

Yeah, completely agree.

I would rather drop the custom-wait approach and try to use
wait_event_interruptible here.

As far as I can see, that should make the whole patch much cleaner in
general.
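
Something along these lines (sketch only; the waitqueue and the
all_signaled() helper are assumed, not in the patch):

    /* the fence/syncobj callbacks then collapse to wake_up(&syncobj_wq) */
    ret = wait_event_interruptible_timeout(syncobj_wq,
                    all_signaled(entries, count, flags),
                    timeout);
    /* >0: done, remaining jiffies; 0: timed out; -ERESTARTSYS: signal */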

Regards,
Christian.

-Chris
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/dri-devel

