Re: [PATCH v5 06/10] drm/i915: add syncobj timeline support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Thanks a lot for your comments.

On 27/06/2019 12:15, Chris Wilson wrote:
Quoting Lionel Landwerlin (2019-06-27 09:00:41)
Introduces a new parameters to execbuf so that we can specify syncobj
handles as well as timeline points.

v2: Reuse i915_user_extension_fn

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
---
  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 280 ++++++++++++++----
  drivers/gpu/drm/i915/i915_drv.c               |   2 +
  include/uapi/drm/i915_drm.h                   |  37 +++
  3 files changed, 263 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 1970dd8cbac3..476fade6fcab 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -213,6 +213,13 @@ enum {
   * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
   */
+struct i915_drm_dma_fences {
i915_execbuffer_fences or eb_fences as it is private to the impl.


Sure.



+       struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
+       struct dma_fence *dma_fence;
+       u64 value;
+       struct dma_fence_chain *chain_fence;
+};
+
  struct i915_execbuffer {
         struct drm_i915_private *i915; /** i915 backpointer */
         struct drm_file *file; /** per-file lookup tables and limits */
@@ -275,6 +282,7 @@ struct i915_execbuffer {
struct {
                 u64 flags; /** Available extensions parameters */
+               struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
Another 40b on stack. Getting closer to those warning bells being set off.

         } extensions;
  };
@@ -2224,67 +2232,193 @@ eb_select_engine(struct i915_execbuffer *eb,
  }
static void
-__free_fence_array(struct drm_syncobj **fences, unsigned int n)
+__free_fence_array(struct i915_drm_dma_fences *fences, unsigned int n)
  {
-       while (n--)
-               drm_syncobj_put(ptr_mask_bits(fences[n], 2));
+       while (n--) {
+               drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
+               dma_fence_put(fences[n].dma_fence);
+               kfree(fences[n].chain_fence);
+       }
         kvfree(fences);
  }
-static struct drm_syncobj **
-get_fence_array(struct drm_i915_gem_execbuffer2 *args,
-               struct drm_file *file)
+static struct i915_drm_dma_fences *
+get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
  {
-       const unsigned long nfences = args->num_cliprects;
+       struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
+               &eb->extensions.timeline_fences;
+       struct drm_i915_gem_exec_fence __user *user_fences;
+       struct i915_drm_dma_fences *fences;
+       u64 __user *user_values;
+       unsigned long n;
+       int err;
+
+       *out_n_fences = timeline_fences->handle_count;
I'd encourage using a const num_fences = timeline_fences->handle_count
for readability within the function and then assigning
	*out_n_fences = num_fences;
right before the successful return fences;


Done.



+       /* Check multiplication overflow for access_ok() and kvmalloc_array() */
+       BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
+       if (*out_n_fences > min_t(unsigned long,
+                                 ULONG_MAX / sizeof(*user_fences),
+                                 SIZE_MAX / sizeof(*fences)))
+               return ERR_PTR(-EINVAL);
+
+       user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
+       if (!access_ok(user_fences, *out_n_fences * sizeof(*user_fences)))
+               return ERR_PTR(-EFAULT);
+
+       user_values = u64_to_user_ptr(timeline_fences->values_ptr);
+       if (!access_ok(user_values, *out_n_fences * sizeof(*user_values)))
+               return ERR_PTR(-EFAULT);
+       fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
+                               __GFP_NOWARN | GFP_KERNEL);
+       if (!fences)
+               return ERR_PTR(-ENOMEM);
+
+       BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+                    ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
+       for (n = 0; n < *out_n_fences; n++) {
+               struct drm_i915_gem_exec_fence user_fence;
+               struct drm_syncobj *syncobj;
+               struct dma_fence *fence = NULL;
+               u64 point;
+
+               if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
+                       err = -EFAULT;
+                       goto err;
+               }
+
+               if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+                       err = -EINVAL;
+                       goto err;
+               }
+
+               if (__get_user(point, user_values++)) {
+                       err = -EFAULT;
+                       goto err;
+               }
+
+               syncobj = drm_syncobj_find(eb->file, user_fence.handle);
+               if (!syncobj) {
+                       DRM_DEBUG("Invalid syncobj handle provided\n");
+                       err = -EINVAL;
+                       goto err;
+               }
+
+               if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+                       fence = drm_syncobj_fence_get(syncobj);
+                       if (!fence) {
+                               DRM_DEBUG("Syncobj handle has no fence\n");
+                               drm_syncobj_put(syncobj);
+                               err = -EINVAL;
+                               goto err;
+                       }
+
+                       err = dma_fence_chain_find_seqno(&fence, point);
Is an imported syncobj always a fence-chain?

drm_syncobj_import_sync_file_fence() says no.


drm_syncobj_import_sync_file_fence() would turn a timeline semaphore into a binary semaphore by breaking the chain.

drm_syncobj_transfer_to_timeline() is what you should use if you wish to import a fence into the timeline.



Similarly, we put ordinary fences into the syncobj ourselves if
point==0, or the user uses the old path.

I think you need something like

if (dma_fence_is_chain() || point) {
	err = dma_fence_chain_find_seqno(&fence, point);
	if (err) {
	}
}


dma_fence_chain_find_seqno() already deals with point == 0.
If the fence is not a dma_fence_chain then the application screwed up somewhere.



+                       if (err) {
+                               DRM_DEBUG("Syncobj handle missing requested point\n");
+                               goto err;
+                       }
+               }
+
+               /*
+                * For timeline syncobjs we need to create a chain.
"need to preallocate chains for later signaling


Sure.



+                */
+               if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
+                       fences[n].chain_fence =
+                               kmalloc(sizeof(*fences[n].chain_fence),
+                                       GFP_KERNEL);
+                       if (!fences[n].chain_fence) {
+                               dma_fence_put(fence);
+                               drm_syncobj_put(syncobj);
+                               err = -ENOMEM;
+                               DRM_DEBUG("Unable to alloc chain_fence\n");
+                               goto err;
+                       }
+               } else {
+                       fences[n].chain_fence = NULL;
+               }
+
+               fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+               fences[n].dma_fence = fence;
+               fences[n].value = point;
+       }
+
+       return fences;
+
+err:
+       __free_fence_array(fences, n);
+       return ERR_PTR(err);
+}
+
+static struct i915_drm_dma_fences *
+get_legacy_fence_array(struct i915_execbuffer *eb,
+                      int *out_n_fences)
+{
+       struct drm_i915_gem_execbuffer2 *args = eb->args;
         struct drm_i915_gem_exec_fence __user *user;
-       struct drm_syncobj **fences;
+       struct i915_drm_dma_fences *fences;
         unsigned long n;
         int err;
- if (!(args->flags & I915_EXEC_FENCE_ARRAY))
-               return NULL;
+       *out_n_fences = args->num_cliprects;
Same suggestion as earlier for a local num_fences.


Done.



+static struct i915_drm_dma_fences *
+get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
+{
+       if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+               return get_legacy_fence_array(eb, out_n_fences);
Blank line.


Oops.



+       if (eb->extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
+               return get_timeline_fence_array(eb, out_n_fences);
+
+       *out_n_fences = 0;
+
No blank line, my argument is that both are part of the return value for
the function.


Okay.


+       return NULL;
+}
+
  static void
-put_fence_array(struct drm_i915_gem_execbuffer2 *args,
-               struct drm_syncobj **fences)
+put_fence_array(struct i915_drm_dma_fences *fences, int nfences)
  {
         if (fences)
-               __free_fence_array(fences, args->num_cliprects);
+               __free_fence_array(fences, nfences);
  }
static int
  await_fence_array(struct i915_execbuffer *eb,
-                 struct drm_syncobj **fences)
+                 struct i915_drm_dma_fences *fences,
+                 int nfences)
  {
-       const unsigned int nfences = eb->args->num_cliprects;
         unsigned int n;
         int err;
for (n = 0; n < nfences; n++) {
                 struct drm_syncobj *syncobj;
-               struct dma_fence *fence;
                 unsigned int flags;
- syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+               syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
                 if (!(flags & I915_EXEC_FENCE_WAIT))
                         continue;
- fence = drm_syncobj_fence_get(syncobj);
-               if (!fence)
-                       return -EINVAL;
-
-               err = i915_request_await_dma_fence(eb->request, fence);
-               dma_fence_put(fence);
+               err = i915_request_await_dma_fence(eb->request,
+                                                  fences[n].dma_fence);
                 if (err < 0)
                         return err;
         }
@@ -2334,9 +2475,9 @@ await_fence_array(struct i915_execbuffer *eb,
static void
  signal_fence_array(struct i915_execbuffer *eb,
-                  struct drm_syncobj **fences)
+                  struct i915_drm_dma_fences *fences,
+                  int nfences)
  {
-       const unsigned int nfences = eb->args->num_cliprects;
         struct dma_fence * const fence = &eb->request->fence;
         unsigned int n;
@@ -2344,15 +2485,43 @@ signal_fence_array(struct i915_execbuffer *eb,
                 struct drm_syncobj *syncobj;
                 unsigned int flags;
- syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+               syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
                 if (!(flags & I915_EXEC_FENCE_SIGNAL))
                         continue;
- drm_syncobj_replace_fence(syncobj, fence);
+               if (fences[n].chain_fence) {
+                       drm_syncobj_add_point(syncobj, fences[n].chain_fence,
+                                             fence, fences[n].value);
+                       /*
+                        * The chain's ownership is transfered to the
+                        * timeline.
+                        */
+                       fences[n].chain_fence = NULL;
+               } else {
+                       drm_syncobj_replace_fence(syncobj, fence);
+               }
         }
  }
+static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
+{
+       struct i915_execbuffer *eb = data;
+
+       /* Timeline fences are incompatible with the fence array flag. */
+       if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+               return -EINVAL;
Also you only allow one timeline_fence extension struct.


Well spotted, thanks.



+
+       if (copy_from_user(&eb->extensions.timeline_fences, ext,
+                          sizeof(eb->extensions.timeline_fences)))
+               return -EFAULT;
+
+       eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
+
+       return 0;
+}
+
  static const i915_user_extension_fn execbuf_extensions[] = {
+        [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
  };
static int
@@ -2372,14 +2541,15 @@ static int
  i915_gem_do_execbuffer(struct drm_device *dev,
                        struct drm_file *file,
                        struct drm_i915_gem_execbuffer2 *args,
-                      struct drm_i915_gem_exec_object2 *exec,
-                      struct drm_syncobj **fences)
+                      struct drm_i915_gem_exec_object2 *exec)
  {
         struct i915_execbuffer eb;
         struct dma_fence *in_fence = NULL;
         struct dma_fence *exec_fence = NULL;
         struct sync_file *out_fence = NULL;
+       struct i915_drm_dma_fences *fences = NULL;
         int out_fence_fd = -1;
+       int nfences = 0;
         int err;
BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
@@ -2421,10 +2591,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                         return err;
         }
+ fences = get_fence_array(&eb, &nfences);
+       if (IS_ERR(fences))
+               return PTR_ERR(fences);
+
         if (args->flags & I915_EXEC_FENCE_IN) {
                 in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
-               if (!in_fence)
-                       return -EINVAL;
+               if (!in_fence) {
+                       err = -EINVAL;
+                       goto err_fences;
+               }
         }
if (args->flags & I915_EXEC_FENCE_SUBMIT) {
@@ -2582,7 +2758,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
         }
if (fences) {
-               err = await_fence_array(&eb, fences);
+               err = await_fence_array(&eb, fences, nfences);
                 if (err)
                         goto err_request;
         }
@@ -2611,7 +2787,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
         i915_request_add(eb.request);
if (fences)
-               signal_fence_array(&eb, fences);
+               signal_fence_array(&eb, fences, nfences);
if (out_fence) {
                 if (err == 0) {
@@ -2646,6 +2822,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
         dma_fence_put(exec_fence);
  err_in_fence:
         dma_fence_put(in_fence);
+err_fences:
+       put_fence_array(fences, nfences);
         return err;
  }
@@ -2739,7 +2917,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
                         exec2_list[i].flags = 0;
         }
- err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
+       err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
         if (exec2.flags & __EXEC_HAS_RELOC) {
                 struct drm_i915_gem_exec_object __user *user_exec_list =
                         u64_to_user_ptr(args->buffers_ptr);
@@ -2770,7 +2948,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
  {
         struct drm_i915_gem_execbuffer2 *args = data;
         struct drm_i915_gem_exec_object2 *exec2_list;
-       struct drm_syncobj **fences = NULL;
         const size_t count = args->buffer_count;
         int err;
@@ -2798,15 +2975,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
                 return -EFAULT;
         }
- if (args->flags & I915_EXEC_FENCE_ARRAY) {
-               fences = get_fence_array(args, file);
-               if (IS_ERR(fences)) {
-                       kvfree(exec2_list);
-                       return PTR_ERR(fences);
-               }
-       }
-
-       err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
+       err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
/*
          * Now that we have begun execution of the batchbuffer, we ignore
@@ -2846,7 +3015,6 @@ end:;
         }
args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
-       put_fence_array(args, fences);
         kvfree(exec2_list);
         return err;
  }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 7c3c3b8ab339..48f9009a6318 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -456,6 +456,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
         case I915_PARAM_HAS_EXEC_BATCH_FIRST:
         case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
         case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
+       case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
                 /* For the time being all of these are always true;
                  * if some supported hardware does not have one of these
                  * features this value needs to be provided from
@@ -3220,6 +3221,7 @@ static struct drm_driver driver = {
         .driver_features =
             DRIVER_GEM |
             DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
+           DRIVER_SYNCOBJ_TIMELINE,
With a comma ? i.e. .ioctls = DRIVER_SYNCOBJ_TIMELINE


Waaaat. How am I running a this and it works....?



         .release = i915_driver_release,
         .open = i915_driver_open,
         .lastclose = i915_driver_lastclose,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index efa195d6994e..b7fe1915e34d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -617,6 +617,12 @@ typedef struct drm_i915_irq_wait {
   */
  #define I915_PARAM_PERF_REVISION       54
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See
+ * I915_EXEC_EXT.
+ */
+#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
+
  /* Must be kept compact -- no holes and well documented */
typedef struct drm_i915_getparam {
@@ -1014,9 +1020,40 @@ struct drm_i915_gem_exec_fence {
  };
enum drm_i915_gem_execbuffer_ext {
+       /**
+        * This identifier is associated with
+        * drm_i915_gem_execbuf_ext_timeline_fences.
Or just "See drm_i915_gem_execbuf_ext_timeline_fences
+        */
+       DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
  = 0
don't leave it up to the compiler.


Should we set every single item in the enum or just the first one?


+
         DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
  };
+/**
+ * This structure describes an array of drm_syncobj and associated points for
+ * timeline variants of drm_syncobj. It is invalid to append this structure to
+ * the execbuf if I915_EXEC_FENCE_ARRAY is set.
+ */
+struct drm_i915_gem_execbuffer_ext_timeline_fences {
+       struct i915_user_extension base;
+
+       /**
+        * Number of element in the handles_ptr & value_ptr arrays.
+        */
+       __u64 handle_count;
Since it is used for both, I would use something like fence_count, or
num_fences (although _count is in the majority).


Done.



+       /**
+        * Pointer to an array of struct drm_i915_gem_exec_fence of size handle_count.
s/size/length/

I think we typically use size for byte sizes around the uAPI.


Sure.



+        */
+       __u64 handles_ptr;
+
+       /**
+        * Pointer to an array of u64 values of size handle_count. Values must
+        * be 0 for a binary drm_syncobj.
+        */
+       __u64 values_ptr;
+};
+
  struct drm_i915_gem_execbuffer2 {
         /**
          * List of gem_exec_object2 structs
--
2.21.0.392.gf8f6787159e

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux