Re: [PATCH v3 2/2] drm/i915: add syncobj timeline support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 31/07/2019 23:03, Chris Wilson wrote:
Quoting Lionel Landwerlin (2019-07-31 15:07:33)
-static struct drm_syncobj **
-get_fence_array(struct drm_i915_gem_execbuffer2 *args,
-               struct drm_file *file)
+static struct i915_eb_fences *
+get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
+{
+       struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
+               &eb->extensions.timeline_fences;
+       struct drm_i915_gem_exec_fence __user *user_fences;
+       struct i915_eb_fences *fences;
+       u64 __user *user_values;
+       u64 num_fences, num_user_fences = timeline_fences->fence_count;
+       unsigned long n;
+       int err;
+
+       /* Check multiplication overflow for access_ok() and kvmalloc_array() */
+       BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
+       if (num_user_fences > min_t(unsigned long,
+                                   ULONG_MAX / sizeof(*user_fences),
+                                   SIZE_MAX / sizeof(*fences)))
+               return ERR_PTR(-EINVAL);
+
+       user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
+       if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
+               return ERR_PTR(-EFAULT);
+
+       user_values = u64_to_user_ptr(timeline_fences->values_ptr);
+       if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
+               return ERR_PTR(-EFAULT);
+
+       fences = kvmalloc_array(num_user_fences, sizeof(*fences),
+                               __GFP_NOWARN | GFP_KERNEL);
+       if (!fences)
+               return ERR_PTR(-ENOMEM);
+
+       BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+                    ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
+       for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
+               struct drm_i915_gem_exec_fence user_fence;
+               struct drm_syncobj *syncobj;
+               struct dma_fence *fence = NULL;
+               u64 point;
+
+               if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
+                       err = -EFAULT;
+                       goto err;
+               }
+
+               if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+                       err = -EINVAL;
+                       goto err;
+               }
+
+               if (__get_user(point, user_values++)) {
+                       err = -EFAULT;
+                       goto err;
+               }
+
+               syncobj = drm_syncobj_find(eb->file, user_fence.handle);
+               if (!syncobj) {
+                       DRM_DEBUG("Invalid syncobj handle provided\n");
+                       err = -ENOENT;
+                       goto err;
+               }
+
+               if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+                       fence = drm_syncobj_fence_get(syncobj);
+                       if (!fence) {
+                               DRM_DEBUG("Syncobj handle has no fence\n");
+                               drm_syncobj_put(syncobj);
+                               err = -EINVAL;
+                               goto err;
+                       }
+
+                       err = dma_fence_chain_find_seqno(&fence, point);
+                       if (err) {
+                               DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
+                               drm_syncobj_put(syncobj);
+                               goto err;
+                       }
+
+                       /* A point might have been signaled already and
+                        * garbage collected from the timeline. In this case
+                        * just ignore the point and carry on.
+                        */
+                       if (!fence) {
+                               drm_syncobj_put(syncobj);
+                               continue;
+                       }
+               }
+
+               /*
+                * For timeline syncobjs we need to preallocate chains for
+                * later signaling.
+                */
+               if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
if (dma_fence_chain_find_seqno() == 0)
	return -EINVAL;

as an early sanity check?


Makes sense, let me see if that breaks anything.


Also waiting and signaling the same point doesn't work anymore for timelines.

Will check that too.



+                       fences[num_fences].chain_fence =
+                               kmalloc(sizeof(*fences[num_fences].chain_fence),
+                                       GFP_KERNEL);
+                       if (!fences[num_fences].chain_fence) {
+                               dma_fence_put(fence);
+                               drm_syncobj_put(syncobj);
+                               err = -ENOMEM;
+                               DRM_DEBUG("Unable to alloc chain_fence\n");
+                               goto err;
+                       }
+               } else {
+                       fences[num_fences].chain_fence = NULL;
+               }
+
+               fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+               fences[num_fences].dma_fence = fence;
+               fences[num_fences].value = point;
+               num_fences++;
+       }
+
+       *out_n_fences = num_fences;
+
+       return fences;
+
+err:
+       __free_fence_array(fences, num_fences);
+       return ERR_PTR(err);
+}
@@ -2329,9 +2484,9 @@ await_fence_array(struct i915_execbuffer *eb,
static void
  signal_fence_array(struct i915_execbuffer *eb,
-                  struct drm_syncobj **fences)
+                  struct i915_eb_fences *fences,
+                  int nfences)
  {
-       const unsigned int nfences = eb->args->num_cliprects;
         struct dma_fence * const fence = &eb->request->fence;
         unsigned int n;
@@ -2339,15 +2494,46 @@ signal_fence_array(struct i915_execbuffer *eb,
                 struct drm_syncobj *syncobj;
                 unsigned int flags;
- syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+               syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
                 if (!(flags & I915_EXEC_FENCE_SIGNAL))
                         continue;
- drm_syncobj_replace_fence(syncobj, fence);
+               if (fences[n].chain_fence) {
+                       drm_syncobj_add_point(syncobj, fences[n].chain_fence,
+                                             fence, fences[n].value);
+                       /*
+                        * The chain's ownership is transfered to the
+                        * timeline.
+                        */
+                       fences[n].chain_fence = NULL;
+               } else {
+                       drm_syncobj_replace_fence(syncobj, fence);
+               }
         }
  }
I think I have convinced myself that with the split between wait before,
signal after combined with the rule that seqno point along the syncobj
are monotonic, you should not be able to generate an AB-BA deadlock
between concurrent clients.

Except that we need to fixup drm_syncobj_add_point() to actually apply
that rule. Throwing an error and leaving the client stuck is less than
ideal, we can't even recover with a GPU reset, and as they are native
fences we don't employ a failsafe timer.


Unfortunately we're not the only user of add_point() and amdgpu doesn't want it to fail.

Best I can come up with is to take the syncobj lock when signaling in get_timeline_fence_array(), do the check there, and unlock in __free_fence_array().




Doesn't WAIT_FOR_SUBMIT throw a spanner in the works and allow for
deadlocks?


Hm... I can't see how.

Unless you mean clients could deadlock themselves the same way they would using 2 pthread_mutex and having 2 threads trying to lock both mutexes in opposite order.

Is that the kernel's problem?


On the other hand, if they used MI_SEMA rather than scheduler
fences, give or take some coarseness in our timeslicing granularity (I
need to enable fast context switches on user semaphores -- first attempt
was just an interrupt storm!) it will work as well as a round-robin
scheduler can work. (In other words, we only need coarse fences for the
scheduler and userspace can implement its own semaphores -- with the
open discussion on how to share the iova user semaphores between
processes, cf ugpu.)
-Chris


_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux