Quoting Lionel Landwerlin (2019-07-31 15:07:33) > -static struct drm_syncobj ** > -get_fence_array(struct drm_i915_gem_execbuffer2 *args, > - struct drm_file *file) > +static struct i915_eb_fences * > +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences) > +{ > + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences = > + &eb->extensions.timeline_fences; > + struct drm_i915_gem_exec_fence __user *user_fences; > + struct i915_eb_fences *fences; > + u64 __user *user_values; > + u64 num_fences, num_user_fences = timeline_fences->fence_count; > + unsigned long n; > + int err; > + > + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ > + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); > + if (num_user_fences > min_t(unsigned long, > + ULONG_MAX / sizeof(*user_fences), > + SIZE_MAX / sizeof(*fences))) > + return ERR_PTR(-EINVAL); > + > + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); > + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences))) > + return ERR_PTR(-EFAULT); > + > + user_values = u64_to_user_ptr(timeline_fences->values_ptr); > + if (!access_ok(user_values, num_user_fences * sizeof(*user_values))) > + return ERR_PTR(-EFAULT); > + > + fences = kvmalloc_array(num_user_fences, sizeof(*fences), > + __GFP_NOWARN | GFP_KERNEL); > + if (!fences) > + return ERR_PTR(-ENOMEM); > + > + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & > + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); > + > + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) { > + struct drm_i915_gem_exec_fence user_fence; > + struct drm_syncobj *syncobj; > + struct dma_fence *fence = NULL; > + u64 point; > + > + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) { > + err = -EFAULT; > + goto err; > + } > + > + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { > + err = -EINVAL; > + goto err; > + } > + > + if (__get_user(point, user_values++)) { > + err = -EFAULT; > + goto err; > + } > + > + syncobj = drm_syncobj_find(eb->file, user_fence.handle); > + if (!syncobj) { > + DRM_DEBUG("Invalid syncobj handle provided\n"); > + err = -ENOENT; > + goto err; > + } > + > + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { > + fence = drm_syncobj_fence_get(syncobj); > + if (!fence) { > + DRM_DEBUG("Syncobj handle has no fence\n"); > + drm_syncobj_put(syncobj); > + err = -EINVAL; > + goto err; > + } > + > + err = dma_fence_chain_find_seqno(&fence, point); > + if (err) { > + DRM_DEBUG("Syncobj handle missing requested point %llu\n", point); > + drm_syncobj_put(syncobj); > + goto err; > + } > + > + /* A point might have been signaled already and > + * garbage collected from the timeline. In this case > + * just ignore the point and carry on. > + */ > + if (!fence) { > + drm_syncobj_put(syncobj); > + continue; > + } > + } > + > + /* > + * For timeline syncobjs we need to preallocate chains for > + * later signaling. > + */ > + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { if (dma_fence_chain_find_seqno() == 0) return -EINVAL; as an early sanity check? > + fences[num_fences].chain_fence = > + kmalloc(sizeof(*fences[num_fences].chain_fence), > + GFP_KERNEL); > + if (!fences[num_fences].chain_fence) { > + dma_fence_put(fence); > + drm_syncobj_put(syncobj); > + err = -ENOMEM; > + DRM_DEBUG("Unable to alloc chain_fence\n"); > + goto err; > + } > + } else { > + fences[num_fences].chain_fence = NULL; > + } > + > + fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); > + fences[num_fences].dma_fence = fence; > + fences[num_fences].value = point; > + num_fences++; > + } > + > + *out_n_fences = num_fences; > + > + return fences; > + > +err: > + __free_fence_array(fences, num_fences); > + return ERR_PTR(err); > +} > @@ -2329,9 +2484,9 @@ await_fence_array(struct i915_execbuffer *eb, > > static void > signal_fence_array(struct i915_execbuffer *eb, > - struct drm_syncobj **fences) > + struct i915_eb_fences *fences, > + int nfences) > { > - const unsigned int nfences = eb->args->num_cliprects; > struct dma_fence * const fence = &eb->request->fence; > unsigned int n; > > @@ -2339,15 +2494,46 @@ signal_fence_array(struct i915_execbuffer *eb, > struct drm_syncobj *syncobj; > unsigned int flags; > > - syncobj = ptr_unpack_bits(fences[n], &flags, 2); > + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); > if (!(flags & I915_EXEC_FENCE_SIGNAL)) > continue; > > - drm_syncobj_replace_fence(syncobj, fence); > + if (fences[n].chain_fence) { > + drm_syncobj_add_point(syncobj, fences[n].chain_fence, > + fence, fences[n].value); > + /* > + * The chain's ownership is transfered to the > + * timeline. > + */ > + fences[n].chain_fence = NULL; > + } else { > + drm_syncobj_replace_fence(syncobj, fence); > + } > } > } I think I have convinced myself that with the split between wait before, signal after combined with the rule that seqno point along the syncobj are monotonic, you should not be able to generate an AB-BA deadlock between concurrent clients. Except that we need to fixup drm_syncobj_add_point() to actually apply that rule. Throwing an error and leaving the client stuck is less than ideal, we can't even recover with a GPU reset, and as they are native fences we don't employ a failsafe timer. Doesn't WAIT_FOR_SUBMIT throw a spanner in the works and allow for deadlocks? On the other hand, if they used MI_SEMA rather scheduler fences, give or take some coarseness in our timeslicing granularity (I need to enable fast context switches on user semaphores -- first attempt was just an interrupt storm!) it will work as well as a round-robin scheduler can work. (In other words, we only need coarse fences for the scheduler and userspace can implement its own semaphores -- with the open discussion on how to share the iova user semaphores between processes, cf ugpu.) -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx