Quoting Lionel Landwerlin (2019-08-01 15:29:34) > On 31/07/2019 23:03, Chris Wilson wrote: > > Quoting Lionel Landwerlin (2019-07-31 15:07:33) > >> -static struct drm_syncobj ** > >> -get_fence_array(struct drm_i915_gem_execbuffer2 *args, > >> - struct drm_file *file) > >> +static struct i915_eb_fences * > >> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences) > >> +{ > >> + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences = > >> + &eb->extensions.timeline_fences; > >> + struct drm_i915_gem_exec_fence __user *user_fences; > >> + struct i915_eb_fences *fences; > >> + u64 __user *user_values; > >> + u64 num_fences, num_user_fences = timeline_fences->fence_count; > >> + unsigned long n; > >> + int err; > >> + > >> + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ > >> + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); > >> + if (num_user_fences > min_t(unsigned long, > >> + ULONG_MAX / sizeof(*user_fences), > >> + SIZE_MAX / sizeof(*fences))) > >> + return ERR_PTR(-EINVAL); > >> + > >> + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); > >> + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences))) > >> + return ERR_PTR(-EFAULT); > >> + > >> + user_values = u64_to_user_ptr(timeline_fences->values_ptr); > >> + if (!access_ok(user_values, num_user_fences * sizeof(*user_values))) > >> + return ERR_PTR(-EFAULT); > >> + > >> + fences = kvmalloc_array(num_user_fences, sizeof(*fences), > >> + __GFP_NOWARN | GFP_KERNEL); > >> + if (!fences) > >> + return ERR_PTR(-ENOMEM); > >> + > >> + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & > >> + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); > >> + > >> + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) { > >> + struct drm_i915_gem_exec_fence user_fence; > >> + struct drm_syncobj *syncobj; > >> + struct dma_fence *fence = NULL; > >> + u64 point; > >> + > >> + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) { > >> + err = -EFAULT; > >> + goto err; > >> + } > >> + > >> + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { > >> + err = -EINVAL; > >> + goto err; > >> + } > >> + > >> + if (__get_user(point, user_values++)) { > >> + err = -EFAULT; > >> + goto err; > >> + } > >> + > >> + syncobj = drm_syncobj_find(eb->file, user_fence.handle); > >> + if (!syncobj) { > >> + DRM_DEBUG("Invalid syncobj handle provided\n"); > >> + err = -ENOENT; > >> + goto err; > >> + } > >> + > >> + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { > >> + fence = drm_syncobj_fence_get(syncobj); > >> + if (!fence) { > >> + DRM_DEBUG("Syncobj handle has no fence\n"); > >> + drm_syncobj_put(syncobj); > >> + err = -EINVAL; > >> + goto err; > >> + } > >> + > >> + err = dma_fence_chain_find_seqno(&fence, point); > >> + if (err) { > >> + DRM_DEBUG("Syncobj handle missing requested point %llu\n", point); > >> + drm_syncobj_put(syncobj); > >> + goto err; > >> + } > >> + > >> + /* A point might have been signaled already and > >> + * garbage collected from the timeline. In this case > >> + * just ignore the point and carry on. > >> + */ > >> + if (!fence) { > >> + drm_syncobj_put(syncobj); > >> + continue; > >> + } > >> + } > >> + > >> + /* > >> + * For timeline syncobjs we need to preallocate chains for > >> + * later signaling. > >> + */ > >> + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { > > if (dma_fence_chain_find_seqno() == 0) > > return -EINVAL; > > > > as an early sanity check? > > > >> + fences[num_fences].chain_fence = > >> + kmalloc(sizeof(*fences[num_fences].chain_fence), > >> + GFP_KERNEL); > >> + if (!fences[num_fences].chain_fence) { > >> + dma_fence_put(fence); > >> + drm_syncobj_put(syncobj); > >> + err = -ENOMEM; > >> + DRM_DEBUG("Unable to alloc chain_fence\n"); > >> + goto err; > >> + } > >> + } else { > >> + fences[num_fences].chain_fence = NULL; > >> + } > >> + > >> + fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); > >> + fences[num_fences].dma_fence = fence; > >> + fences[num_fences].value = point; > >> + num_fences++; > >> + } > >> + > >> + *out_n_fences = num_fences; > >> + > >> + return fences; > >> + > >> +err: > >> + __free_fence_array(fences, num_fences); > >> + return ERR_PTR(err); > >> +} > >> @@ -2329,9 +2484,9 @@ await_fence_array(struct i915_execbuffer *eb, > >> > >> static void > >> signal_fence_array(struct i915_execbuffer *eb, > >> - struct drm_syncobj **fences) > >> + struct i915_eb_fences *fences, > >> + int nfences) > >> { > >> - const unsigned int nfences = eb->args->num_cliprects; > >> struct dma_fence * const fence = &eb->request->fence; > >> unsigned int n; > >> > >> @@ -2339,15 +2494,46 @@ signal_fence_array(struct i915_execbuffer *eb, > >> struct drm_syncobj *syncobj; > >> unsigned int flags; > >> > >> - syncobj = ptr_unpack_bits(fences[n], &flags, 2); > >> + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); > >> if (!(flags & I915_EXEC_FENCE_SIGNAL)) > >> continue; > >> > >> - drm_syncobj_replace_fence(syncobj, fence); > >> + if (fences[n].chain_fence) { > >> + drm_syncobj_add_point(syncobj, fences[n].chain_fence, > >> + fence, fences[n].value); > >> + /* > >> + * The chain's ownership is transfered to the > >> + * timeline. > >> + */ > >> + fences[n].chain_fence = NULL; > >> + } else { > >> + drm_syncobj_replace_fence(syncobj, fence); > >> + } > >> } > >> } > > I think I have convinced myself that with the split between wait before, > > signal after combined with the rule that seqno point along the syncobj > > are monotonic, you should not be able to generate an AB-BA deadlock > > between concurrent clients. > > > Can you come up with an example that would deadlock? Timeline holds 2,1; a wait on 2 will fail with -EINVAL to userspace. (Though possibly perfectly valid behaviour on the part of the user.) Timeline holds 2, with 1 being submitted. A wait on 1 waits on 2 instead. If 1 gains a dependency on A (e.g. bad userspace or an implicit fence, it's a concurrent wait/submit so expect the worst, i.e. userspace has to be racing with itself to get into this mess), you now have a deadlock. (The assumption being that the syncpt along the timeline are themselves not strictly ordered, and considering they are external syncobj, that seems like a reasonable generalisation.) -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx