Am 20.09.2018 um 13:03 schrieb Chunming Zhou: > This patch is for VK_KHR_timeline_semaphore extension, semaphore is called syncobj in kernel side: > This extension introduces a new type of syncobj that has an integer payload > identifying a point in a timeline. Such timeline syncobjs support the > following operations: > * CPU query - A host operation that allows querying the payload of the > timeline syncobj. > * CPU wait - A host operation that allows a blocking wait for a > timeline syncobj to reach a specified value. > * Device wait - A device operation that allows waiting for a > timeline syncobj to reach a specified value. > * Device signal - A device operation that allows advancing the > timeline syncobj to a specified value. > > v1: > Since it's a timeline, that means the front time point(PT) always is signaled before the late PT. > a. signal PT design: > Signal PT fence N depends on PT[N-1] fence and signal operation fence, when PT[N] fence is signaled, > the timeline will increase to value of PT[N]. > b. wait PT design: > Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare > wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be > signaled, otherwise keep in list. syncobj wait operation can wait on any point of timeline, > so need a RB tree to order them. And wait PT could ahead of signal PT, we need a submission fence to > perform that. > > v2: > 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian) > 2. move unexposed definitions to .c file. (Daniel Vetter) > 3. split up the change to drm_syncobj_find_fence() in a separate patch. (Christian) > 4. split up the change to drm_syncobj_replace_fence() in a separate patch. > 5. drop the submission_fence implementation and instead use wait_event() for that. (Christian) > 6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter) > > v3: > 1. replace normal syncobj with timeline implementation. 
(Vetter and Christian) > a. normal syncobj signal op will create a signal PT to tail of signal pt list. > b. normal syncobj wait op will create a wait pt with last signal point, and this wait PT is only signaled by related signal point PT. > 2. many bug fix and clean up > 3. stub fence moving is moved to other patch. > > v4: > 1. fix RB tree loop with while(node=rb_first(...)). (Christian) > 2. fix syncobj lifecycle. (Christian) > 3. only enable_signaling when there is wait_pt. (Christian) > 4. fix timeline path issues. > 5. write a timeline test in libdrm > > v5: (Christian) > 1. semaphore is called syncobj in kernel side. > 2. don't need 'timeline' characters in some function name. > 3. keep syncobj cb. > > v6: (Christian) > 1. merge syncobj_timeline to syncobj structure. > 2. simplify some check sentences. > 3. some misc change. > 4. fix CTS failed issue. > > v7: (Christian) > 1. error handling when creating signal pt. > 2. remove timeline naming in func. > 3. export flags in find_fence. > 4. allow reset timeline. > > v8: > 1. use wait_event_interruptible without timeout > 2. rename _TYPE_INDIVIDUAL to _TYPE_BINARY > > individual syncobj is tested by ./deqp-vk -n dEQP-VK*semaphore* > timeline syncobj is tested by ./amdgpu_test -s 9 > > Signed-off-by: Chunming Zhou <david1.zhou at amd.com> > Cc: Christian Konig <christian.koenig at amd.com> > Cc: Dave Airlie <airlied at redhat.com> > Cc: Daniel Rakos <Daniel.Rakos at amd.com> > Cc: Daniel Vetter <daniel at ffwll.ch> A few more function comments would be nice to have, but that can also come later. Reviewed-by: Christian König <christian.koenig at amd.com> for now. Thanks for the hard work, Christian. 
> --- > drivers/gpu/drm/drm_syncobj.c | 287 ++++++++++++++++++--- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- > include/drm/drm_syncobj.h | 65 ++--- > include/uapi/drm/drm.h | 1 + > 4 files changed, 281 insertions(+), 74 deletions(-) > > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > index f796c9fc3858..67472bd77c83 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -56,6 +56,9 @@ > #include "drm_internal.h" > #include <drm/drm_syncobj.h> > > +/* merge normal syncobj to timeline syncobj, the point interval is 1 */ > +#define DRM_SYNCOBJ_BINARY_POINT 1 > + > struct drm_syncobj_stub_fence { > struct dma_fence base; > spinlock_t lock; > @@ -82,6 +85,11 @@ static const struct dma_fence_ops drm_syncobj_stub_fence_ops = { > .release = drm_syncobj_stub_fence_release, > }; > > +struct drm_syncobj_signal_pt { > + struct dma_fence_array *base; > + u64 value; > + struct list_head list; > +}; > > /** > * drm_syncobj_find - lookup and reference a sync object. > @@ -124,8 +132,8 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, > { > int ret; > > - *fence = drm_syncobj_fence_get(syncobj); > - if (*fence) > + ret = drm_syncobj_search_fence(syncobj, 0, 0, fence); > + if (!ret) > return 1; > > spin_lock(&syncobj->lock); > @@ -133,10 +141,12 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, > * have the lock, try one more time just to be sure we don't add a > * callback when a fence has already been set. 
> */ > - if (syncobj->fence) { > - *fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, > - lockdep_is_held(&syncobj->lock))); > - ret = 1; > + if (!list_empty(&syncobj->signal_pt_list)) { > + spin_unlock(&syncobj->lock); > + drm_syncobj_search_fence(syncobj, 0, 0, fence); > + if (*fence) > + return 1; > + spin_lock(&syncobj->lock); > } else { > *fence = NULL; > drm_syncobj_add_callback_locked(syncobj, cb, func); > @@ -164,6 +174,159 @@ void drm_syncobj_remove_callback(struct drm_syncobj *syncobj, > spin_unlock(&syncobj->lock); > } > > +static void drm_syncobj_init(struct drm_syncobj *syncobj) > +{ > + spin_lock(&syncobj->lock); > + syncobj->timeline_context = dma_fence_context_alloc(1); > + syncobj->timeline = 0; > + syncobj->signal_point = 0; > + init_waitqueue_head(&syncobj->wq); > + > + INIT_LIST_HEAD(&syncobj->signal_pt_list); > + spin_unlock(&syncobj->lock); > +} > + > +static void drm_syncobj_fini(struct drm_syncobj *syncobj) > +{ > + struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp; > + > + spin_lock(&syncobj->lock); > + list_for_each_entry_safe(signal_pt, tmp, > + &syncobj->signal_pt_list, list) { > + list_del(&signal_pt->list); > + dma_fence_put(&signal_pt->base->base); > + kfree(signal_pt); > + } > + spin_unlock(&syncobj->lock); > +} > + > +static struct dma_fence > +*drm_syncobj_find_signal_pt_for_point(struct drm_syncobj *syncobj, > + uint64_t point) > +{ > + struct drm_syncobj_signal_pt *signal_pt; > + > + if ((syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) && > + (point <= syncobj->timeline)) { > + struct drm_syncobj_stub_fence *fence = > + kzalloc(sizeof(struct drm_syncobj_stub_fence), > + GFP_KERNEL); > + > + if (!fence) > + return NULL; > + spin_lock_init(&fence->lock); > + dma_fence_init(&fence->base, > + &drm_syncobj_stub_fence_ops, > + &fence->lock, > + syncobj->timeline_context, > + point); > + > + dma_fence_signal(&fence->base); > + return &fence->base; > + } > + > + list_for_each_entry(signal_pt, &syncobj->signal_pt_list, list) 
{ > + if (point > signal_pt->value) > + continue; > + if ((syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) && > + (point != signal_pt->value)) > + continue; > + return dma_fence_get(&signal_pt->base->base); > + } > + return NULL; > +} > + > +static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj, > + struct dma_fence *fence, > + u64 point) > +{ > + struct drm_syncobj_signal_pt *signal_pt = > + kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL); > + struct drm_syncobj_signal_pt *tail_pt; > + struct dma_fence **fences; > + int num_fences = 0; > + int ret = 0, i; > + > + if (!signal_pt) > + return -ENOMEM; > + if (!fence) > + goto out; > + > + fences = kmalloc_array(sizeof(void *), 2, GFP_KERNEL); > + if (!fences) { > + ret = -ENOMEM; > + goto out; > + } > + fences[num_fences++] = dma_fence_get(fence); > + /* timeline syncobj must take this dependency */ > + if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) { > + spin_lock(&syncobj->lock); > + if (!list_empty(&syncobj->signal_pt_list)) { > + tail_pt = list_last_entry(&syncobj->signal_pt_list, > + struct drm_syncobj_signal_pt, list); > + fences[num_fences++] = dma_fence_get(&tail_pt->base->base); > + } > + spin_unlock(&syncobj->lock); > + } > + signal_pt->base = dma_fence_array_create(num_fences, fences, > + syncobj->timeline_context, > + point, false); > + if (!signal_pt->base) { > + ret = -ENOMEM; > + goto fail; > + } > + > + spin_lock(&syncobj->lock); > + if (syncobj->signal_point >= point) { > + DRM_WARN("A later signal is ready!"); > + spin_unlock(&syncobj->lock); > + goto exist; > + } > + signal_pt->value = point; > + list_add_tail(&signal_pt->list, &syncobj->signal_pt_list); > + syncobj->signal_point = point; > + spin_unlock(&syncobj->lock); > + wake_up_all(&syncobj->wq); > + > + return 0; > +exist: > + dma_fence_put(&signal_pt->base->base); > +fail: > + for (i = 0; i < num_fences; i++) > + dma_fence_put(fences[i]); > + kfree(fences); > +out: > + kfree(signal_pt); > + return ret; > +} > + > +static 
void drm_syncobj_garbage_collection(struct drm_syncobj *syncobj) > +{ > + struct drm_syncobj_signal_pt *signal_pt, *tmp, *tail_pt; > + > + spin_lock(&syncobj->lock); > + tail_pt = list_last_entry(&syncobj->signal_pt_list, > + struct drm_syncobj_signal_pt, > + list); > + list_for_each_entry_safe(signal_pt, tmp, > + &syncobj->signal_pt_list, list) { > + if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY && > + signal_pt == tail_pt) > + continue; > + if (dma_fence_is_signaled(&signal_pt->base->base)) { > + syncobj->timeline = signal_pt->value; > + list_del(&signal_pt->list); > + dma_fence_put(&signal_pt->base->base); > + kfree(signal_pt); > + } else { > + /*signal_pt is in order in list, from small to big, so > + * the later must not be signal either */ > + break; > + } > + } > + > + spin_unlock(&syncobj->lock); > +} > /** > * drm_syncobj_replace_fence - replace fence in a sync object. > * @syncobj: Sync object to replace fence in > @@ -176,28 +339,29 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, > u64 point, > struct dma_fence *fence) > { > - struct dma_fence *old_fence; > - struct drm_syncobj_cb *cur, *tmp; > - > - if (fence) > - dma_fence_get(fence); > - > - spin_lock(&syncobj->lock); > - > - old_fence = rcu_dereference_protected(syncobj->fence, > - lockdep_is_held(&syncobj->lock)); > - rcu_assign_pointer(syncobj->fence, fence); > + u64 pt_value = point; > + > + drm_syncobj_garbage_collection(syncobj); > + if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) { > + if (!fence) { > + drm_syncobj_fini(syncobj); > + drm_syncobj_init(syncobj); > + return; > + } > + pt_value = syncobj->signal_point + > + DRM_SYNCOBJ_BINARY_POINT; > + } > + drm_syncobj_create_signal_pt(syncobj, fence, pt_value); > + if (fence) { > + struct drm_syncobj_cb *cur, *tmp; > > - if (fence != old_fence) { > + spin_lock(&syncobj->lock); > list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) { > list_del_init(&cur->node); > cur->func(syncobj, cur); > } > + spin_unlock(&syncobj->lock); > 
} > - > - spin_unlock(&syncobj->lock); > - > - dma_fence_put(old_fence); > } > EXPORT_SYMBOL(drm_syncobj_replace_fence); > > @@ -220,6 +384,46 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) > return 0; > } > > +static int > +drm_syncobj_point_get(struct drm_syncobj *syncobj, u64 point, u64 flags, > + struct dma_fence **fence) > +{ > + int ret = 0; > + > + if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { > + ret = wait_event_interruptible(syncobj->wq, > + point <= syncobj->signal_point); > + if (ret < 0) > + return ret; > + } > + spin_lock(&syncobj->lock); > + *fence = drm_syncobj_find_signal_pt_for_point(syncobj, point); > + if (!*fence) > + ret = -EINVAL; > + spin_unlock(&syncobj->lock); > + return ret; > +} > + > +int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point, > + u64 flags, struct dma_fence **fence) > +{ > + u64 pt_value = point; > + > + if (!syncobj) > + return -ENOENT; > + > + drm_syncobj_garbage_collection(syncobj); > + if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) { > + /*BINARY syncobj always wait on last pt */ > + pt_value = syncobj->signal_point; > + > + if (pt_value == 0) > + pt_value += DRM_SYNCOBJ_BINARY_POINT; > + } > + return drm_syncobj_point_get(syncobj, pt_value, flags, fence); > +} > +EXPORT_SYMBOL(drm_syncobj_search_fence); > + > /** > * drm_syncobj_find_fence - lookup and reference the fence in a sync object > * @file_private: drm file private pointer > @@ -228,7 +432,7 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) > * @fence: out parameter for the fence > * > * This is just a convenience function that combines drm_syncobj_find() and > - * drm_syncobj_fence_get(). > + * drm_syncobj_lookup_fence(). > * > * Returns 0 on success or a negative error value on failure. 
On success @fence > * contains a reference to the fence, which must be released by calling > @@ -239,15 +443,9 @@ int drm_syncobj_find_fence(struct drm_file *file_private, > struct dma_fence **fence) > { > struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle); > - int ret = 0; > - > - if (!syncobj) > - return -ENOENT; > + int ret; > > - *fence = drm_syncobj_fence_get(syncobj); > - if (!*fence) { > - ret = -EINVAL; > - } > + ret = drm_syncobj_search_fence(syncobj, point, flags, fence); > drm_syncobj_put(syncobj); > return ret; > } > @@ -264,7 +462,7 @@ void drm_syncobj_free(struct kref *kref) > struct drm_syncobj *syncobj = container_of(kref, > struct drm_syncobj, > refcount); > - drm_syncobj_replace_fence(syncobj, 0, NULL); > + drm_syncobj_fini(syncobj); > kfree(syncobj); > } > EXPORT_SYMBOL(drm_syncobj_free); > @@ -294,6 +492,11 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, > kref_init(&syncobj->refcount); > INIT_LIST_HEAD(&syncobj->cb_list); > spin_lock_init(&syncobj->lock); > + if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) > + syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE; > + else > + syncobj->type = DRM_SYNCOBJ_TYPE_BINARY; > + drm_syncobj_init(syncobj); > > if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) { > ret = drm_syncobj_assign_null_handle(syncobj); > @@ -576,7 +779,8 @@ drm_syncobj_create_ioctl(struct drm_device *dev, void *data, > return -ENODEV; > > /* no valid flags yet */ > - if (args->flags & ~DRM_SYNCOBJ_CREATE_SIGNALED) > + if (args->flags & ~(DRM_SYNCOBJ_CREATE_SIGNALED | > + DRM_SYNCOBJ_CREATE_TYPE_TIMELINE)) > return -EINVAL; > > return drm_syncobj_create_as_handle(file_private, > @@ -669,9 +873,8 @@ static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, > struct syncobj_wait_entry *wait = > container_of(cb, struct syncobj_wait_entry, syncobj_cb); > > - /* This happens inside the syncobj lock */ > - wait->fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, > - 
lockdep_is_held(&syncobj->lock))); > + drm_syncobj_search_fence(syncobj, 0, 0, &wait->fence); > + > wake_up_process(wait->task); > } > > @@ -698,7 +901,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, > signaled_count = 0; > for (i = 0; i < count; ++i) { > entries[i].task = current; > - entries[i].fence = drm_syncobj_fence_get(syncobjs[i]); > + ret = drm_syncobj_search_fence(syncobjs[i], 0, 0, > + &entries[i].fence); > if (!entries[i].fence) { > if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { > continue; > @@ -970,12 +1174,13 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, > if (ret < 0) > return ret; > > - for (i = 0; i < args->count_handles; i++) > - drm_syncobj_replace_fence(syncobjs[i], 0, NULL); > - > + for (i = 0; i < args->count_handles; i++) { > + drm_syncobj_fini(syncobjs[i]); > + drm_syncobj_init(syncobjs[i]); > + } > drm_syncobj_array_free(syncobjs, args->count_handles); > > - return 0; > + return ret; > } > > int > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 0a8d2d64f380..8a8d21b24119 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -2137,7 +2137,7 @@ await_fence_array(struct i915_execbuffer *eb, > if (!(flags & I915_EXEC_FENCE_WAIT)) > continue; > > - fence = drm_syncobj_fence_get(syncobj); > + drm_syncobj_search_fence(syncobj, 0, 0, &fence); > if (!fence) > return -EINVAL; > > diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h > index 2eda44def639..85b36d4e53ee 100644 > --- a/include/drm/drm_syncobj.h > +++ b/include/drm/drm_syncobj.h > @@ -30,6 +30,11 @@ > > struct drm_syncobj_cb; > > +enum drm_syncobj_type { > + DRM_SYNCOBJ_TYPE_BINARY, > + DRM_SYNCOBJ_TYPE_TIMELINE > +}; > + > /** > * struct drm_syncobj - sync object. 
> * > @@ -41,19 +46,36 @@ struct drm_syncobj { > */ > struct kref refcount; > /** > - * @fence: > - * NULL or a pointer to the fence bound to this object. > - * > - * This field should not be used directly. Use drm_syncobj_fence_get() > - * and drm_syncobj_replace_fence() instead. > + * @type: indicate syncobj type > + */ > + enum drm_syncobj_type type; > + /** > + * @wq: wait signal operation work queue > + */ > + wait_queue_head_t wq; > + /** > + * @timeline_context: fence context used by timeline > */ > - struct dma_fence __rcu *fence; > + u64 timeline_context; > /** > - * @cb_list: List of callbacks to call when the &fence gets replaced. > + * @timeline: syncobj timeline value, which indicates point is signaled. > */ > + u64 timeline; > + /** > + * @signal_point: which indicates the latest signaler point. > + */ > + u64 signal_point; > + /** > + * @signal_pt_list: signaler point list. > + */ > + struct list_head signal_pt_list; > + > + /** > + * @cb_list: List of callbacks to call when the &fence gets replaced. > + */ > struct list_head cb_list; > /** > - * @lock: Protects &cb_list and write-locks &fence. > + * @lock: Protects syncobj list and write-locks &fence. > */ > spinlock_t lock; > /** > @@ -68,7 +90,7 @@ typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj, > /** > * struct drm_syncobj_cb - callback for drm_syncobj_add_callback > * @node: used by drm_syncob_add_callback to append this struct to > - * &drm_syncobj.cb_list > + * &drm_syncobj.cb_list > * @func: drm_syncobj_func_t to call > * > * This struct will be initialized by drm_syncobj_add_callback, additional > @@ -106,29 +128,6 @@ drm_syncobj_put(struct drm_syncobj *obj) > kref_put(&obj->refcount, drm_syncobj_free); > } > > -/** > - * drm_syncobj_fence_get - get a reference to a fence in a sync object > - * @syncobj: sync object. > - * > - * This acquires additional reference to &drm_syncobj.fence contained in @obj, > - * if not NULL. 
It is illegal to call this without already holding a reference. > - * No locks required. > - * > - * Returns: > - * Either the fence of @obj or NULL if there's none. > - */ > -static inline struct dma_fence * > -drm_syncobj_fence_get(struct drm_syncobj *syncobj) > -{ > - struct dma_fence *fence; > - > - rcu_read_lock(); > - fence = dma_fence_get_rcu_safe(&syncobj->fence); > - rcu_read_unlock(); > - > - return fence; > -} > - > struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, > u32 handle); > void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, u64 point, > @@ -142,5 +141,7 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, > int drm_syncobj_get_handle(struct drm_file *file_private, > struct drm_syncobj *syncobj, u32 *handle); > int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd); > +int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point, u64 flags, > + struct dma_fence **fence); > > #endif > diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h > index 300f336633f2..cebdb2541eb7 100644 > --- a/include/uapi/drm/drm.h > +++ b/include/uapi/drm/drm.h > @@ -717,6 +717,7 @@ struct drm_prime_handle { > struct drm_syncobj_create { > __u32 handle; > #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) > +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 1) > __u32 flags; > }; >