On 2018年08月23日 17:08, Daniel Vetter wrote: > On Thu, Aug 23, 2018 at 04:25:42PM +0800, Chunming Zhou wrote: >> VK_KHR_timeline_semaphore: >> This extension introduces a new type of semaphore that has an integer payload >> identifying a point in a timeline. Such timeline semaphores support the >> following operations: >> * Host query - A host operation that allows querying the payload of the >> timeline semaphore. >> * Host wait - A host operation that allows a blocking wait for a >> timeline semaphore to reach a specified value. >> * Device wait - A device operation that allows waiting for a >> timeline semaphore to reach a specified value. >> * Device signal - A device operation that allows advancing the >> timeline semaphore to a specified value. >> >> Since it's a timeline, that means the front time point(PT) always is signaled before the late PT. >> a. signal PT design: >> Signal PT fence N depends on PT[N-1] fence and signal operation fence, when PT[N] fence is signaled, >> the timeline will increase to value of PT[N]. >> b. wait PT design: >> Wait PT fence is signaled by reaching timeline point value, when timeline is increasing, will compare >> wait PTs value with new timeline value, if PT value is lower than timeline value, then wait PT will be >> signaled, otherwise keep in list. semaphore wait operation can wait on any point of timeline, >> so need a RB tree to order them. And wait PT could be ahead of signal PT, we need a submission fence to >> perform that. >> >> v2: >> 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian) >> 2. move unexposed definitions to .c file. (Daniel Vetter) >> 3. split up the change to drm_syncobj_find_fence() in a separate patch. (Christian) >> 4. split up the change to drm_syncobj_replace_fence() in a separate patch. >> 5. drop the submission_fence implementation and instead use wait_event() for that. (Christian) >> 6. WARN_ON(point != 0) for NORMAL type syncobj case. 
(Daniel Vetter) > Depending upon how it's going to be used, this is the wrong thing to do. > >> TODO: >> 1. CPU query and wait on timeline semaphore. >> 2. test application (Daniel Vetter) > I also had some more suggestions, around aligning the two concepts of > future fences submission fence is replaced by wait_event, so I don't address your future fence suggestion. And welcome to explain future fence status. > and at least trying to merge the timeline and the other > fence (which really is just a special case of a timeline with only 1 > slot). Could you detail that? Do you mean merge syncobj->fence to timeline point? Thanks, David Zhou > -Daniel > >> Signed-off-by: Chunming Zhou <david1.zhou at amd.com> >> Cc: Christian Konig <christian.koenig at amd.com> >> Cc: Dave Airlie <airlied at redhat.com> >> Cc: Daniel Rakos <Daniel.Rakos at amd.com> >> Cc: Daniel Vetter <daniel at ffwll.ch> >> --- >> drivers/gpu/drm/drm_syncobj.c | 383 +++++++++++++++++++++++++++++++++++++++--- >> include/drm/drm_syncobj.h | 28 +++ >> include/uapi/drm/drm.h | 1 + >> 3 files changed, 389 insertions(+), 23 deletions(-) >> >> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c >> index 6227df2cc0a4..f738d78edf65 100644 >> --- a/drivers/gpu/drm/drm_syncobj.c >> +++ b/drivers/gpu/drm/drm_syncobj.c >> @@ -56,6 +56,44 @@ >> #include "drm_internal.h" >> #include <drm/drm_syncobj.h> >> >> +struct drm_syncobj_stub_fence { >> + struct dma_fence base; >> + spinlock_t lock; >> +}; >> + >> +static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence) >> +{ >> + return "syncobjstub"; >> +} >> + >> +static bool drm_syncobj_stub_fence_enable_signaling(struct dma_fence *fence) >> +{ >> + return !dma_fence_is_signaled(fence); >> +} >> + >> +static const struct dma_fence_ops drm_syncobj_stub_fence_ops = { >> + .get_driver_name = drm_syncobj_stub_fence_get_name, >> + .get_timeline_name = drm_syncobj_stub_fence_get_name, >> + .enable_signaling = 
drm_syncobj_stub_fence_enable_signaling, >> + .release = NULL, >> +}; >> + >> +struct drm_syncobj_wait_pt { >> + struct drm_syncobj_stub_fence base; >> + u64 value; >> + struct rb_node node; >> +}; >> +struct drm_syncobj_signal_pt { >> + struct drm_syncobj_stub_fence base; >> + struct dma_fence *signal_fence; >> + struct dma_fence *pre_pt_base; >> + struct dma_fence_cb signal_cb; >> + struct dma_fence_cb pre_pt_cb; >> + struct drm_syncobj *syncobj; >> + u64 value; >> + struct list_head list; >> +}; >> + >> /** >> * drm_syncobj_find - lookup and reference a sync object. >> * @file_private: drm file private pointer >> @@ -137,6 +175,150 @@ void drm_syncobj_remove_callback(struct drm_syncobj *syncobj, >> spin_unlock(&syncobj->lock); >> } >> >> +static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj *syncobj) >> +{ >> + struct rb_node *node = NULL; >> + struct drm_syncobj_wait_pt *wait_pt = NULL; >> + >> + spin_lock(&syncobj->lock); >> + for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree); >> + node != NULL; ) { >> + wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node); >> + node = rb_next(node); >> + if (wait_pt->value <= syncobj->syncobj_timeline.timeline) { >> + dma_fence_signal(&wait_pt->base.base); >> + rb_erase(&wait_pt->node, >> + &syncobj->syncobj_timeline.wait_pt_tree); >> + RB_CLEAR_NODE(&wait_pt->node); >> + /* kfree(wait_pt) is excuted by fence put */ >> + dma_fence_put(&wait_pt->base.base); >> + } else { >> + /* the loop is from left to right, the later entry value is >> + * bigger, so don't need to check any more */ >> + break; >> + } >> + } >> + spin_unlock(&syncobj->lock); >> +} >> + >> + >> +static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt) >> +{ >> + struct dma_fence *fence = NULL; >> + struct drm_syncobj *syncobj; >> + >> + fence = signal_pt->signal_fence; >> + signal_pt->signal_fence = NULL; >> + dma_fence_put(fence); >> + fence = signal_pt->pre_pt_base; >> + signal_pt->pre_pt_base = NULL; >> + 
dma_fence_put(fence); >> + >> + syncobj = signal_pt->syncobj; >> + spin_lock(&syncobj->lock); >> + list_del(&signal_pt->list); >> + syncobj->syncobj_timeline.timeline = signal_pt->value; >> + spin_unlock(&syncobj->lock); >> + /* kfree(signal_pt) will be executed by below fence put */ >> + dma_fence_put(&signal_pt->base.base); >> + drm_syncobj_timeline_signal_wait_pts(syncobj); >> +} >> +static void pt_signal_fence_func(struct dma_fence *fence, >> + struct dma_fence_cb *cb) >> +{ >> + struct drm_syncobj_signal_pt *signal_pt = >> + container_of(cb, struct drm_syncobj_signal_pt, signal_cb); >> + >> + if (signal_pt->pre_pt_base && >> + !dma_fence_is_signaled(signal_pt->pre_pt_base)) >> + return; >> + >> + pt_fence_cb(signal_pt); >> +} >> +static void pt_pre_fence_func(struct dma_fence *fence, >> + struct dma_fence_cb *cb) >> +{ >> + struct drm_syncobj_signal_pt *signal_pt = >> + container_of(cb, struct drm_syncobj_signal_pt, pre_pt_cb); >> + >> + if (signal_pt->signal_fence && >> + !dma_fence_is_signaled(signal_pt->pre_pt_base)) >> + return; >> + >> + pt_fence_cb(signal_pt); >> +} >> + >> +static int drm_syncobj_timeline_replace_fence(struct drm_syncobj *syncobj, >> + struct dma_fence *fence, >> + u64 point) >> +{ >> + struct drm_syncobj_signal_pt *signal_pt = >> + kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL); >> + struct drm_syncobj_signal_pt *tail_pt; >> + struct dma_fence *tail_pt_fence = NULL; >> + int ret = 0; >> + >> + if (!signal_pt) >> + return -ENOMEM; >> + if (syncobj->syncobj_timeline.signal_point >= point) { >> + DRM_WARN("A later signal is ready!"); >> + goto out; >> + } >> + if (fence) >> + dma_fence_get(fence); >> + spin_lock(&syncobj->lock); >> + spin_lock_init(&signal_pt->base.lock); >> + dma_fence_init(&signal_pt->base.base, >> + &drm_syncobj_stub_fence_ops, >> + &signal_pt->base.lock, >> + syncobj->syncobj_timeline.timeline_context, point); >> + signal_pt->signal_fence = >> + rcu_dereference_protected(fence, >> + 
lockdep_is_held(&fence->lock)); >> + if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) { >> + tail_pt = list_last_entry(&syncobj->syncobj_timeline.signal_pt_list, >> + struct drm_syncobj_signal_pt, list); >> + tail_pt_fence = &tail_pt->base.base; >> + if (dma_fence_is_signaled(tail_pt_fence)) >> + tail_pt_fence = NULL; >> + } >> + if (tail_pt_fence) >> + signal_pt->pre_pt_base = >> + dma_fence_get(rcu_dereference_protected(tail_pt_fence, >> + lockdep_is_held(&tail_pt_fence->lock))); >> + >> + signal_pt->value = point; >> + syncobj->syncobj_timeline.signal_point = point; >> + signal_pt->syncobj = syncobj; >> + INIT_LIST_HEAD(&signal_pt->list); >> + list_add_tail(&signal_pt->list, &syncobj->syncobj_timeline.signal_pt_list); >> + spin_unlock(&syncobj->lock); >> + wake_up_all(&syncobj->syncobj_timeline.wq); >> + /** >> + * Every pt is depending on signal fence and previous pt fence, add >> + * callbacks to them >> + */ >> + if (!dma_fence_is_signaled(signal_pt->signal_fence)) >> + dma_fence_add_callback(signal_pt->signal_fence, >> + &signal_pt->signal_cb, >> + pt_signal_fence_func); >> + else >> + pt_signal_fence_func(signal_pt->signal_fence, >> + &signal_pt->signal_cb); >> + if (signal_pt->pre_pt_base && !dma_fence_is_signaled(signal_pt->pre_pt_base)) >> + dma_fence_add_callback(signal_pt->pre_pt_base, >> + &signal_pt->pre_pt_cb, >> + pt_pre_fence_func); >> + else >> + pt_pre_fence_func(signal_pt->pre_pt_base, &signal_pt->pre_pt_cb); >> + >> + >> + return 0; >> +out: >> + kfree(signal_pt); >> + return ret; >> +} >> + >> /** >> * drm_syncobj_replace_fence - replace fence in a sync object. 
>> * @syncobj: Sync object to replace fence in >> @@ -152,6 +334,11 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, >> struct dma_fence *old_fence; >> struct drm_syncobj_cb *cur, *tmp; >> >> + if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) { >> + drm_syncobj_timeline_replace_fence(syncobj, fence, >> + point); >> + return; >> + } >> if (fence) >> dma_fence_get(fence); >> >> @@ -174,28 +361,6 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, >> } >> EXPORT_SYMBOL(drm_syncobj_replace_fence); >> >> -struct drm_syncobj_stub_fence { >> - struct dma_fence base; >> - spinlock_t lock; >> -}; >> - >> -static const char *drm_syncobj_stub_fence_get_name(struct dma_fence *fence) >> -{ >> - return "syncobjstub"; >> -} >> - >> -static bool drm_syncobj_stub_fence_enable_signaling(struct dma_fence *fence) >> -{ >> - return !dma_fence_is_signaled(fence); >> -} >> - >> -static const struct dma_fence_ops drm_syncobj_stub_fence_ops = { >> - .get_driver_name = drm_syncobj_stub_fence_get_name, >> - .get_timeline_name = drm_syncobj_stub_fence_get_name, >> - .enable_signaling = drm_syncobj_stub_fence_enable_signaling, >> - .release = NULL, >> -}; >> - >> static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) >> { >> struct drm_syncobj_stub_fence *fence; >> @@ -215,6 +380,121 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) >> return 0; >> } >> >> +static struct drm_syncobj_wait_pt * >> +drm_syncobj_timeline_lookup_wait_pt(struct drm_syncobj *syncobj, u64 point) >> +{ >> + struct rb_node *node = syncobj->syncobj_timeline.wait_pt_tree.rb_node; >> + struct drm_syncobj_wait_pt *wait_pt = NULL; >> + >> + >> + spin_lock(&syncobj->lock); >> + while(node) { >> + int result = point - wait_pt->value; >> + >> + wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node); >> + if (result < 0) >> + node = node->rb_left; >> + else if (result > 0) >> + node = node->rb_right; >> + else >> + break; >> + } >> + spin_unlock(&syncobj->lock); 
>> + >> + return wait_pt; >> +} >> + >> +static struct drm_syncobj_wait_pt * >> +drm_syncobj_timeline_create_wait_pt(struct drm_syncobj *syncobj, u64 point) >> +{ >> + struct drm_syncobj_wait_pt *wait_pt; >> + struct rb_node **new = &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL; >> + >> + wait_pt = kzalloc(sizeof(*wait_pt), GFP_KERNEL); >> + if (!wait_pt) >> + return NULL; >> + spin_lock_init(&wait_pt->base.lock); >> + dma_fence_init(&wait_pt->base.base, >> + &drm_syncobj_stub_fence_ops, >> + &wait_pt->base.lock, >> + syncobj->syncobj_timeline.timeline_context, point); >> + wait_pt->value = point; >> + >> + /* wait pt must be in an order, so that we can easily lookup and signal >> + * it */ >> + spin_lock(&syncobj->lock); >> + if (point <= syncobj->syncobj_timeline.timeline) >> + dma_fence_signal(&wait_pt->base.base); >> + while(*new) { >> + struct drm_syncobj_wait_pt *this = >> + rb_entry(*new, struct drm_syncobj_wait_pt, node); >> + int result = wait_pt->value - this->value; >> + >> + parent = *new; >> + if (result < 0) >> + new = &((*new)->rb_left); >> + else if (result > 0) >> + new = &((*new)->rb_right); >> + else >> + goto exist; >> + } >> + >> + rb_link_node(&wait_pt->node, parent, new); >> + rb_insert_color(&wait_pt->node, &syncobj->syncobj_timeline.wait_pt_tree); >> + spin_unlock(&syncobj->lock); >> + return wait_pt; >> +exist: >> + spin_unlock(&syncobj->lock); >> + dma_fence_put(&wait_pt->base.base); >> + wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point); >> + return wait_pt; >> +} >> + >> +static struct dma_fence * >> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 point, u64 flag) >> +{ >> + struct drm_syncobj_wait_pt *wait_pt; >> + >> + /* already signaled, simply return a signaled stub fence */ >> + if (point <= syncobj->syncobj_timeline.timeline) { >> + struct drm_syncobj_stub_fence *fence; >> + >> + fence = kzalloc(sizeof(*fence), GFP_KERNEL); >> + if (fence == NULL) >> + return NULL; >> + >> + 
spin_lock_init(&fence->lock); >> + dma_fence_init(&fence->base, &drm_syncobj_stub_fence_ops, >> + &fence->lock, 0, 0); >> + dma_fence_signal(&fence->base); >> + return &fence->base; >> + } >> + >> + /* check if the wait pt exists */ >> + wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point); >> + if (!wait_pt) { >> + /* This is a new wait pt, so create it */ >> + wait_pt = drm_syncobj_timeline_create_wait_pt(syncobj, point); >> + if (!wait_pt) >> + return NULL; >> + } >> + if (wait_pt) { >> + struct dma_fence *fence; >> + int ret = >> + wait_event_interruptible_timeout(syncobj->syncobj_timeline.wq, >> + wait_pt->value <= syncobj->syncobj_timeline.signal_point, >> + msecs_to_jiffies(10000)); /* wait 10s */ >> + >> + if (ret <= 0) >> + return NULL; >> + rcu_read_lock(); >> + fence = dma_fence_get_rcu(&wait_pt->base.base); >> + rcu_read_unlock(); >> + return fence; >> + } >> + return NULL; >> +} >> + >> /** >> * drm_syncobj_find_fence - lookup and reference the fence in a sync object >> * @file_private: drm file private pointer >> @@ -240,7 +520,17 @@ int drm_syncobj_find_fence(struct drm_file *file_private, >> if (!syncobj) >> return -ENOENT; >> >> - *fence = drm_syncobj_fence_get(syncobj); >> + if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) { >> + /* NORMAL syncobj doesn't care point value */ >> + WARN_ON(point != 0); >> + *fence = drm_syncobj_fence_get(syncobj); >> + } else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) { >> + *fence = drm_syncobj_timeline_point_get(syncobj, point, >> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT); >> + } else { >> + DRM_ERROR("Don't support this type syncobj\n"); >> + *fence = NULL; >> + } >> if (!*fence) { >> ret = -EINVAL; >> } >> @@ -249,6 +539,34 @@ int drm_syncobj_find_fence(struct drm_file *file_private, >> } >> EXPORT_SYMBOL(drm_syncobj_find_fence); >> >> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj, >> + struct drm_syncobj_timeline *syncobj_timeline) >> +{ >> + struct rb_node *node = NULL; >> + 
struct drm_syncobj_wait_pt *wait_pt = NULL; >> + struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp; >> + >> + spin_lock(&syncobj->lock); >> + for(node = rb_first(&syncobj_timeline->wait_pt_tree); >> + node != NULL; ) { >> + wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node); >> + node = rb_next(node); >> + rb_erase(&wait_pt->node, >> + &syncobj_timeline->wait_pt_tree); >> + RB_CLEAR_NODE(&wait_pt->node); >> + /* kfree(wait_pt) is excuted by fence put */ >> + dma_fence_put(&wait_pt->base.base); >> + } >> + list_for_each_entry_safe(signal_pt, tmp, >> + &syncobj_timeline->signal_pt_list, list) { >> + list_del(&signal_pt->list); >> + dma_fence_put(signal_pt->signal_fence); >> + dma_fence_put(signal_pt->pre_pt_base); >> + dma_fence_put(&signal_pt->base.base); >> + } >> + spin_unlock(&syncobj->lock); >> +} >> + >> /** >> * drm_syncobj_free - free a sync object. >> * @kref: kref to free. >> @@ -261,10 +579,23 @@ void drm_syncobj_free(struct kref *kref) >> struct drm_syncobj, >> refcount); >> drm_syncobj_replace_fence(syncobj, NULL, 0); >> + drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline); >> kfree(syncobj); >> } >> EXPORT_SYMBOL(drm_syncobj_free); >> >> +static void drm_syncobj_timeline_init(struct drm_syncobj_timeline >> + *syncobj_timeline) >> +{ >> + syncobj_timeline->timeline_context = dma_fence_context_alloc(1); >> + syncobj_timeline->timeline = 0; >> + syncobj_timeline->signal_point = 0; >> + init_waitqueue_head(&syncobj_timeline->wq); >> + >> + syncobj_timeline->wait_pt_tree = RB_ROOT; >> + INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list); >> +} >> + >> /** >> * drm_syncobj_create - create a new syncobj >> * @out_syncobj: returned syncobj >> @@ -290,6 +621,12 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, >> kref_init(&syncobj->refcount); >> INIT_LIST_HEAD(&syncobj->cb_list); >> spin_lock_init(&syncobj->lock); >> + if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) { >> + syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE; 
>> + drm_syncobj_timeline_init(&syncobj->syncobj_timeline); >> + } else { >> + syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL; >> + } >> >> if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) { >> ret = drm_syncobj_assign_null_handle(syncobj); >> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h >> index 335ec501001a..342b3ced3e56 100644 >> --- a/include/drm/drm_syncobj.h >> +++ b/include/drm/drm_syncobj.h >> @@ -30,6 +30,25 @@ >> >> struct drm_syncobj_cb; >> >> +enum drm_syncobj_type { >> + DRM_SYNCOBJ_TYPE_NORMAL, >> + DRM_SYNCOBJ_TYPE_TIMELINE >> +}; >> + >> +struct drm_syncobj_timeline { >> + wait_queue_head_t wq; >> + u64 timeline_context; >> + /** >> + * @timeline: syncobj timeline >> + */ >> + u64 timeline; >> + u64 signal_point; >> + >> + >> + struct rb_root wait_pt_tree; >> + struct list_head signal_pt_list; >> +}; >> + >> /** >> * struct drm_syncobj - sync object. >> * >> @@ -40,6 +59,15 @@ struct drm_syncobj { >> * @refcount: Reference count of this object. >> */ >> struct kref refcount; >> + /** >> + * @type: indicate syncobj type >> + */ >> + enum drm_syncobj_type type; >> + /** >> + * @syncobj_timeline: timeline >> + */ >> + struct drm_syncobj_timeline syncobj_timeline; >> + >> /** >> * @fence: >> * NULL or a pointer to the fence bound to this object. >> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h >> index 300f336633f2..cebdb2541eb7 100644 >> --- a/include/uapi/drm/drm.h >> +++ b/include/uapi/drm/drm.h >> @@ -717,6 +717,7 @@ struct drm_prime_handle { >> struct drm_syncobj_create { >> __u32 handle; >> #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) >> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 1) >> __u32 flags; >> }; >> >> -- >> 2.14.1 >>