[PATCH 2/2] [RFC]drm: add syncobj timeline support

daniel@xxxxxxxx (Daniel Vetter) · Wed, 22 Aug 2018 12:08:56 +0200

On Wed, Aug 22, 2018 at 11:59 AM, zhoucm1 <zhoucm1 at amd.com> wrote:
>
>
> On 2018年08月22日 17:31, Daniel Vetter wrote:
>>
>> On Wed, Aug 22, 2018 at 05:28:17PM +0800, zhoucm1 wrote:
>>>
>>>
>>> On 2018年08月22日 17:24, Daniel Vetter wrote:
>>>>
>>>> On Wed, Aug 22, 2018 at 04:49:28PM +0800, Chunming Zhou wrote:
>>>>>
>>>>> VK_KHR_timeline_semaphore:
>>>>> This extension introduces a new type of semaphore that has an integer
>>>>> payload
>>>>> identifying a point in a timeline. Such timeline semaphores support the
>>>>> following operations:
>>>>>     * Host query - A host operation that allows querying the payload of
>>>>> the
>>>>>       timeline semaphore.
>>>>>     * Host wait - A host operation that allows a blocking wait for a
>>>>>       timeline semaphore to reach a specified value.
>>>>>     * Device wait - A device operation that allows waiting for a
>>>>>       timeline semaphore to reach a specified value.
>>>>>     * Device signal - A device operation that allows advancing the
>>>>>       timeline semaphore to a specified value.
>>>>>
>>>>> Since it's a timeline, that means the front time point(PT) always is
>>>>> signaled before the late PT.
>>>>> a. signal PT design:
>>>>> Signal PT fence N depends on PT[N-1] fence and signal operation fence,
>>>>> when PT[N] fence is signaled,
>>>>> the timeline will increase to value of PT[N].
>>>>> b. wait PT design:
>>>>> Wait PT fence is signaled by reaching timeline point value, when
>>>>> timeline is increasing, will compare
>>>>> wait PTs value with new timeline value, if PT value is lower than
>>>>> timeline value, then wait PT will be
>>>>> signaled, otherwise keep in list. semaphore wait operation can wait on
>>>>> any point of timeline,
>>>>> so need a RB tree to order them. And wait PT could be ahead of signal PT,
>>>>> we need a submission fence to
>>>>> perform that.
>>>>>
>>>>> TODO:
>>>>> CPU query and wait on timeline semaphore.
>>>>
>>>> Another TODO: igt testcase for all the cornercases. We already have
>>>> other syncobj tests in there.
>>>
>>> Yes, I'm also trying to find where the test should be written. Could you give a
>>> directory?
>>
>> There's already tests/syncobj_basic.c and tests/syncobj_wait.c. Either
>> extend those, or probably better to start a new tests/syncobj_timeline.c
>> since I expect this will have a lot of corner-cases we need to check.
>
> I failed to find them in either the kernel or libdrm. Could you point out which tests
> you mean?

igt testcases. It's a separate thing with lots of drm tests:

https://cgit.freedesktop.org/drm/igt-gpu-tools

I know that amdgpu has their tests in libdrm (which imo is a bit of an
unfortunate split, but there are also reasons and stuff).

Cheers, Daniel

> Thanks,
> David Zhou
>
>> -Daniel
>>
>>> Thanks,
>>> David Zhou
>>>>
>>>> That would also help with understanding how this is supposed to be used,
>>>> since I'm a bit too dense to immediately get your algorithm by just
>>>> staring at the code.
>>>>
>>>>
>>>>> Change-Id: I9f09aae225e268442c30451badac40406f24262c
>>>>> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
>>>>> Cc: Christian Konig <christian.koenig at amd.com>
>>>>> Cc: Dave Airlie <airlied at redhat.com>
>>>>> Cc: Daniel Rakos <Daniel.Rakos at amd.com>
>>>>> ---
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |   7 +-
>>>>>    drivers/gpu/drm/drm_syncobj.c          | 385
>>>>> ++++++++++++++++++++++++++++++++-
>>>>>    drivers/gpu/drm/v3d/v3d_gem.c          |   4 +-
>>>>>    drivers/gpu/drm/vc4/vc4_gem.c          |   2 +-
>>>>>    include/drm/drm_syncobj.h              |  45 +++-
>>>>>    include/uapi/drm/drm.h                 |   3 +-
>>>>>    6 files changed, 435 insertions(+), 11 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> index d42d1c8f78f6..463cc8960723 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> @@ -1105,7 +1105,7 @@ static int
>>>>> amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
>>>>>    {
>>>>>         int r;
>>>>>         struct dma_fence *fence;
>>>>> -       r = drm_syncobj_find_fence(p->filp, handle, &fence);
>>>>> +       r = drm_syncobj_find_fence(p->filp, handle, &fence, 0);
>>>>>         if (r)
>>>>>                 return r;
>>>>> @@ -1193,8 +1193,9 @@ static void amdgpu_cs_post_dependencies(struct
>>>>> amdgpu_cs_parser *p)
>>>>>    {
>>>>>         int i;
>>>>> -       for (i = 0; i < p->num_post_dep_syncobjs; ++i)
>>>>> -               drm_syncobj_replace_fence(p->post_dep_syncobjs[i],
>>>>> p->fence);
>>>>> +       for (i = 0; i < p->num_post_dep_syncobjs; ++i) {
>>>>> +               drm_syncobj_signal_fence(p->post_dep_syncobjs[i],
>>>>> p->fence, 0);
>>>>> +       }
>>>>>    }
>>>>>    static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>>>> b/drivers/gpu/drm/drm_syncobj.c
>>>>> index 70af32d0def1..3709f36c901e 100644
>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>>> @@ -187,6 +187,191 @@ void drm_syncobj_replace_fence(struct drm_syncobj
>>>>> *syncobj,
>>>>>    }
>>>>>    EXPORT_SYMBOL(drm_syncobj_replace_fence);
>>>>> +static void drm_syncobj_timeline_signal_submission_fences(struct
>>>>> drm_syncobj *syncobj)
>>>>> +{
>>>>> +       struct rb_node *node = NULL;
>>>>> +       struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>>> +
>>>>> +       spin_lock(&syncobj->lock);
>>>>> +       for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>>>>> +           node != NULL; node = rb_next(node)) {
>>>>> +           wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>>> +           if (wait_pt->value <=
>>>>> syncobj->syncobj_timeline.signal_point) {
>>>>> +                   if (wait_pt->submission_fence)
>>>>> +
>>>>> dma_fence_signal(&wait_pt->submission_fence->base);
>>>>> +           } else {
>>>>> +                   /* the loop is from left to right, the later entry
>>>>> value is
>>>>> +                    * bigger, so don't need to check any more */
>>>>> +                   break;
>>>>> +           }
>>>>
>>>> This seems to reinvent syncobj->cb_list. Since this is the exact same
>>>> "future fence that doesn't even exist yet" problem I think those two
>>>> code
>>>> paths should be unified. In general I think it'd be much better if we
>>>> treat
>>>> the old syncobj as a timeline with a limit of 1 slot only.
>>>>
>>>> Or there needs to be a really good reason why this is all new code.
>>>>
>>>> Specifically for this stuff here having unified future fence semantics
>>>> will allow drivers to do clever stuff with them.
>>>>
>>>>> +       }
>>>>> +       spin_unlock(&syncobj->lock);
>>>>> +}
>>>>> +
>>>>> +static void drm_syncobj_timeline_signal_wait_pts(struct drm_syncobj
>>>>> *syncobj)
>>>>> +{
>>>>> +       struct rb_node *node = NULL;
>>>>> +       struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>>> +
>>>>> +       spin_lock(&syncobj->lock);
>>>>> +       for(node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree);
>>>>> +           node != NULL; ) {
>>>>> +               wait_pt = rb_entry(node, struct drm_syncobj_wait_pt,
>>>>> node);
>>>>> +               node = rb_next(node);
>>>>> +               if (wait_pt->value <=
>>>>> syncobj->syncobj_timeline.timeline) {
>>>>> +                       dma_fence_signal(&wait_pt->base.base);
>>>>> +                       rb_erase(&wait_pt->node,
>>>>> +
>>>>> &syncobj->syncobj_timeline.wait_pt_tree);
>>>>> +                       RB_CLEAR_NODE(&wait_pt->node);
>>>>> +                       if (wait_pt->submission_fence)
>>>>> +
>>>>> dma_fence_put(&wait_pt->submission_fence->base);
>>>>> +                       /* kfree(wait_pt) is excuted by fence put */
>>>>> +                       dma_fence_put(&wait_pt->base.base);
>>>>> +               } else {
>>>>> +                       /* the loop is from left to right, the later
>>>>> entry value is
>>>>> +                        * bigger, so don't need to check any more */
>>>>> +                       break;
>>>>> +               }
>>>>> +       }
>>>>> +       spin_unlock(&syncobj->lock);
>>>>> +}
>>>>> +
>>>>> +
>>>>> +static void pt_fence_cb(struct drm_syncobj_signal_pt *signal_pt)
>>>>> +{
>>>>> +       struct dma_fence *fence = NULL;
>>>>> +       struct drm_syncobj *syncobj;
>>>>> +
>>>>> +       fence = signal_pt->signal_fence;
>>>>> +       signal_pt->signal_fence = NULL;
>>>>> +       dma_fence_put(fence);
>>>>> +       fence = signal_pt->pre_pt_base;
>>>>> +       signal_pt->pre_pt_base = NULL;
>>>>> +       dma_fence_put(fence);
>>>>> +
>>>>> +       syncobj = signal_pt->syncobj;
>>>>> +       spin_lock(&syncobj->lock);
>>>>> +       list_del(&signal_pt->list);
>>>>> +       syncobj->syncobj_timeline.timeline = signal_pt->value;
>>>>> +       spin_unlock(&syncobj->lock);
>>>>> +       /* kfree(signal_pt) will be  executed by below fence put */
>>>>> +       dma_fence_put(&signal_pt->base.base);
>>>>> +       drm_syncobj_timeline_signal_wait_pts(syncobj);
>>>>> +}
>>>>> +static void pt_signal_fence_func(struct dma_fence *fence,
>>>>> +                                struct dma_fence_cb *cb)
>>>>> +{
>>>>> +       struct drm_syncobj_signal_pt *signal_pt =
>>>>> +               container_of(cb, struct drm_syncobj_signal_pt,
>>>>> signal_cb);
>>>>> +
>>>>> +       if (signal_pt->pre_pt_base &&
>>>>> +           !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>>>> +               return;
>>>>> +
>>>>> +       pt_fence_cb(signal_pt);
>>>>> +}
>>>>> +static void pt_pre_fence_func(struct dma_fence *fence,
>>>>> +                                struct dma_fence_cb *cb)
>>>>> +{
>>>>> +       struct drm_syncobj_signal_pt *signal_pt =
>>>>> +               container_of(cb, struct drm_syncobj_signal_pt,
>>>>> pre_pt_cb);
>>>>> +
>>>>> +       if (signal_pt->signal_fence &&
>>>>> +           !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>>>> +               return;
>>>>> +
>>>>> +       pt_fence_cb(signal_pt);
>>>>> +}
>>>>> +
>>>>> +static int drm_syncobj_timeline_signal_fence(struct drm_syncobj
>>>>> *syncobj,
>>>>> +                                            struct dma_fence *fence,
>>>>> +                                            u64 point)
>>>>> +{
>>>>> +       struct drm_syncobj_signal_pt *signal_pt =
>>>>> +               kzalloc(sizeof(struct drm_syncobj_signal_pt),
>>>>> GFP_KERNEL);
>>>>> +       struct drm_syncobj_signal_pt *tail_pt;
>>>>> +       struct dma_fence *tail_pt_fence = NULL;
>>>>> +       int ret = 0;
>>>>> +
>>>>> +       if (!signal_pt)
>>>>> +               return -ENOMEM;
>>>>> +       if (syncobj->syncobj_timeline.signal_point >= point) {
>>>>> +               DRM_WARN("A later signal is ready!");
>>>>> +               goto out;
>>>>> +       }
>>>>> +       if (fence)
>>>>> +               dma_fence_get(fence);
>>>>> +       spin_lock(&syncobj->lock);
>>>>> +       spin_lock_init(&signal_pt->base.lock);
>>>>> +       dma_fence_init(&signal_pt->base.base,
>>>>> +                      &drm_syncobj_stub_fence_ops,
>>>>> +                      &signal_pt->base.lock,
>>>>> +                      syncobj->syncobj_timeline.timeline_context,
>>>>> point);
>>>>> +       signal_pt->signal_fence =
>>>>> +               rcu_dereference_protected(fence,
>>>>> +
>>>>> lockdep_is_held(&fence->lock));
>>>>> +       if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) {
>>>>> +               tail_pt =
>>>>> list_last_entry(&syncobj->syncobj_timeline.signal_pt_list,
>>>>> +                                         struct drm_syncobj_signal_pt,
>>>>> list);
>>>>> +               tail_pt_fence = &tail_pt->base.base;
>>>>> +               if (dma_fence_is_signaled(tail_pt_fence))
>>>>> +                       tail_pt_fence = NULL;
>>>>> +       }
>>>>> +       if (tail_pt_fence)
>>>>> +               signal_pt->pre_pt_base =
>>>>> +
>>>>> dma_fence_get(rcu_dereference_protected(tail_pt_fence,
>>>>> +
>>>>> lockdep_is_held(&tail_pt_fence->lock)));
>>>>> +
>>>>> +       signal_pt->value = point;
>>>>> +       syncobj->syncobj_timeline.signal_point = point;
>>>>> +       signal_pt->syncobj = syncobj;
>>>>> +       INIT_LIST_HEAD(&signal_pt->list);
>>>>> +       list_add_tail(&signal_pt->list,
>>>>> &syncobj->syncobj_timeline.signal_pt_list);
>>>>> +       spin_unlock(&syncobj->lock);
>>>>> +       drm_syncobj_timeline_signal_submission_fences(syncobj);
>>>>> +       /**
>>>>> +        * Every pt is depending on signal fence and previous pt fence,
>>>>> add
>>>>> +        * callbacks to them
>>>>> +        */
>>>>> +       if (!dma_fence_is_signaled(signal_pt->signal_fence))
>>>>> +               dma_fence_add_callback(signal_pt->signal_fence,
>>>>> +                                      &signal_pt->signal_cb,
>>>>> +                                      pt_signal_fence_func);
>>>>> +       else
>>>>> +               pt_signal_fence_func(signal_pt->signal_fence,
>>>>> +                                    &signal_pt->signal_cb);
>>>>> +       if (signal_pt->pre_pt_base &&
>>>>> !dma_fence_is_signaled(signal_pt->pre_pt_base))
>>>>> +               dma_fence_add_callback(signal_pt->pre_pt_base,
>>>>> +                                      &signal_pt->pre_pt_cb,
>>>>> +                                      pt_pre_fence_func);
>>>>> +       else
>>>>> +               pt_pre_fence_func(signal_pt->pre_pt_base,
>>>>> &signal_pt->pre_pt_cb);
>>>>> +
>>>>> +
>>>>> +       return 0;
>>>>> +out:
>>>>> +       kfree(signal_pt);
>>>>> +       return ret;
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * drm_syncobj_signal_fence - place fence into a sync object.
>>>>> + * @syncobj: Sync object to replace fence in
>>>>> + * @fence: fence to install in sync file.
>>>>> + *
>>>>> + * This places the fence into normal or timeline sync object
>>>>> + */
>>>>> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
>>>>> +                             struct dma_fence *fence,
>>>>> +                             u64 point)
>>>>
>>>> This is a very confusing function name. Feels like
>>>> drm_syncobj_timeline_signal_fence is rather misnamed. I think we should
>>>> just extend replace_fence like you do with find_fence already.
>>>>
>>>>> +{
>>>>> +       if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL)
>>>>> +               drm_syncobj_replace_fence(syncobj, fence);
>>>>> +       else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE)
>>>>> +               drm_syncobj_timeline_signal_fence(syncobj, fence,
>>>>> point);
>>>>> +}
>>>>> +EXPORT_SYMBOL(drm_syncobj_signal_fence);
>>>>> +
>>>>>    static int drm_syncobj_assign_null_handle(struct drm_syncobj
>>>>> *syncobj)
>>>>>    {
>>>>>         struct drm_syncobj_stub_fence *fence;
>>>>> @@ -205,12 +390,150 @@ static int drm_syncobj_assign_null_handle(struct
>>>>> drm_syncobj *syncobj)
>>>>>         return 0;
>>>>>    }
>>>>> +static struct drm_syncobj_wait_pt *
>>>>> +drm_syncobj_timeline_lookup_wait_pt(struct drm_syncobj *syncobj, u64
>>>>> point)
>>>>> +{
>>>>> +    struct rb_node *node =
>>>>> syncobj->syncobj_timeline.wait_pt_tree.rb_node;
>>>>> +    struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>>> +
>>>>> +
>>>>> +    spin_lock(&syncobj->lock);
>>>>> +    while(node) {
>>>>> +           int result = point - wait_pt->value;
>>>>> +
>>>>> +           wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node);
>>>>> +           if (result < 0)
>>>>> +                   node = node->rb_left;
>>>>> +           else if (result > 0)
>>>>> +                   node = node->rb_right;
>>>>> +           else
>>>>> +                   break;
>>>>> +    }
>>>>> +    spin_unlock(&syncobj->lock);
>>>>> +
>>>>> +    return wait_pt;
>>>>> +}
>>>>> +
>>>>> +static struct drm_syncobj_wait_pt *
>>>>> +drm_syncobj_timeline_create_wait_pt(struct drm_syncobj *syncobj, u64
>>>>> point)
>>>>> +{
>>>>> +       struct drm_syncobj_wait_pt *wait_pt;
>>>>> +       struct rb_node **new =
>>>>> &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL;
>>>>> +
>>>>> +       wait_pt = kzalloc(sizeof(*wait_pt), GFP_KERNEL);
>>>>> +       if (!wait_pt)
>>>>> +               return NULL;
>>>>> +       spin_lock_init(&wait_pt->base.lock);
>>>>> +       dma_fence_init(&wait_pt->base.base,
>>>>> +                      &drm_syncobj_stub_fence_ops,
>>>>> +                      &wait_pt->base.lock,
>>>>> +                      syncobj->syncobj_timeline.timeline_context,
>>>>> point);
>>>>> +       wait_pt->submission_fence = NULL;
>>>>> +       /* check if the pt is already sumbitted, if yes, then don't
>>>>> need to
>>>>> +        * create submission fence, otherwise create it */
>>>>> +       if (point > syncobj->syncobj_timeline.signal_point) {
>>>>> +               wait_pt->submission_fence =
>>>>> +                       kzalloc(sizeof(struct drm_syncobj_stub_fence),
>>>>> +                               GFP_KERNEL);
>>>>> +               if (!wait_pt->submission_fence) {
>>>>> +                       dma_fence_put(&wait_pt->base.base);
>>>>> +                       return NULL;
>>>>> +               }
>>>>> +               spin_lock_init(&wait_pt->submission_fence->lock);
>>>>> +               dma_fence_init(&wait_pt->submission_fence->base,
>>>>> +                              &drm_syncobj_stub_fence_ops,
>>>>> +                              &wait_pt->submission_fence->lock,
>>>>> +
>>>>> syncobj->syncobj_timeline.submission_context,
>>>>> +                              point);
>>>>> +       }
>>>>> +       wait_pt->value = point;
>>>>> +
>>>>> +       /* wait pt must be in an order, so that we can easily lookup
>>>>> and signal
>>>>> +        * it */
>>>>> +       spin_lock(&syncobj->lock);
>>>>> +       if (point <= syncobj->syncobj_timeline.timeline)
>>>>> +               dma_fence_signal(&wait_pt->base.base);
>>>>> +       if (wait_pt->submission_fence &&
>>>>> +           point <= syncobj->syncobj_timeline.signal_point)
>>>>> +               dma_fence_signal(&wait_pt->submission_fence->base);
>>>>> +       while(*new) {
>>>>> +               struct drm_syncobj_wait_pt *this =
>>>>> +                       rb_entry(*new, struct drm_syncobj_wait_pt,
>>>>> node);
>>>>> +               int result = wait_pt->value - this->value;
>>>>> +
>>>>> +               parent = *new;
>>>>> +               if (result < 0)
>>>>> +                       new = &((*new)->rb_left);
>>>>> +               else if (result > 0)
>>>>> +                       new = &((*new)->rb_right);
>>>>> +               else
>>>>> +                       goto exist;
>>>>> +       }
>>>>> +
>>>>> +       rb_link_node(&wait_pt->node, parent, new);
>>>>> +       rb_insert_color(&wait_pt->node,
>>>>> &syncobj->syncobj_timeline.wait_pt_tree);
>>>>> +       spin_unlock(&syncobj->lock);
>>>>> +       return wait_pt;
>>>>> +exist:
>>>>> +       spin_unlock(&syncobj->lock);
>>>>> +       if (wait_pt->submission_fence)
>>>>> +               dma_fence_put(&wait_pt->submission_fence->base);
>>>>> +       dma_fence_put(&wait_pt->base.base);
>>>>> +       wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
>>>>> +       return wait_pt;
>>>>> +}
>>>>> +
>>>>> +static struct dma_fence *
>>>>> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 point,
>>>>> u64 flag)
>>>>> +{
>>>>> +       struct drm_syncobj_wait_pt *wait_pt;
>>>>> +
>>>>> +       /* already signaled, simply return a signaled stub fence */
>>>>> +       if (point <= syncobj->syncobj_timeline.timeline) {
>>>>> +               struct drm_syncobj_stub_fence *fence;
>>>>> +
>>>>> +               fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>>>>> +               if (fence == NULL)
>>>>> +                       return NULL;
>>>>> +
>>>>> +               spin_lock_init(&fence->lock);
>>>>> +               dma_fence_init(&fence->base,
>>>>> &drm_syncobj_stub_fence_ops,
>>>>> +                              &fence->lock, 0, 0);
>>>>> +               dma_fence_signal(&fence->base);
>>>>> +               return &fence->base;
>>>>> +       }
>>>>> +
>>>>> +       /* check if the wait pt exists */
>>>>> +       wait_pt = drm_syncobj_timeline_lookup_wait_pt(syncobj, point);
>>>>> +       if (!wait_pt) {
>>>>> +               /* This is a new wait pt, so create it */
>>>>> +               wait_pt = drm_syncobj_timeline_create_wait_pt(syncobj,
>>>>> point);
>>>>> +               if (!wait_pt)
>>>>> +                       return NULL;
>>>>> +       }
>>>>> +       if (wait_pt) {
>>>>> +               struct dma_fence *fence;
>>>>> +               if (wait_pt->submission_fence) {
>>>>> +                       if (flag &
>>>>> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>>>>> +
>>>>> dma_fence_wait(&wait_pt->submission_fence->base, true);
>>>>> +                       else if
>>>>> (!dma_fence_is_signaled(&wait_pt->submission_fence->base))
>>>>> +                               return NULL;
>>>>> +               }
>>>>> +               rcu_read_lock();
>>>>> +               fence = dma_fence_get_rcu(&wait_pt->base.base);
>>>>> +               rcu_read_unlock();
>>>>> +               return fence;
>>>>> +       }
>>>>> +       return NULL;
>>>>> +}
>>>>> +
>>>>>    /**
>>>>>     * drm_syncobj_find_fence - lookup and reference the fence in a sync
>>>>> object
>>>>>     * @file_private: drm file private pointer
>>>>>     * @handle: sync object handle to lookup.
>>>>>     * @fence: out parameter for the fence
>>>>> + * @point: timeline point
>>>>>     *
>>>>>     * This is just a convenience function that combines
>>>>> drm_syncobj_find() and
>>>>>     * drm_syncobj_fence_get().
>>>>> @@ -221,7 +544,8 @@ static int drm_syncobj_assign_null_handle(struct
>>>>> drm_syncobj *syncobj)
>>>>>     */
>>>>>    int drm_syncobj_find_fence(struct drm_file *file_private,
>>>>>                            u32 handle,
>>>>> -                          struct dma_fence **fence)
>>>>> +                          struct dma_fence **fence,
>>>>> +                          u64 point)
>>>>>    {
>>>>>         struct drm_syncobj *syncobj = drm_syncobj_find(file_private,
>>>>> handle);
>>>>>         int ret = 0;
>>>>> @@ -229,7 +553,15 @@ int drm_syncobj_find_fence(struct drm_file
>>>>> *file_private,
>>>>>         if (!syncobj)
>>>>>                 return -ENOENT;
>>>>> -       *fence = drm_syncobj_fence_get(syncobj);
>>>>> +       if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) {
>>>>> +               *fence = drm_syncobj_fence_get(syncobj);
>>>>
>>>> Needs at least a WARN_ON(point != 0) here, since I'd say that would be a
>>>> driver bug. Or userspace bug, in which case we need to return -EINVAL;
>>>>
>>>> Hard to tell without the driver implementation.
>>>>
>>>>> +       }else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>>>> +               *fence = drm_syncobj_timeline_point_get(syncobj, point,
>>>>> +
>>>>> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
>>>>> +       } else {
>>>>> +               DRM_ERROR("invalid syncobj type\n");
>>>>> +               return -EINVAL;
>>>>> +       }
>>>>>         if (!*fence) {
>>>>>                 ret = -EINVAL;
>>>>>         }
>>>>> @@ -238,6 +570,35 @@ int drm_syncobj_find_fence(struct drm_file
>>>>> *file_private,
>>>>>    }
>>>>>    EXPORT_SYMBOL(drm_syncobj_find_fence);
>>>>> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj,
>>>>> +                                     struct drm_syncobj_timeline
>>>>> *syncobj_timeline)
>>>>> +{
>>>>> +       struct rb_node *node = NULL;
>>>>> +       struct drm_syncobj_wait_pt *wait_pt = NULL;
>>>>> +       struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
>>>>> +
>>>>> +       spin_lock(&syncobj->lock);
>>>>> +       for(node = rb_first(&syncobj_timeline->wait_pt_tree);
>>>>> +           node != NULL; ) {
>>>>> +               wait_pt = rb_entry(node, struct drm_syncobj_wait_pt,
>>>>> node);
>>>>> +               node = rb_next(node);
>>>>> +               rb_erase(&wait_pt->node,
>>>>> +                        &syncobj_timeline->wait_pt_tree);
>>>>> +               RB_CLEAR_NODE(&wait_pt->node);
>>>>> +               if (wait_pt->submission_fence)
>>>>> +
>>>>> dma_fence_put(&wait_pt->submission_fence->base);
>>>>> +               /* kfree(wait_pt) is excuted by fence put */
>>>>> +               dma_fence_put(&wait_pt->base.base);
>>>>> +       }
>>>>> +       list_for_each_entry_safe(signal_pt, tmp,
>>>>> +                                &syncobj_timeline->signal_pt_list,
>>>>> list) {
>>>>> +               list_del(&signal_pt->list);
>>>>> +               dma_fence_put(signal_pt->signal_fence);
>>>>> +               dma_fence_put(signal_pt->pre_pt_base);
>>>>> +               dma_fence_put(&signal_pt->base.base);
>>>>> +       }
>>>>> +       spin_unlock(&syncobj->lock);
>>>>> +}
>>>>>    /**
>>>>>     * drm_syncobj_free - free a sync object.
>>>>>     * @kref: kref to free.
>>>>> @@ -250,10 +611,22 @@ void drm_syncobj_free(struct kref *kref)
>>>>>                                                    struct drm_syncobj,
>>>>>                                                    refcount);
>>>>>         drm_syncobj_replace_fence(syncobj, NULL);
>>>>> +       drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline);
>>>>>         kfree(syncobj);
>>>>>    }
>>>>>    EXPORT_SYMBOL(drm_syncobj_free);
>>>>> +static void drm_syncobj_timeline_init(struct drm_syncobj_timeline
>>>>> +                                     *syncobj_timeline)
>>>>> +{
>>>>> +       syncobj_timeline->timeline_context =
>>>>> dma_fence_context_alloc(1);
>>>>> +       syncobj_timeline->submission_context =
>>>>> dma_fence_context_alloc(1);
>>>>> +       syncobj_timeline->timeline = 0;
>>>>> +       syncobj_timeline->signal_point = 0;
>>>>> +
>>>>> +       syncobj_timeline->wait_pt_tree = RB_ROOT;
>>>>> +       INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list);
>>>>> +}
>>>>>    /**
>>>>>     * drm_syncobj_create - create a new syncobj
>>>>>     * @out_syncobj: returned syncobj
>>>>> @@ -279,6 +652,12 @@ int drm_syncobj_create(struct drm_syncobj
>>>>> **out_syncobj, uint32_t flags,
>>>>>         kref_init(&syncobj->refcount);
>>>>>         INIT_LIST_HEAD(&syncobj->cb_list);
>>>>>         spin_lock_init(&syncobj->lock);
>>>>> +       if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) {
>>>>> +               syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
>>>>> +               drm_syncobj_timeline_init(&syncobj->syncobj_timeline);
>>>>> +       } else {
>>>>> +               syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL;
>>>>> +       }
>>>>>         if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>>>>>                 ret = drm_syncobj_assign_null_handle(syncobj);
>>>>> @@ -491,7 +870,7 @@ static int drm_syncobj_export_sync_file(struct
>>>>> drm_file *file_private,
>>>>>         if (fd < 0)
>>>>>                 return fd;
>>>>> -       ret = drm_syncobj_find_fence(file_private, handle, &fence);
>>>>> +       ret = drm_syncobj_find_fence(file_private, handle, &fence, 0);
>>>>>         if (ret)
>>>>>                 goto err_put_fd;
>>>>> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c
>>>>> b/drivers/gpu/drm/v3d/v3d_gem.c
>>>>> index 9029590267aa..f24c3eccb4d5 100644
>>>>> --- a/drivers/gpu/drm/v3d/v3d_gem.c
>>>>> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
>>>>> @@ -521,12 +521,12 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void
>>>>> *data,
>>>>>         kref_init(&exec->refcount);
>>>>>         ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
>>>>> -                                    &exec->bin.in_fence);
>>>>> +                                    &exec->bin.in_fence, 0);
>>>>>         if (ret == -EINVAL)
>>>>>                 goto fail;
>>>>>         ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
>>>>> -                                    &exec->render.in_fence);
>>>>> +                                    &exec->render.in_fence, 0);
>>>>>         if (ret == -EINVAL)
>>>>>                 goto fail;
>>>>> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c
>>>>> b/drivers/gpu/drm/vc4/vc4_gem.c
>>>>> index 7910b9acedd6..f7b4971342e8 100644
>>>>> --- a/drivers/gpu/drm/vc4/vc4_gem.c
>>>>> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
>>>>> @@ -1173,7 +1173,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void
>>>>> *data,
>>>>>         if (args->in_sync) {
>>>>>                 ret = drm_syncobj_find_fence(file_priv, args->in_sync,
>>>>> -                                            &in_fence);
>>>>> +                                            &in_fence, 0);
>>>>>                 if (ret)
>>>>>                         goto fail;
>>>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>>>> index b04c492ddbb5..b6e193c6cc9a 100644
>>>>> --- a/include/drm/drm_syncobj.h
>>>>> +++ b/include/drm/drm_syncobj.h
>>>>> @@ -54,6 +54,41 @@ const struct dma_fence_ops
>>>>> drm_syncobj_stub_fence_ops = {
>>>>>         .release = NULL,
>>>>>    };
>>>>> +enum drm_syncobj_type {
>>>>> +       DRM_SYNCOBJ_TYPE_NORMAL,
>>>>> +       DRM_SYNCOBJ_TYPE_TIMELINE
>>>>> +};
>>>>> +
>>>>> +struct drm_syncobj;
>>>>> +struct drm_syncobj_wait_pt {
>>>>> +       struct drm_syncobj_stub_fence base;
>>>>> +       struct drm_syncobj_stub_fence *submission_fence;
>>>>> +       u64    value;
>>>>> +       struct rb_node   node;
>>>>> +};
>>>>> +struct drm_syncobj_signal_pt {
>>>>> +       struct drm_syncobj_stub_fence base;
>>>>> +       struct dma_fence *signal_fence;
>>>>> +       struct dma_fence *pre_pt_base;
>>>>> +       struct dma_fence_cb signal_cb;
>>>>> +       struct dma_fence_cb pre_pt_cb;
>>>>> +       struct drm_syncobj *syncobj;
>>>>> +       u64    value;
>>>>> +       struct list_head list;
>>>>> +};
>>>>
>>>> The above internal structs shouldn't be in the public header.
>>>>
>>>> Some tiny comments about what each piece is for would also go a long way
>>>> towards me being able to understand things.
>>>>
>>>> Thanks, Daniel
>>>>
>>>>> +struct drm_syncobj_timeline {
>>>>> +       u64 timeline_context;
>>>>> +       u64 submission_context;
>>>>> +       /**
>>>>> +        * @timeline: syncobj timeline
>>>>> +        */
>>>>> +       u64 timeline;
>>>>> +       u64 signal_point;
>>>>> +
>>>>> +
>>>>> +       struct rb_root wait_pt_tree;
>>>>> +       struct list_head signal_pt_list;
>>>>> +};
>>>>>    /**
>>>>>     * struct drm_syncobj - sync object.
>>>>>     *
>>>>> @@ -64,6 +99,11 @@ struct drm_syncobj {
>>>>>          * @refcount: Reference count of this object.
>>>>>          */
>>>>>         struct kref refcount;
>>>>> +       /**
>>>>> +        * @type: indicate syncobj type
>>>>> +        */
>>>>> +       enum drm_syncobj_type type;
>>>>> +       struct drm_syncobj_timeline syncobj_timeline;
>>>>>         /**
>>>>>          * @fence:
>>>>>          * NULL or a pointer to the fence bound to this object.
>>>>> @@ -162,9 +202,12 @@ void drm_syncobj_remove_callback(struct
>>>>> drm_syncobj *syncobj,
>>>>>                                  struct drm_syncobj_cb *cb);
>>>>>    void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>>>>>                                struct dma_fence *fence);
>>>>> +void drm_syncobj_signal_fence(struct drm_syncobj *syncobj,
>>>>> +                             struct dma_fence *fence,
>>>>> +                             u64 point);
>>>>>    int drm_syncobj_find_fence(struct drm_file *file_private,
>>>>>                            u32 handle,
>>>>> -                          struct dma_fence **fence);
>>>>> +                          struct dma_fence **fence, u64 point);
>>>>>    void drm_syncobj_free(struct kref *kref);
>>>>>    int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t
>>>>> flags,
>>>>>                        struct dma_fence *fence);
>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>>> index 300f336633f2..71e6cd1c88f8 100644
>>>>> --- a/include/uapi/drm/drm.h
>>>>> +++ b/include/uapi/drm/drm.h
>>>>> @@ -717,6 +717,8 @@ struct drm_prime_handle {
>>>>>    struct drm_syncobj_create {
>>>>>         __u32 handle;
>>>>>    #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
>>>>> +#define DRM_SYNCOBJ_CREATE_TYPE_NORMAL (1 << 1)
>>>>> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 2)
>>>>>         __u32 flags;
>>>>>    };
>>>>> @@ -734,7 +736,6 @@ struct drm_syncobj_handle {
>>>>>         __s32 fd;
>>>>>         __u32 pad;
>>>>>    };
>>>>> -
>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>>>>>    struct drm_syncobj_wait {
>>>>> --
>>>>> 2.14.1
>>>>>
>>>>> _______________________________________________
>>>>> dri-devel mailing list
>>>>> dri-devel at lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch