On 2018å¹´09æ??12æ?¥ 15:22, Christian König wrote: > Ping? Have you seen my comments here? Sorry, I didn't see this reply. inline... > > Looks like you haven't addressed any of them in your last mail. > > Christian. > > Am 06.09.2018 um 09:25 schrieb Christian König: >> Am 06.09.2018 um 08:25 schrieb Chunming Zhou: >>> VK_KHR_timeline_semaphore: >>> This extension introduces a new type of semaphore that has an >>> integer payload >>> identifying a point in a timeline. Such timeline semaphores support the >> >> Drop the term semaphore here, better use syncobj. This is from VK_KHR_timeline_semaphore extension describe, not my invention, I just quote it. In kernel side, we call syncobj, in UMD, they still call semaphore. >> >>> following operations: >>>    * CPU query - A host operation that allows querying the payload >>> of the >>>      timeline semaphore. >>>    * CPU wait - A host operation that allows a blocking wait for a >>>      timeline semaphore to reach a specified value. >>>    * Device wait - A device operation that allows waiting for a >>>      timeline semaphore to reach a specified value. >>>    * Device signal - A device operation that allows advancing the >>>      timeline semaphore to a specified value. >>> >>> Since it's a timeline, that means the front time point(PT) always is >>> signaled before the late PT. >>> a. signal PT design: >>> Signal PT fence N depends on PT[N-1] fence and signal opertion >>> fence, when PT[N] fence is signaled, >>> the timeline will increase to value of PT[N]. >>> b. wait PT design: >>> Wait PT fence is signaled by reaching timeline point value, when >>> timeline is increasing, will compare >>> wait PTs value with new timeline value, if PT value is lower than >>> timeline value, then wait PT will be >>> signaled, otherwise keep in list. semaphore wait operation can wait >>> on any point of timeline, >>> so need a RB tree to order them. And wait PT could ahead of signal >>> PT, we need a sumission fence to >>> perform that. >>> >>> v2: >>> 1. remove unused DRM_SYNCOBJ_CREATE_TYPE_NORMAL. (Christian) >>> 2. move unexposed denitions to .c file. (Daniel Vetter) >>> 3. split up the change to drm_syncobj_find_fence() in a separate >>> patch. (Christian) >>> 4. split up the change to drm_syncobj_replace_fence() in a separate >>> patch. >>> 5. drop the submission_fence implementation and instead use >>> wait_event() for that. (Christian) >>> 6. WARN_ON(point != 0) for NORMAL type syncobj case. (Daniel Vetter) >>> >>> v3: >>> 1. replace normal syncobj with timeline implemenation. (Vetter and >>> Christian) >>>     a. normal syncobj signal op will create a signal PT to tail of >>> signal pt list. >>>     b. normal syncobj wait op will create a wait pt with last >>> signal point, and this wait PT is only signaled by related signal >>> point PT. >>> 2. many bug fix and clean up >>> 3. stub fence moving is moved to other patch. >>> >>> v4ï¼? >>> 1. fix RB tree loop with while(node=rb_first(...)). (Christian) >>> 2. fix syncobj lifecycle. (Christian) >>> 3. only enable_signaling when there is wait_pt. (Christian) >>> >>> Tested by ./deqp-vk -n dEQP-VK*semaphore* for normal syncobj >>> >>> Signed-off-by: Chunming Zhou <david1.zhou at amd.com> >>> Cc: Christian Konig <christian.koenig at amd.com> >>> Cc: Dave Airlie <airlied at redhat.com> >>> Cc: Daniel Rakos <Daniel.Rakos at amd.com> >>> Cc: Daniel Vetter <daniel at ffwll.ch> >>> --- >>>  drivers/gpu/drm/drm_syncobj.c             | 516 >>> +++++++++++++++++---- >>>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 +- >>>  include/drm/drm_syncobj.h                 | 78 ++-- >>>  include/uapi/drm/drm.h                    |  1 + >>>  4 files changed, 448 insertions(+), 151 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/drm_syncobj.c >>> b/drivers/gpu/drm/drm_syncobj.c >>> index e9ce623d049e..94b31de23858 100644 >>> --- a/drivers/gpu/drm/drm_syncobj.c >>> +++ b/drivers/gpu/drm/drm_syncobj.c >>> @@ -56,6 +56,9 @@ >>>  #include "drm_internal.h" >>>  #include <drm/drm_syncobj.h> >>>  +/* merge normal syncobj to timeline syncobj, the point interval >>> is 1 */ >>> +#define DRM_SYNCOBJ_NORMAL_POINT 1 >>> + >>>  struct drm_syncobj_stub_fence { >>>      struct dma_fence base; >>>      spinlock_t lock; >>> @@ -82,6 +85,18 @@ static const struct dma_fence_ops >>> drm_syncobj_stub_fence_ops = { >>>      .release = drm_syncobj_stub_fence_release, >>>  }; >>>  +struct drm_syncobj_signal_pt { >>> +   struct dma_fence_array *base; >>> +   u64   value; >>> +   struct list_head list; >>> +}; >>> +struct drm_syncobj_wait_pt { >>> +   struct drm_syncobj_stub_fence base; >>> +   struct dma_fence *signal_pt_fence; >>> +   struct dma_fence_cb wait_cb; >>> +   struct rb_node  node; >>> +   u64   value; >>> +}; >>>   /** >>>   * drm_syncobj_find - lookup and reference a sync object. >>> @@ -109,61 +124,238 @@ struct drm_syncobj *drm_syncobj_find(struct >>> drm_file *file_private, >>>  } >>>  EXPORT_SYMBOL(drm_syncobj_find); >>>  -static void drm_syncobj_add_callback_locked(struct drm_syncobj >>> *syncobj, >>> -                       struct drm_syncobj_cb *cb, >>> -                       drm_syncobj_func_t func) >>> +static void drm_syncobj_timeline_init(struct drm_syncobj *syncobj, >>> +                     struct drm_syncobj_timeline *syncobj_timeline) >> >> Since we merged timeline and singleton syncobj you can drop the extra >> _timeline_ part in the function name I think. Will try in v5. >> >>>  { >>> -   cb->func = func; >>> -   list_add_tail(&cb->node, &syncobj->cb_list); >>> +   spin_lock(&syncobj->lock); >>> +   syncobj_timeline->timeline_context = dma_fence_context_alloc(1); >>> +   syncobj_timeline->timeline = 0; >>> +   syncobj_timeline->signal_point = 0; >>> +   init_waitqueue_head(&syncobj_timeline->wq); >>> + >>> +   syncobj_timeline->wait_pt_tree = RB_ROOT; >>> +   INIT_LIST_HEAD(&syncobj_timeline->signal_pt_list); >>> +   spin_unlock(&syncobj->lock); >>>  } >>>  -static int drm_syncobj_fence_get_or_add_callback(struct >>> drm_syncobj *syncobj, >>> -                        struct dma_fence **fence, >>> -                        struct drm_syncobj_cb *cb, >>> -                        drm_syncobj_func_t func) >>> +static void drm_syncobj_timeline_fini(struct drm_syncobj *syncobj, >>> +                     struct drm_syncobj_timeline *syncobj_timeline) >>>  { >>> -   int ret; >>> - >>> -   *fence = drm_syncobj_fence_get(syncobj); >>> -   if (*fence) >>> -       return 1; >>> +   struct rb_node *node = NULL; >>> +   struct drm_syncobj_wait_pt *wait_pt = NULL; >>> +   struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp; >>>       spin_lock(&syncobj->lock); >>> -   /* We've already tried once to get a fence and failed. Now that we >>> -    * have the lock, try one more time just to be sure we don't add a >>> -    * callback when a fence has already been set. >>> -    */ >>> -   if (syncobj->fence) { >>> -       *fence = >>> dma_fence_get(rcu_dereference_protected(syncobj->fence, >>> - lockdep_is_held(&syncobj->lock))); >>> -       ret = 1; >>> -   } else { >>> -       *fence = NULL; >>> -       drm_syncobj_add_callback_locked(syncobj, cb, func); >>> -       ret = 0; >>> +   if (!RB_EMPTY_ROOT(&syncobj_timeline->wait_pt_tree)) >>> +       DRM_ERROR("There still are advanced wait_pts! pls check!"); >>> +   while ((node = rb_first(&syncobj_timeline->wait_pt_tree))) { >>> +       wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node); >>> +       rb_erase(&wait_pt->node, >>> +            &syncobj_timeline->wait_pt_tree); >>> +       RB_CLEAR_NODE(&wait_pt->node); >>> +       if (!wait_pt->signal_pt_fence) { >>> +           /* if wait_pt still is advanced, fake signal it. >>> +            * generally shouldn't happens */ >>> +           dma_fence_set_error(&wait_pt->base.base, -ENOENT); >>> +           dma_fence_signal(&wait_pt->base.base); >>> +       } >>> +       /* kfree(wait_pt) is excuted by fence put */ >>> +       dma_fence_put(&wait_pt->base.base); >>> +   } >>> +   list_for_each_entry_safe(signal_pt, tmp, >>> +                &syncobj_timeline->signal_pt_list, list) { >>> +       list_del(&signal_pt->list); >>> +       dma_fence_put(&signal_pt->base->base); >>> +       kfree(signal_pt); >>>      } >>>      spin_unlock(&syncobj->lock); >>> +} >>>  -   return ret; >>> +static void wait_pt_func(struct dma_fence *fence, struct >>> dma_fence_cb *cb) >>> +{ >>> +   struct drm_syncobj_wait_pt *wait_pt = >>> +       container_of(cb, struct drm_syncobj_wait_pt, wait_cb); >>> + >>> +   dma_fence_signal(&wait_pt->base.base); >>> +   dma_fence_put(wait_pt->signal_pt_fence); >>> +   wait_pt->signal_pt_fence = NULL; >>> +   dma_fence_put(&wait_pt->base.base); >>> +} >>> + >>> +static void drm_syncobj_find_signal_pt_for_wait_pt(struct >>> drm_syncobj *syncobj, >>> +                          struct drm_syncobj_wait_pt *wait_pt) >>> +{ >> >> That whole approach still looks horrible complicated to me. It's already very close to what you said before. >> >> Especially the separation of signal and wait pt is completely >> unnecessary as far as I can see. >> When a wait pt is requested we just need to search for the signal >> point which it will trigger. Yeah, I tried this, but when I implement cpu wait ioctl on specific point, we need a advanced wait pt fence, otherwise, we could still need old syncobj cb. Thanks, David Zhou >> >> Regards, >> Christian. >> >>> +   struct drm_syncobj_timeline *timeline = >>> &syncobj->syncobj_timeline; >>> +   struct drm_syncobj_signal_pt *signal_pt; >>> +   int ret; >>> + >>> +   if (wait_pt->signal_pt_fence) { >>> +       return; >>> +   } else if ((syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) && >>> +          (wait_pt->value <= timeline->timeline)) { >>> +       dma_fence_signal(&wait_pt->base.base); >>> +       rb_erase(&wait_pt->node, >>> +            &timeline->wait_pt_tree); >>> +       RB_CLEAR_NODE(&wait_pt->node); >>> +       dma_fence_put(&wait_pt->base.base); >>> +       return; >>> +   } >>> + >>> +   list_for_each_entry(signal_pt, &timeline->signal_pt_list, list) { >>> +       if (wait_pt->value < signal_pt->value) >>> +           continue; >>> +       if ((syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) && >>> +           (wait_pt->value != signal_pt->value)) >>> +           continue; >>> +       wait_pt->signal_pt_fence = >>> dma_fence_get(&signal_pt->base->base); >>> +       ret = dma_fence_add_callback(wait_pt->signal_pt_fence, >>> +                        &wait_pt->wait_cb, >>> +                        wait_pt_func); >>> +       if (ret == -ENOENT) { >>> +           dma_fence_signal(&wait_pt->base.base); >>> +           dma_fence_put(wait_pt->signal_pt_fence); >>> +           wait_pt->signal_pt_fence = NULL; >>> +           rb_erase(&wait_pt->node, >>> +                &timeline->wait_pt_tree); >>> +           RB_CLEAR_NODE(&wait_pt->node); >>> +           dma_fence_put(&wait_pt->base.base); >>> +       } else if (ret < 0) { >>> +           dma_fence_put(wait_pt->signal_pt_fence); >>> +           DRM_ERROR("add callback error!"); >>> +       } else { >>> +           /* after adding callback, remove from rb tree */ >>> +           rb_erase(&wait_pt->node, >>> +                &timeline->wait_pt_tree); >>> +           RB_CLEAR_NODE(&wait_pt->node); >>> +       } >>> +       return; >>> +   } >>> +   /* signaled pt was released */ >>> +   if (!wait_pt->signal_pt_fence && (wait_pt->value <= >>> +                     timeline->signal_point)) { >>> +       dma_fence_signal(&wait_pt->base.base); >>> +       rb_erase(&wait_pt->node, >>> +            &timeline->wait_pt_tree); >>> +       RB_CLEAR_NODE(&wait_pt->node); >>> +       dma_fence_put(&wait_pt->base.base); >>> +   } >>>  } >>>  -void drm_syncobj_add_callback(struct drm_syncobj *syncobj, >>> -                 struct drm_syncobj_cb *cb, >>> -                 drm_syncobj_func_t func) >>> +static int drm_syncobj_timeline_create_signal_pt(struct drm_syncobj >>> *syncobj, >>> +                        struct dma_fence *fence, >>> +                        u64 point) >>>  { >>> +   struct drm_syncobj_signal_pt *signal_pt = >>> +       kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL); >>> +   struct drm_syncobj_signal_pt *tail_pt; >>> +   struct dma_fence **fences; >>> +   struct rb_node *node; >>> +   struct drm_syncobj_wait_pt *tail_wait_pt = NULL; >>> +   int num_fences = 0; >>> +   int ret = 0, i; >>> + >>> +   if (!signal_pt) >>> +       return -ENOMEM; >>> +   if (syncobj->syncobj_timeline.signal_point >= point) { >>> +       DRM_WARN("A later signal is ready!"); >>> +       goto out; >>> +   } >>> +   if (!fence) >>> +       goto out; >>> + >>> +   fences = kmalloc_array(sizeof(void *), 2, GFP_KERNEL); >>> +   if (!fences) >>> +       goto out; >>> +   fences[num_fences++] = dma_fence_get(fence); >>> +   /* timeline syncobj must take this dependency */ >>> +   if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) { >>> +       spin_lock(&syncobj->lock); >>> +       if (!list_empty(&syncobj->syncobj_timeline.signal_pt_list)) { >>> +           tail_pt = >>> list_last_entry(&syncobj->syncobj_timeline.signal_pt_list, >>> +                         struct drm_syncobj_signal_pt, list); >>> +           fences[num_fences++] = >>> dma_fence_get(&tail_pt->base->base); >>> +       } >>> +       spin_unlock(&syncobj->lock); >>> +   } >>> +   signal_pt->base = dma_fence_array_create(num_fences, fences, >>> + syncobj->syncobj_timeline.timeline_context, >>> +                        point, false); >>> +   if (!signal_pt->base) >>> +       goto fail; >>> + >>> +   signal_pt->value = point; >>>      spin_lock(&syncobj->lock); >>> -   drm_syncobj_add_callback_locked(syncobj, cb, func); >>> +   syncobj->syncobj_timeline.signal_point = point; >>> +   INIT_LIST_HEAD(&signal_pt->list); >>> +   list_add_tail(&signal_pt->list, >>> &syncobj->syncobj_timeline.signal_pt_list); >>> + >>> +   /* check if there is advanced wait */ >>> +   node = rb_last(&syncobj->syncobj_timeline.wait_pt_tree); >>> +   if (node) >>> +       tail_wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, >>> node); >>> +   if (tail_wait_pt && !tail_wait_pt->signal_pt_fence) { >>> +       for (node = rb_first(&syncobj->syncobj_timeline.wait_pt_tree); >>> +            node != NULL; node = rb_next(node)) { >>> +           struct drm_syncobj_wait_pt *wait_pt = >>> +               rb_entry(node, struct drm_syncobj_wait_pt, >>> +                    node); >>> + >>> +           drm_syncobj_find_signal_pt_for_wait_pt(syncobj, >>> +                                  wait_pt); >>> +       } >>> +   } >>> + >>>      spin_unlock(&syncobj->lock); >>> +   wake_up_all(&syncobj->syncobj_timeline.wq); >>> + >>> +   return 0; >>> +fail: >>> +   for (i = 0; i < num_fences; i++) >>> +       dma_fence_put(fences[i]); >>> +   kfree(fences); >>> +out: >>> +   kfree(signal_pt); >>> +   return ret; >>>  } >>>  -void drm_syncobj_remove_callback(struct drm_syncobj *syncobj, >>> -                struct drm_syncobj_cb *cb) >>> +static void drm_syncobj_timeline_garbage_collection(struct >>> drm_syncobj *syncobj) >>>  { >>> +   struct drm_syncobj_timeline *timeline = >>> &syncobj->syncobj_timeline; >>> +   struct rb_node *node; >>> +   struct drm_syncobj_wait_pt *wait_pt; >>> +   struct drm_syncobj_signal_pt *signal_pt, *tmp; >>> + >>>      spin_lock(&syncobj->lock); >>> -   list_del_init(&cb->node); >>> +   node = rb_first(&timeline->wait_pt_tree); >>> +   while (node) { >>> +       wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node); >>> +       if (!dma_fence_is_signaled(&wait_pt->base.base)) { >>> +           node = rb_next(node); >>> +           continue; >>> +       } >>> +       rb_erase(&wait_pt->node, >>> +            &timeline->wait_pt_tree); >>> +       RB_CLEAR_NODE(&wait_pt->node); >>> +       /* kfree(wait_pt) is excuted by fence put */ >>> +       dma_fence_put(&wait_pt->base.base); >>> +       node = rb_first(&timeline->wait_pt_tree); >>> +   } >>> +   list_for_each_entry_safe(signal_pt, tmp, >>> +                &timeline->signal_pt_list, list) { >>> +       if (dma_fence_is_signaled(&signal_pt->base->base)) { >>> +           timeline->timeline = signal_pt->value; >>> +           list_del(&signal_pt->list); >>> +           dma_fence_put(&signal_pt->base->base); >>> +           kfree(signal_pt); >>> +       } else { >>> +           /*signal_pt is in order in list, from small to big, so >>> +            * the later must not be signal either */ >>> +           break; >>> +       } >>> +   } >>> + >>>      spin_unlock(&syncobj->lock); >>>  } >>> - >>>  /** >>>   * drm_syncobj_replace_fence - replace fence in a sync object. >>>   * @syncobj: Sync object to replace fence in >>> @@ -176,28 +368,29 @@ void drm_syncobj_replace_fence(struct >>> drm_syncobj *syncobj, >>>                     u64 point, >>>                     struct dma_fence *fence) >>>  { >>> -   struct dma_fence *old_fence; >>> -   struct drm_syncobj_cb *cur, *tmp; >>> - >>> -   if (fence) >>> -       dma_fence_get(fence); >>> - >>> -   spin_lock(&syncobj->lock); >>> - >>> -   old_fence = rcu_dereference_protected(syncobj->fence, >>> - lockdep_is_held(&syncobj->lock)); >>> -   rcu_assign_pointer(syncobj->fence, fence); >>> - >>> -   if (fence != old_fence) { >>> -       list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) { >>> -           list_del_init(&cur->node); >>> -           cur->func(syncobj, cur); >>> +   drm_syncobj_timeline_garbage_collection(syncobj); >>> +   if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) { >>> +       if (fence) >>> +           drm_syncobj_timeline_create_signal_pt(syncobj, fence, >>> +                                 point); >>> +       return; >>> +   } else if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) { >>> +       u64 pt_value; >>> + >>> +       if (!fence) { >>> +           drm_syncobj_timeline_fini(syncobj, >>> +                         &syncobj->syncobj_timeline); >>> +           drm_syncobj_timeline_init(syncobj, >>> +                         &syncobj->syncobj_timeline); >>> +           return; >>>          } >>> +       pt_value = syncobj->syncobj_timeline.signal_point + >>> +           DRM_SYNCOBJ_NORMAL_POINT; >>> +       drm_syncobj_timeline_create_signal_pt(syncobj, fence, >>> pt_value); >>> +       return; >>> +   } else { >>> +       DRM_ERROR("the syncobj type isn't support\n"); >>>      } >>> - >>> -   spin_unlock(&syncobj->lock); >>> - >>> -   dma_fence_put(old_fence); >>>  } >>>  EXPORT_SYMBOL(drm_syncobj_replace_fence); >>>  @@ -220,6 +413,120 @@ static int >>> drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) >>>      return 0; >>>  } >>>  +static struct drm_syncobj_wait_pt * >>> +drm_syncobj_timeline_lookup_wait_pt_fence(struct drm_syncobj *syncobj, >>> +                     u64 point, >>> +                     struct dma_fence **fence) >>> +{ >>> +   struct rb_node *node = >>> syncobj->syncobj_timeline.wait_pt_tree.rb_node; >>> +   struct drm_syncobj_wait_pt *wait_pt = NULL; >>> + >>> +   spin_lock(&syncobj->lock); >>> +   while(node) { >>> +       int result; >>> + >>> +       wait_pt = rb_entry(node, struct drm_syncobj_wait_pt, node); >>> +       result = point - wait_pt->value; >>> +       if (result < 0) { >>> +           node = node->rb_left; >>> +       } else if (result > 0) { >>> +           node = node->rb_right; >>> +       } else { >>> +           if (fence) >>> +               *fence = dma_fence_get(&wait_pt->base.base); >>> +           break; >>> +       } >>> +   } >>> +   spin_unlock(&syncobj->lock); >>> + >>> +   return wait_pt; >>> +} >>> + >>> +static struct drm_syncobj_wait_pt * >>> +drm_syncobj_timeline_create_wait_pt_and_return_fence(struct >>> drm_syncobj *syncobj, >>> +                            u64 point, >>> +                            struct dma_fence **fence) >>> +{ >>> +   struct drm_syncobj_wait_pt *wait_pt = kzalloc(sizeof(*wait_pt), >>> +                             GFP_KERNEL); >>> +   struct drm_syncobj_stub_fence *base; >>> +   struct rb_node **new = >>> &(syncobj->syncobj_timeline.wait_pt_tree.rb_node), *parent = NULL; >>> + >>> +   if (!wait_pt) >>> +       return NULL; >>> +   base = &wait_pt->base; >>> +   spin_lock_init(&base->lock); >>> +   dma_fence_init(&base->base, >>> +              &drm_syncobj_stub_fence_ops, >>> +              &base->lock, >>> +              syncobj->syncobj_timeline.timeline_context, point); >>> +   wait_pt->value = point; >>> +   wait_pt->signal_pt_fence = NULL; >>> + >>> +   /* wait pt must be in an order, so that we can easily lookup >>> and signal >>> +    * it */ >>> +   spin_lock(&syncobj->lock); >>> +   while(*new) { >>> +       struct drm_syncobj_wait_pt *this = >>> +           rb_entry(*new, struct drm_syncobj_wait_pt, node); >>> +       int result = wait_pt->value - this->value; >>> + >>> +       parent = *new; >>> +       if (result < 0) >>> +           new = &((*new)->rb_left); >>> +       else if (result > 0) >>> +           new = &((*new)->rb_right); >>> +       else >>> +           goto exist; >>> +   } >>> +   if (fence) >>> +       *fence = dma_fence_get(&wait_pt->base.base); >>> +   rb_link_node(&wait_pt->node, parent, new); >>> +   rb_insert_color(&wait_pt->node, >>> &syncobj->syncobj_timeline.wait_pt_tree); >>> +   spin_unlock(&syncobj->lock); >>> +   return wait_pt; >>> +exist: >>> +   spin_unlock(&syncobj->lock); >>> +   dma_fence_put(&wait_pt->base.base); >>> +   wait_pt = drm_syncobj_timeline_lookup_wait_pt_fence(syncobj, >>> point, >>> +                               fence); >>> +   return wait_pt; >>> +} >>> + >>> +static struct dma_fence * >>> +drm_syncobj_timeline_point_get(struct drm_syncobj *syncobj, u64 >>> point, u64 flags) >>> +{ >>> +   struct drm_syncobj_wait_pt *wait_pt; >>> +   struct dma_fence *fence = NULL; >>> + >>> +   /* check if the wait pt exists */ >>> +   wait_pt = drm_syncobj_timeline_lookup_wait_pt_fence(syncobj, >>> point, >>> +                               &fence); >>> +   if (!fence) { >>> +       /* This is a new wait pt, so create it */ >>> +       wait_pt = >>> drm_syncobj_timeline_create_wait_pt_and_return_fence(syncobj, point, >>> +                                 &fence); >>> +       if (!fence) >>> +           return NULL; >>> +   } >>> +   if (wait_pt) { >>> +       int ret = 0; >>> + >>> +       if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { >>> +           ret = >>> wait_event_interruptible_timeout(syncobj->syncobj_timeline.wq, >>> +                                  wait_pt->value <= >>> syncobj->syncobj_timeline.signal_point, >>> +                                  msecs_to_jiffies(10000)); /* >>> wait 10s */ >>> +           if (ret <= 0) >>> +               return NULL; >>> +       } >>> +       spin_lock(&syncobj->lock); >>> +       drm_syncobj_find_signal_pt_for_wait_pt(syncobj, wait_pt); >>> +       spin_unlock(&syncobj->lock); >>> +       return fence; >>> +   } >>> +   return NULL; >>> +} >>> + >>>  /** >>>   * drm_syncobj_find_fence - lookup and reference the fence in a >>> sync object >>>   * @file_private: drm file private pointer >>> @@ -234,20 +541,46 @@ static int >>> drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) >>>   * contains a reference to the fence, which must be released by >>> calling >>>   * dma_fence_put(). >>>   */ >>> -int drm_syncobj_find_fence(struct drm_file *file_private, >>> -              u32 handle, u64 point, >>> -              struct dma_fence **fence) >>> +int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point, >>> +                u64 flags, struct dma_fence **fence) >>>  { >>> -   struct drm_syncobj *syncobj = drm_syncobj_find(file_private, >>> handle); >>>      int ret = 0; >>>       if (!syncobj) >>>          return -ENOENT; >>>  -   *fence = drm_syncobj_fence_get(syncobj); >>> +   drm_syncobj_timeline_garbage_collection(syncobj); >>> +   if (syncobj->type == DRM_SYNCOBJ_TYPE_NORMAL) { >>> +       /*NORMAL syncobj always wait on last pt */ >>> +       u64 tail_pt_value = syncobj->syncobj_timeline.signal_point; >>> + >>> +       if (tail_pt_value == 0) >>> +           tail_pt_value += DRM_SYNCOBJ_NORMAL_POINT; >>> +       /* NORMAL syncobj doesn't care point value */ >>> +       WARN_ON(point != 0); >>> +       *fence = drm_syncobj_timeline_point_get(syncobj, >>> tail_pt_value, >>> +                           flags); >>> +   } else if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) { >>> +       *fence = drm_syncobj_timeline_point_get(syncobj, point, >>> +                           flags); >>> +   } else { >>> +       DRM_ERROR("Don't support this type syncobj\n"); >>> +       *fence = NULL; >>> +   } >>>      if (!*fence) { >>>          ret = -EINVAL; >>>      } >>> +   return ret; >>> +} >>> +EXPORT_SYMBOL(drm_syncobj_search_fence); >>> +int drm_syncobj_find_fence(struct drm_file *file_private, >>> +              u32 handle, u64 point, >>> +              struct dma_fence **fence) { >>> +   struct drm_syncobj *syncobj = drm_syncobj_find(file_private, >>> handle); >>> + >>> +   int ret = drm_syncobj_search_fence(syncobj, point, >>> +                   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, >>> +                   fence); >>>      drm_syncobj_put(syncobj); >>>      return ret; >>>  } >>> @@ -264,7 +597,7 @@ void drm_syncobj_free(struct kref *kref) >>>      struct drm_syncobj *syncobj = container_of(kref, >>>                             struct drm_syncobj, >>>                             refcount); >>> -   drm_syncobj_replace_fence(syncobj, 0, NULL); >>> +   drm_syncobj_timeline_fini(syncobj, &syncobj->syncobj_timeline); >>>      kfree(syncobj); >>>  } >>>  EXPORT_SYMBOL(drm_syncobj_free); >>> @@ -292,8 +625,12 @@ int drm_syncobj_create(struct drm_syncobj >>> **out_syncobj, uint32_t flags, >>>          return -ENOMEM; >>>       kref_init(&syncobj->refcount); >>> -   INIT_LIST_HEAD(&syncobj->cb_list); >>>      spin_lock_init(&syncobj->lock); >>> +   if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) >>> +       syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE; >>> +   else >>> +       syncobj->type = DRM_SYNCOBJ_TYPE_NORMAL; >>> +   drm_syncobj_timeline_init(syncobj, &syncobj->syncobj_timeline); >>>       if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) { >>>          ret = drm_syncobj_assign_null_handle(syncobj); >>> @@ -651,7 +988,6 @@ struct syncobj_wait_entry { >>>      struct task_struct *task; >>>      struct dma_fence *fence; >>>      struct dma_fence_cb fence_cb; >>> -   struct drm_syncobj_cb syncobj_cb; >>>  }; >>>   static void syncobj_wait_fence_func(struct dma_fence *fence, >>> @@ -663,18 +999,6 @@ static void syncobj_wait_fence_func(struct >>> dma_fence *fence, >>>      wake_up_process(wait->task); >>>  } >>>  -static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, >>> -                     struct drm_syncobj_cb *cb) >>> -{ >>> -   struct syncobj_wait_entry *wait = >>> -       container_of(cb, struct syncobj_wait_entry, syncobj_cb); >>> - >>> -   /* This happens inside the syncobj lock */ >>> -   wait->fence = >>> dma_fence_get(rcu_dereference_protected(syncobj->fence, >>> - lockdep_is_held(&syncobj->lock))); >>> -   wake_up_process(wait->task); >>> -} >>> - >>>  static signed long drm_syncobj_array_wait_timeout(struct >>> drm_syncobj **syncobjs, >>>                            uint32_t count, >>>                            uint32_t flags, >>> @@ -698,14 +1022,11 @@ static signed long >>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, >>>      signaled_count = 0; >>>      for (i = 0; i < count; ++i) { >>>          entries[i].task = current; >>> -       entries[i].fence = drm_syncobj_fence_get(syncobjs[i]); >>> +       ret = drm_syncobj_search_fence(syncobjs[i], 0, 0, >>> +                          &entries[i].fence); >>>          if (!entries[i].fence) { >>> -           if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { >>> -               continue; >>> -           } else { >>> -               ret = -EINVAL; >>> -               goto cleanup_entries; >>> -           } >>> +           ret = -EINVAL; >>> +           goto cleanup_entries; >>>          } >>>           if (dma_fence_is_signaled(entries[i].fence)) { >>> @@ -733,15 +1054,6 @@ static signed long >>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, >>>       * fallthough and try a 0 timeout wait! >>>       */ >>>  -   if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { >>> -       for (i = 0; i < count; ++i) { >>> - drm_syncobj_fence_get_or_add_callback(syncobjs[i], >>> -                                 &entries[i].fence, >>> -                                 &entries[i].syncobj_cb, >>> -                                 syncobj_wait_syncobj_func); >>> -       } >>> -   } >>> - >>>      do { >>>          set_current_state(TASK_INTERRUPTIBLE); >>>  @@ -789,13 +1101,10 @@ static signed long >>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, >>>   cleanup_entries: >>>      for (i = 0; i < count; ++i) { >>> -       if (entries[i].syncobj_cb.func) >>> -           drm_syncobj_remove_callback(syncobjs[i], >>> -                           &entries[i].syncobj_cb); >>> +       dma_fence_put(entries[i].fence); >>>          if (entries[i].fence_cb.func) >>>              dma_fence_remove_callback(entries[i].fence, >>>                            &entries[i].fence_cb); >>> -       dma_fence_put(entries[i].fence); >>>      } >>>      kfree(entries); >>>  @@ -970,12 +1279,21 @@ drm_syncobj_reset_ioctl(struct drm_device >>> *dev, void *data, >>>      if (ret < 0) >>>          return ret; >>>  -   for (i = 0; i < args->count_handles; i++) >>> -       drm_syncobj_replace_fence(syncobjs[i], 0, NULL); >>> - >>> +   for (i = 0; i < args->count_handles; i++) { >>> +       if (syncobjs[i]->type == DRM_SYNCOBJ_TYPE_TIMELINE) { >>> +           DRM_ERROR("timeline syncobj cannot reset!\n"); >>> +           ret = -EINVAL; >>> +           goto out; >>> +       } >>> +       drm_syncobj_timeline_fini(syncobjs[i], >>> +                     &syncobjs[i]->syncobj_timeline); >>> +       drm_syncobj_timeline_init(syncobjs[i], >>> +                     &syncobjs[i]->syncobj_timeline); >>> +   } >>> +out: >>>      drm_syncobj_array_free(syncobjs, args->count_handles); >>>  -   return 0; >>> +   return ret; >>>  } >>>   int >>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >>> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >>> index 0a8d2d64f380..579e91a5858b 100644 >>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >>> @@ -2137,7 +2137,9 @@ await_fence_array(struct i915_execbuffer *eb, >>>          if (!(flags & I915_EXEC_FENCE_WAIT)) >>>              continue; >>>  -       fence = drm_syncobj_fence_get(syncobj); >>> +       drm_syncobj_search_fence(syncobj, 0, >>> +                    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, >>> +                    &fence); >>>          if (!fence) >>>              return -EINVAL; >>>  diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h >>> index 425432b85a87..657c97dc25ec 100644 >>> --- a/include/drm/drm_syncobj.h >>> +++ b/include/drm/drm_syncobj.h >>> @@ -30,6 +30,25 @@ >>>   struct drm_syncobj_cb; >>>  +enum drm_syncobj_type { >>> +   DRM_SYNCOBJ_TYPE_NORMAL, >>> +   DRM_SYNCOBJ_TYPE_TIMELINE >>> +}; >>> + >>> +struct drm_syncobj_timeline { >>> +   wait_queue_head_t   wq; >>> +   u64 timeline_context; >>> +   /** >>> +    * @timeline: syncobj timeline >>> +    */ >>> +   u64 timeline; >>> +   u64 signal_point; >>> + >>> + >>> +   struct rb_root wait_pt_tree; >>> +   struct list_head signal_pt_list; >>> +}; >>> + >>>  /** >>>   * struct drm_syncobj - sync object. >>>   * >>> @@ -41,19 +60,16 @@ struct drm_syncobj { >>>       */ >>>      struct kref refcount; >>>      /** >>> -    * @fence: >>> -    * NULL or a pointer to the fence bound to this object. >>> -    * >>> -    * This field should not be used directly. Use >>> drm_syncobj_fence_get() >>> -    * and drm_syncobj_replace_fence() instead. >>> +    * @type: indicate syncobj type >>>       */ >>> -   struct dma_fence __rcu *fence; >>> +   enum drm_syncobj_type type; >>>      /** >>> -    * @cb_list: List of callbacks to call when the &fence gets >>> replaced. >>> +    * @syncobj_timeline: timeline >>>       */ >>> -   struct list_head cb_list; >>> +   struct drm_syncobj_timeline syncobj_timeline; >>> + >>>      /** >>> -    * @lock: Protects &cb_list and write-locks &fence. >>> +    * @lock: Protects timeline list and write-locks &fence. >>>       */ >>>      spinlock_t lock; >>>      /** >>> @@ -62,25 +78,6 @@ struct drm_syncobj { >>>      struct file *file; >>>  }; >>>  -typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj, >>> -                  struct drm_syncobj_cb *cb); >>> - >>> -/** >>> - * struct drm_syncobj_cb - callback for drm_syncobj_add_callback >>> - * @node: used by drm_syncob_add_callback to append this struct to >>> - *     &drm_syncobj.cb_list >>> - * @func: drm_syncobj_func_t to call >>> - * >>> - * This struct will be initialized by drm_syncobj_add_callback, >>> additional >>> - * data can be passed along by embedding drm_syncobj_cb in another >>> struct. >>> - * The callback will get called the next time >>> drm_syncobj_replace_fence is >>> - * called. >>> - */ >>> -struct drm_syncobj_cb { >>> -   struct list_head node; >>> -   drm_syncobj_func_t func; >>> -}; >>> - >>>  void drm_syncobj_free(struct kref *kref); >>>   /** >>> @@ -106,29 +103,6 @@ drm_syncobj_put(struct drm_syncobj *obj) >>>      kref_put(&obj->refcount, drm_syncobj_free); >>>  } >>>  -/** >>> - * drm_syncobj_fence_get - get a reference to a fence in a sync object >>> - * @syncobj: sync object. >>> - * >>> - * This acquires additional reference to &drm_syncobj.fence >>> contained in @obj, >>> - * if not NULL. It is illegal to call this without already holding >>> a reference. >>> - * No locks required. >>> - * >>> - * Returns: >>> - * Either the fence of @obj or NULL if there's none. >>> - */ >>> -static inline struct dma_fence * >>> -drm_syncobj_fence_get(struct drm_syncobj *syncobj) >>> -{ >>> -   struct dma_fence *fence; >>> - >>> -   rcu_read_lock(); >>> -   fence = dma_fence_get_rcu_safe(&syncobj->fence); >>> -   rcu_read_unlock(); >>> - >>> -   return fence; >>> -} >>> - >>>  struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, >>>                       u32 handle); >>>  void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, u64 >>> point, >>> @@ -142,5 +116,7 @@ int drm_syncobj_create(struct drm_syncobj >>> **out_syncobj, uint32_t flags, >>>  int drm_syncobj_get_handle(struct drm_file *file_private, >>>                 struct drm_syncobj *syncobj, u32 *handle); >>>  int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd); >>> +int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point, >>> +                u64 flags, struct dma_fence **fence); >>>   #endif >>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h >>> index 300f336633f2..cebdb2541eb7 100644 >>> --- a/include/uapi/drm/drm.h >>> +++ b/include/uapi/drm/drm.h >>> @@ -717,6 +717,7 @@ struct drm_prime_handle { >>>  struct drm_syncobj_create { >>>      __u32 handle; >>>  #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) >>> +#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 1) >>>      __u32 flags; >>>  }; >> > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx