On Thu, Jun 29, 2017 at 01:59:30PM +0100, Chris Wilson wrote: > Reduce the list iteration when incrementing the timeline by storing the > fences in increasing order. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Sumit Semwal <sumit.semwal@xxxxxxxxxx> > Cc: Sean Paul <seanpaul@xxxxxxxxxxxx> > Cc: Gustavo Padovan <gustavo@xxxxxxxxxxx> > --- > drivers/dma-buf/sw_sync.c | 49 ++++++++++++++++++++++++++++++++++++++------ > drivers/dma-buf/sync_debug.h | 5 +++++ > 2 files changed, 48 insertions(+), 6 deletions(-) > > diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c > index e51fe11bbbea..8cef5d345316 100644 > --- a/drivers/dma-buf/sw_sync.c > +++ b/drivers/dma-buf/sw_sync.c > @@ -96,6 +96,7 @@ static struct sync_timeline *sync_timeline_create(const char *name) > obj->context = dma_fence_context_alloc(1); > strlcpy(obj->name, name, sizeof(obj->name)); > > + obj->pt_tree = RB_ROOT; > INIT_LIST_HEAD(&obj->pt_list); > spin_lock_init(&obj->lock); > > @@ -142,9 +143,13 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) > > obj->value += inc; > > - list_for_each_entry_safe(pt, next, &obj->pt_list, link) > - if (dma_fence_is_signaled_locked(&pt->base)) > - list_del_init(&pt->link); > + list_for_each_entry_safe(pt, next, &obj->pt_list, link) { > + if (!dma_fence_is_signaled_locked(&pt->base)) > + break; > + > + list_del_init(&pt->link); > + rb_erase(&pt->node, &obj->pt_tree); > + } > > spin_unlock_irq(&obj->lock); > } > @@ -174,8 +179,38 @@ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, > INIT_LIST_HEAD(&pt->link); > > spin_lock_irq(&obj->lock); > - if (!dma_fence_is_signaled_locked(&pt->base)) > - list_add_tail(&pt->link, &obj->pt_list); > + if (!dma_fence_is_signaled_locked(&pt->base)) { > + struct rb_node **p = &obj->pt_tree.rb_node; > + struct rb_node *parent = NULL; > + > + while (*p) { > + struct sync_pt *other; > + int cmp; > + > + parent = *p; > + other = rb_entry(parent, typeof(*pt), node); > + cmp = value - other->base.seqno; We're imposing an implicit limitation on userspace here that points cannot be > INT_MAX apart (since they'll be in the wrong order in the tree). I wonder how much this patch will actually save, given that the number of active points on a given timeline should be small. Do we have any evidence that this optimization is warranted? Sean > + if (cmp > 0) { > + p = &parent->rb_right; > + } else if (cmp < 0) { > + p = &parent->rb_left; > + } else { > + if (dma_fence_get_rcu(&other->base)) { > + dma_fence_put(&pt->base); > + pt = other; > + goto unlock; > + } > + p = &parent->rb_left; > + } > + } > + rb_link_node(&pt->node, parent, p); > + rb_insert_color(&pt->node, &obj->pt_tree); > + > + parent = rb_next(&pt->node); > + list_add_tail(&pt->link, > + parent ? &rb_entry(parent, typeof(*pt), node)->link : &obj->pt_list); > + } > +unlock: > spin_unlock_irq(&obj->lock); > > return pt; > @@ -201,8 +236,10 @@ static void timeline_fence_release(struct dma_fence *fence) > > spin_lock_irqsave(fence->lock, flags); > > - if (!list_empty(&pt->link)) > + if (!list_empty(&pt->link)) { > list_del(&pt->link); > + rb_erase(&pt->node, &parent->pt_tree); > + } > > spin_unlock_irqrestore(fence->lock, flags); > > diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h > index 899ba0e19fd3..b7a5fab12179 100644 > --- a/drivers/dma-buf/sync_debug.h > +++ b/drivers/dma-buf/sync_debug.h > @@ -14,6 +14,7 @@ > #define _LINUX_SYNC_H > > #include <linux/list.h> > +#include <linux/rbtree.h> > #include <linux/spinlock.h> > #include <linux/dma-fence.h> > > @@ -25,6 +26,7 @@ > * @kref: reference count on fence. > * @name: name of the sync_timeline. Useful for debugging > * @lock: lock protecting @child_list_head and fence.status > + * @pt_tree: rbtree of active (unsignaled/errored) sync_pts > * @pt_list: list of active (unsignaled/errored) sync_pts > * @sync_timeline_list: membership in global sync_timeline_list > */ > @@ -36,6 +38,7 @@ struct sync_timeline { > u64 context; > int value; > > + struct rb_root pt_tree; > struct list_head pt_list; > spinlock_t lock; > > @@ -51,10 +54,12 @@ static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence) > * struct sync_pt - sync_pt object > * @base: base fence object > * @link: link on the sync timeline's list > + * @node: node in the sync timeline's tree > */ > struct sync_pt { > struct dma_fence base; > struct list_head link; > + struct rb_node node; > }; > > #ifdef CONFIG_SW_SYNC > -- > 2.13.1 -- Sean Paul, Software Engineer, Google / Chromium OS _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx