Track the latest fence waited upon on each context, and only add a new
asynchronous wait if the new fence is more recent than the recorded
fence for that context. This requires us to filter out unordered
timelines, which are noted by DMA_FENCE_NO_CONTEXT.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 33 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h |  2 ++
 lib/radix-tree.c                        |  1 +
 3 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 313cdff7c6dd..c184f1d26f25 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -606,6 +606,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	i915_priotree_init(&req->priotree);
 
+	INIT_RADIX_TREE(&req->waits, GFP_KERNEL);
 	INIT_LIST_HEAD(&req->active_list);
 	req->i915 = dev_priv;
 	req->engine = engine;
@@ -723,6 +724,27 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 		return 0;
 
+	/* Squash repeated waits to the same timelines, picking the latest */
+	if (fence->context != DMA_FENCE_NO_CONTEXT) {
+		void __rcu **slot;
+
+		slot = radix_tree_lookup_slot(&req->waits, fence->context);
+		if (!slot) {
+			ret = radix_tree_insert(&req->waits,
+						fence->context, fence);
+			if (ret)
+				return ret;
+		} else {
+			struct dma_fence *old =
+				rcu_dereference_protected(*slot, true);
+
+			if (!dma_fence_is_later(fence, old))
+				return 0;
+
+			radix_tree_replace_slot(&req->waits, slot, fence);
+		}
+	}
+
 	if (dma_fence_is_i915(fence))
 		return i915_gem_request_await_request(req, to_request(fence));
 
@@ -843,6 +865,15 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 			   round_jiffies_up_relative(HZ));
 }
 
+static void free_radixtree(struct radix_tree_root *root)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	radix_tree_for_each_slot(slot, root, &iter, 0)
+		radix_tree_iter_delete(root, &iter, slot);
+}
+
 /*
  * NB: This function is not allowed to fail. Doing so would mean the the
  * request is not being tracked for completion but the work itself is
@@ -943,6 +974,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	local_bh_disable();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
+
+	free_radixtree(&request->waits);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index a211c53c813f..638899b9c170 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -137,6 +137,8 @@ struct drm_i915_gem_request {
 	struct i915_priotree priotree;
 	struct i915_dependency dep;
 
+	struct radix_tree_root waits;
+
 	/** GEM sequence number associated with this request on the
 	 * global execution timeline. It is zero when the request is not
 	 * on the HW queue (i.e. not on the engine timeline list).
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 691a9ad48497..84cccf7138c4 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2022,6 +2022,7 @@ void radix_tree_iter_delete(struct radix_tree_root *root,
 	if (__radix_tree_delete(root, iter->node, slot))
 		iter->index = iter->next_index;
 }
+EXPORT_SYMBOL(radix_tree_iter_delete);
 
 /**
  * radix_tree_delete_item - delete an item from a radix tree
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx