A complementary possibility is to implement a different sorting algorithm under the same API as the kernel's sort() and so keep the simplicity, potentially moving the new sort under lib/ if it would be found more widely useful. v2: * Hold on to fence references and reduce commentary. (Christian) * Record and use latest signaled timestamp in the 2nd loop too. * Consolidate zero or one fences fast paths. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx> Fixes: 245a4a7b531c ("dma-buf: generalize dma_fence unwrap & merging v3") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3617 Cc: Christian König <christian.koenig@xxxxxxx> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx> Cc: Sumit Semwal <sumit.semwal@xxxxxxxxxx> Cc: Gustavo Padovan <gustavo@xxxxxxxxxxx> Cc: Friedrich Vock <friedrich.vock@xxxxxx> Cc: linux-media@xxxxxxxxxxxxxxx Cc: dri-devel@xxxxxxxxxxxxxxxxxxxxx Cc: linaro-mm-sig@xxxxxxxxxxxxxxxx Cc: <stable@xxxxxxxxxxxxxxx> # v6.0+ --- drivers/dma-buf/dma-fence-unwrap.c | 129 ++++++++++++++--------------- 1 file changed, 64 insertions(+), 65 deletions(-) diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index 628af51c81af..26cad03340ce 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -12,6 +12,7 @@ #include <linux/dma-fence-chain.h> #include <linux/dma-fence-unwrap.h> #include <linux/slab.h> +#include <linux/sort.h> /* Internal helper to start new array iteration, don't use directly */ static struct dma_fence * @@ -59,6 +60,25 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor) } EXPORT_SYMBOL_GPL(dma_fence_unwrap_next); + +static int fence_cmp(const void *_a, const void *_b) +{ + struct dma_fence *a = *(struct dma_fence **)_a; + struct dma_fence *b = *(struct dma_fence **)_b; + + if (a->context < b->context) + return -1; + else if (a->context > b->context) + return 1; + + if (dma_fence_is_later(b, a)) + return -1; + else if (dma_fence_is_later(a, b)) + return 1; + + return 0; +} + /* Implementation for the dma_fence_merge() marco, don't use directly */ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence **fences, @@ -67,8 +87,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence_array *result; struct dma_fence *tmp, **array; ktime_t timestamp; - unsigned int i; - size_t count; + int i, j, count; count = 0; timestamp = ns_to_ktime(0); @@ -96,78 +115,58 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, if (!array) return NULL; - /* - * This trashes the input fence array and uses it as position for the - * following merge loop. This works because the dma_fence_merge() - * wrapper macro is creating this temporary array on the stack together - * with the iterators. - */ - for (i = 0; i < num_fences; ++i) - fences[i] = dma_fence_unwrap_first(fences[i], &iter[i]); - count = 0; - do { - unsigned int sel; - -restart: - tmp = NULL; - for (i = 0; i < num_fences; ++i) { - struct dma_fence *next; - - while (fences[i] && dma_fence_is_signaled(fences[i])) - fences[i] = dma_fence_unwrap_next(&iter[i]); - - next = fences[i]; - if (!next) - continue; - - /* - * We can't guarantee that inpute fences are ordered by - * context, but it is still quite likely when this - * function is used multiple times. So attempt to order - * the fences by context as we pass over them and merge - * fences with the same context. - */ - if (!tmp || tmp->context > next->context) { - tmp = next; - sel = i; - - } else if (tmp->context < next->context) { - continue; - - } else if (dma_fence_is_later(tmp, next)) { - fences[i] = dma_fence_unwrap_next(&iter[i]); - goto restart; + for (i = 0; i < num_fences; ++i) { + dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) { + if (!dma_fence_is_signaled(tmp)) { + array[count++] = dma_fence_get(tmp); } else { - fences[sel] = dma_fence_unwrap_next(&iter[sel]); - goto restart; + ktime_t t = dma_fence_timestamp(tmp); + + if (ktime_after(t, timestamp)) + timestamp = t; } } + } - if (tmp) { - array[count++] = dma_fence_get(tmp); - fences[sel] = dma_fence_unwrap_next(&iter[sel]); + if (count == 0 || count == 1) + goto return_fastpath; + + sort(array, count, sizeof(*array), fence_cmp, NULL); + + /* + * Only keep the most recent fence for each context. + */ + j = 0; + tmp = array[0]; + for (i = 1; i < count; i++) { + if (array[i]->context != tmp->context) + array[j++] = tmp; + else + dma_fence_put(tmp); + tmp = array[i]; + } + if (j == 0 || tmp->context != array[j - 1]->context) { + array[j++] = tmp; + } + count = j; + + if (count > 1) { + result = dma_fence_array_create(count, array, + dma_fence_context_alloc(1), + 1, false); + if (!result) { + tmp = NULL; + goto return_tmp; } - } while (tmp); - - if (count == 0) { - tmp = dma_fence_allocate_private_stub(ktime_get()); - goto return_tmp; + return &result->base; } - if (count == 1) { +return_fastpath: + if (count == 0) + tmp = dma_fence_allocate_private_stub(timestamp); + else tmp = array[0]; - goto return_tmp; - } - - result = dma_fence_array_create(count, array, - dma_fence_context_alloc(1), - 1, false); - if (!result) { - tmp = NULL; - goto return_tmp; - } - return &result->base; return_tmp: kfree(array); -- 2.46.0