A complementary possibility is to implement a different sorting algorithm
under the same API as the kernel's sort(), which would keep the simplicity
of this approach, potentially moving the new sort under lib/ if it proves
more widely useful.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx>
Cc: Christian König <christian.koenig@xxxxxxx>
Cc: Friedrich Vock <friedrich.vock@xxxxxx>
---
 drivers/dma-buf/dma-fence-unwrap.c | 129 ++++++++++++++++-------------
 1 file changed, 73 insertions(+), 56 deletions(-)

diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c
index 75c3e37fd617..750dc20a9e9d 100644
--- a/drivers/dma-buf/dma-fence-unwrap.c
+++ b/drivers/dma-buf/dma-fence-unwrap.c
@@ -12,6 +12,7 @@
 #include <linux/dma-fence-chain.h>
 #include <linux/dma-fence-unwrap.h>
 #include <linux/slab.h>
+#include <linux/sort.h>
 
 /* Internal helper to start new array iteration, don't use directly */
 static struct dma_fence *
@@ -59,6 +60,25 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor)
 }
 EXPORT_SYMBOL_GPL(dma_fence_unwrap_next);
 
+
+static int fence_cmp(const void *_a, const void *_b)
+{
+	struct dma_fence *a = *(struct dma_fence **)_a;
+	struct dma_fence *b = *(struct dma_fence **)_b;
+
+	if (a->context < b->context)
+		return -1;
+	else if (a->context > b->context)
+		return 1;
+
+	if (dma_fence_is_later(b, a))
+		return -1;
+	else if (dma_fence_is_later(a, b))
+		return 1;
+
+	return 0;
+}
+
 /* Implementation for the dma_fence_merge() marco, don't use directly */
 struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
 					   struct dma_fence **fences,
@@ -67,9 +87,12 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
 	struct dma_fence *tmp, *signaled, **array;
 	struct dma_fence_array *result;
 	ktime_t timestamp;
-	unsigned int i;
-	size_t count;
+	int i, j, count;
 
+	/*
+	 * Count the number of unwrapped fences and find the latest signaled
+	 * timestamp.
+	 */
 	count = 0;
 	timestamp = ns_to_ktime(0);
 	for (i = 0; i < num_fences; ++i) {
@@ -98,74 +121,68 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
 	else if (count == 1)
 		return dma_fence_get(signaled);
 
+	/*
+	 * Allocate and populate the array.
+	 */
 	array = kmalloc_array(count, sizeof(*array), GFP_KERNEL);
 	if (!array)
 		return NULL;
 
-	/*
-	 * This trashes the input fence array and uses it as position for the
-	 * following merge loop. This works because the dma_fence_merge()
-	 * wrapper macro is creating this temporary array on the stack together
-	 * with the iterators.
-	 */
-	for (i = 0; i < num_fences; ++i)
-		fences[i] = dma_fence_unwrap_first(fences[i], &iter[i]);
-
 	count = 0;
-	do {
-		unsigned int sel;
-
-restart:
-		tmp = NULL;
-		for (i = 0; i < num_fences; ++i) {
-			struct dma_fence *next;
-
-			while (fences[i] && dma_fence_is_signaled(fences[i]))
-				fences[i] = dma_fence_unwrap_next(&iter[i]);
-
-			next = fences[i];
-			if (!next)
-				continue;
-
-			/*
-			 * We can't guarantee that inpute fences are ordered by
-			 * context, but it is still quite likely when this
-			 * function is used multiple times. So attempt to order
-			 * the fences by context as we pass over them and merge
-			 * fences with the same context.
-			 */
-			if (!tmp || tmp->context > next->context) {
-				tmp = next;
-				sel = i;
-
-			} else if (tmp->context < next->context) {
-				continue;
-
-			} else if (dma_fence_is_later(tmp, next)) {
-				fences[i] = dma_fence_unwrap_next(&iter[i]);
-				goto restart;
-			} else {
-				fences[sel] = dma_fence_unwrap_next(&iter[sel]);
-				goto restart;
-			}
+	for (i = 0; i < num_fences; ++i) {
+		dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) {
+			if (!dma_fence_is_signaled(tmp))
+				array[count++] = tmp;
 		}
-
-		if (tmp) {
-			array[count++] = dma_fence_get(tmp);
-			fences[sel] = dma_fence_unwrap_next(&iter[sel]);
+	}
+
+	/*
+	 * Same fast-paths as above, in case some fences got signaled
+	 * in the meantime.
+	 */
+	if (count == 0) {
+		tmp = dma_fence_allocate_private_stub(timestamp);
+		goto return_tmp;
+	} else if (count == 1) {
+		tmp = dma_fence_get(array[0]);
+		goto return_tmp;
+	}
+
+	/*
+	 * Sort in context and seqno order.
+	 */
+	sort(array, count, sizeof(*array), fence_cmp, NULL);
+
+	/*
+	 * Only keep the most recent fence for each context.
+	 */
+	j = 0;
+	tmp = array[0];
+	for (i = 1; i < count; i++) {
+		if (array[i]->context != tmp->context) {
+			array[j++] = dma_fence_get(tmp);
 		}
-	} while (tmp);
-
+		tmp = array[i];
+	}
+	if (j == 0 || tmp->context != array[j - 1]->context) {
+		array[j++] = dma_fence_get(tmp);
+	}
+	count = j;
+
+	/*
+	 * And another fast-path like the earlier ones.
+	 */
 	if (count == 0) {
 		tmp = dma_fence_allocate_private_stub(ktime_get());
 		goto return_tmp;
-	}
-
-	if (count == 1) {
+	} else if (count == 1) {
 		tmp = array[0];
 		goto return_tmp;
 	}
 
+	/*
+	 * Finally, create the output fence array.
+	 */
 	result = dma_fence_array_create(count, array,
 					dma_fence_context_alloc(1),
 					1, false);
-- 
2.46.0
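As a rough sketch of the commit message's idea of a different sorting algorithm
behind the same comparator-based calling convention as the kernel's sort(),
something like the insertion sort below could be a starting point. This is
purely illustrative and not part of the patch: the names insertion_sort() and
swap_bytes() are invented for the example, and a real candidate for lib/ would
need to match the existing sort() prototype (including the optional swap
callback), get proper kerneldoc and go through its own review.

#include <linux/types.h>

/*
 * Hypothetical sketch only, not part of this patch: an insertion sort behind
 * the same comparator-based convention as sort(). All names are invented for
 * illustration.
 */
static void swap_bytes(char *a, char *b, size_t size)
{
	/* Swap two elements of 'size' bytes, one byte at a time. */
	while (size--) {
		char t = *a;

		*a++ = *b;
		*b++ = t;
	}
}

static void insertion_sort(void *base, size_t num, size_t size,
			   int (*cmp)(const void *, const void *))
{
	char *b = base;
	size_t i, j;

	for (i = 1; i < num; i++) {
		/* Move element i down until cmp() says it is in place. */
		for (j = i; j > 0 && cmp(b + (j - 1) * size, b + j * size) > 0; j--)
			swap_bytes(b + (j - 1) * size, b + j * size, size);
	}
}

With such a helper the call site in __dma_fence_unwrap_merge() would stay a
one-liner, e.g. insertion_sort(array, count, sizeof(*array), fence_cmp);
insertion sort may be attractive here because the unwrapped fences are often
already largely ordered by context.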