We allow exported sync_file fences to be used as submit fences, but they are not the only source of user fences. We also accept an array of syncobj, and, as with sync_file, these are dma_fences underneath and so feature the same set of controls. The submit-fence allows for a request to be scheduled at the same time as the signaler, rather than after it as normal. Userspace can combine submit-fence with its own semaphores for intra-batch scheduling. Not exposing submit-fences to syncobj was at the time just a matter of pragmatic expediency. Fixes: a88b6e4cbafd ("drm/i915: Allow specification of parallel execbuf") Link: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4854 Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 14 ++- drivers/gpu/drm/i915/i915_request.c | 110 ++++++++++++++++++ include/uapi/drm/i915_drm.h | 7 +- 3 files changed, 123 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 8b854f87a249..67ba33b3de60 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2432,7 +2432,7 @@ static void __free_fence_array(struct drm_syncobj **fences, unsigned int n) { while (n--) - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); + drm_syncobj_put(ptr_mask_bits(fences[n], 3)); kvfree(fences); } @@ -2489,7 +2489,7 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); - fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); + fences[n] = ptr_pack_bits(syncobj, fence.flags, 3); } return fences; @@ -2520,7 +2520,7 @@ await_fence_array(struct i915_execbuffer *eb, struct dma_fence *fence; unsigned int flags; - syncobj = 
ptr_unpack_bits(fences[n], &flags, 2); + syncobj = ptr_unpack_bits(fences[n], &flags, 3); if (!(flags & I915_EXEC_FENCE_WAIT)) continue; @@ -2544,7 +2544,11 @@ await_fence_array(struct i915_execbuffer *eb, spin_unlock(&syncobj->lock); } - err = i915_request_await_dma_fence(eb->request, fence); + if (flags & I915_EXEC_FENCE_WAIT_SUBMIT) + err = i915_request_await_execution(eb->request, fence, + eb->engine->bond_execute); + else + err = i915_request_await_dma_fence(eb->request, fence); dma_fence_put(fence); if (err < 0) return err; @@ -2565,7 +2569,7 @@ signal_fence_array(struct i915_execbuffer *eb, struct drm_syncobj *syncobj; unsigned int flags; - syncobj = ptr_unpack_bits(fences[n], &flags, 2); + syncobj = ptr_unpack_bits(fences[n], &flags, 3); if (!(flags & I915_EXEC_FENCE_SIGNAL)) continue; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 95edc5523a01..248efbc01224 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -23,6 +23,7 @@ */ #include <linux/dma-fence-array.h> +#include <linux/dma-fence-proxy.h> #include <linux/irq_work.h> #include <linux/prefetch.h> #include <linux/sched.h> @@ -1213,6 +1214,110 @@ __i915_request_await_execution(struct i915_request *to, &from->fence); } +struct execution_proxy { + struct wait_queue_entry base; + void (*hook)(struct i915_request *rq, + struct dma_fence *signal); + struct i915_request *request; + struct dma_fence *fence; + struct timer_list timer; + struct work_struct work; +}; + +static void execution_proxy_work(struct work_struct *work) +{ + struct execution_proxy *wait = container_of(work, typeof(*wait), work); + struct i915_request *rq = wait->request; + + del_timer_sync(&wait->timer); + + if (wait->fence) { + int err; + + mutex_lock(&rq->context->timeline->mutex); + + if (dma_fence_is_i915(wait->fence)) + err = __i915_request_await_execution(rq, + to_request(wait->fence), + wait->hook); + else + err = 
i915_sw_fence_await_dma_fence(&rq->submit, + wait->fence, + I915_FENCE_TIMEOUT, + GFP_KERNEL); + if (err < 0) + i915_request_set_error_once(rq, err); + + mutex_unlock(&rq->context->timeline->mutex); + } + + i915_sw_fence_complete(&rq->submit); + + dma_fence_put(wait->fence); + kfree(wait); +} + +static int +execution_proxy_wake(struct wait_queue_entry *entry, + unsigned int mode, + int flags, + void *fence) +{ + struct execution_proxy *wait = container_of(entry, typeof(*wait), base); + + wait->fence = dma_fence_get(fence); + schedule_work(&wait->work); + + return 0; +} + +static void +execution_proxy_timer(struct timer_list *t) +{ + struct execution_proxy *wait = container_of(t, typeof(*wait), timer); + + if (dma_fence_remove_proxy_listener(wait->base.private, &wait->base)) { + struct i915_request *rq = wait->request; + + pr_notice("Asynchronous wait on proxy fence for %s:%s:%llx timed out\n", + rq->fence.ops->get_driver_name(&rq->fence), + rq->fence.ops->get_timeline_name(&rq->fence), + rq->fence.seqno); + i915_request_set_error_once(rq, -ETIMEDOUT); + + schedule_work(&wait->work); + } +} + +static int +__i915_request_await_proxy_execution(struct i915_request *rq, + struct dma_fence *fence, + unsigned long timeout, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal)) +{ + struct execution_proxy *wait; + + wait = kzalloc(sizeof(*wait), GFP_KERNEL); + if (!wait) + return -ENOMEM; + + i915_sw_fence_await(&rq->submit); + + wait->base.private = fence; + wait->base.func = execution_proxy_wake; + wait->request = rq; + wait->hook = hook; + INIT_WORK(&wait->work, execution_proxy_work); + + timer_setup(&wait->timer, execution_proxy_timer, 0); + if (timeout) + mod_timer(&wait->timer, round_jiffies_up(jiffies + timeout)); + + dma_fence_add_proxy_listener(fence, &wait->base); + return 0; +} + int i915_request_await_execution(struct i915_request *rq, struct dma_fence *fence, @@ -1249,6 +1354,11 @@ i915_request_await_execution(struct i915_request *rq, ret = 
__i915_request_await_execution(rq, to_request(fence), hook); + else if (dma_fence_is_proxy(fence)) + ret = __i915_request_await_proxy_execution(rq, + fence, + I915_FENCE_TIMEOUT, + hook); else ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, I915_FENCE_TIMEOUT, diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 14b67cd6b54b..704dd0e3bc1d 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1040,9 +1040,10 @@ struct drm_i915_gem_exec_fence { */ __u32 handle; -#define I915_EXEC_FENCE_WAIT (1<<0) -#define I915_EXEC_FENCE_SIGNAL (1<<1) -#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1)) +#define I915_EXEC_FENCE_WAIT (1u << 0) +#define I915_EXEC_FENCE_SIGNAL (1u << 1) +#define I915_EXEC_FENCE_WAIT_SUBMIT (1u << 2) +#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_WAIT_SUBMIT << 1)) __u32 flags; }; -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx