Reorder the code so that we can reuse __i915_request_await_execution() from
a special case in i915_request_await_request() in the next patch.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_request.c | 264 ++++++++++++++--------------
 1 file changed, 132 insertions(+), 132 deletions(-)
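The usual reason for this sort of move is that a static function must be
defined (or at least declared) before its first use in the translation unit:
with __i915_request_await_execution() hoisted above
i915_request_await_request(), the next patch can call it from the new special
case without adding a forward declaration. A minimal, self-contained C sketch
of that ordering rule (the function names and logic below are invented for
illustration and are not part of this patch):

/*
 * Illustration only: definition order for static functions.
 * await_execution() is defined first, so await_request() further down
 * the file can reuse it for its special case without a forward
 * declaration, mirroring the reordering done by the diff below.
 */
#include <stdio.h>

static int await_execution(int seqno)
{
	return seqno + 1;	/* stand-in for the real await logic */
}

static int await_request(int seqno)
{
	if (seqno < 0)		/* the hypothetical "special case" */
		return await_execution(seqno);

	return seqno;
}

int main(void)
{
	printf("%d\n", await_request(-2));	/* prints -1 */
	return 0;
}

Keeping the definitions in call order also avoids scattering forward
declarations through i915_request.c.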
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c282719ad3ac..33bbad623e02 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1053,37 +1053,91 @@ emit_semaphore_wait(struct i915_request *to,
 					     I915_FENCE_GFP);
 }
 
+static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
+					  struct dma_fence *fence)
+{
+	return __intel_timeline_sync_is_later(tl,
+					      fence->context,
+					      fence->seqno - 1);
+}
+
+static int intel_timeline_sync_set_start(struct intel_timeline *tl,
+					 const struct dma_fence *fence)
+{
+	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
+}
+
 static int
-i915_request_await_request(struct i915_request *to, struct i915_request *from)
+__i915_request_await_execution(struct i915_request *to,
+			       struct i915_request *from,
+			       void (*hook)(struct i915_request *rq,
+					    struct dma_fence *signal))
 {
-	int ret;
+	int err;
 
-	GEM_BUG_ON(to == from);
-	GEM_BUG_ON(to->timeline == from->timeline);
+	GEM_BUG_ON(intel_context_is_barrier(from->context));
 
-	if (i915_request_completed(from)) {
-		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
+	/* Submit both requests at the same time */
+	err = __await_execution(to, from, hook, I915_FENCE_GFP);
+	if (err)
+		return err;
+
+	/* Squash repeated depenendices to the same timelines */
+	if (intel_timeline_sync_has_start(i915_request_timeline(to),
+					  &from->fence))
 		return 0;
+
+	/*
+	 * Wait until the start of this request.
+	 *
+	 * The execution cb fires when we submit the request to HW. But in
+	 * many cases this may be long before the request itself is ready to
+	 * run (consider that we submit 2 requests for the same context, where
+	 * the request of interest is behind an indefinite spinner). So we hook
+	 * up to both to reduce our queues and keep the execution lag minimised
+	 * in the worst case, though we hope that the await_start is elided.
+	 */
+	err = i915_request_await_start(to, from);
+	if (err < 0)
+		return err;
+
+	/*
+	 * Ensure both start together [after all semaphores in signal]
+	 *
+	 * Now that we are queued to the HW at roughly the same time (thanks
+	 * to the execute cb) and are ready to run at roughly the same time
+	 * (thanks to the await start), our signaler may still be indefinitely
+	 * delayed by waiting on a semaphore from a remote engine. If our
+	 * signaler depends on a semaphore, so indirectly do we, and we do not
+	 * want to start our payload until our signaler also starts theirs.
+	 * So we wait.
+	 *
+	 * However, there is also a second condition for which we need to wait
+	 * for the precise start of the signaler. Consider that the signaler
+	 * was submitted in a chain of requests following another context
+	 * (with just an ordinary intra-engine fence dependency between the
+	 * two). In this case the signaler is queued to HW, but not for
+	 * immediate execution, and so we must wait until it reaches the
+	 * active slot.
+	 */
+	if (intel_engine_has_semaphores(to->engine) &&
+	    !i915_request_has_initial_breadcrumb(to)) {
+		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+		if (err < 0)
+			return err;
 	}
 
+	/* Couple the dependency tree for PI on this exposed to->fence */
 	if (to->engine->schedule) {
-		ret = i915_sched_node_add_dependency(&to->sched,
+		err = i915_sched_node_add_dependency(&to->sched,
 						     &from->sched,
-						     I915_DEPENDENCY_EXTERNAL);
-		if (ret < 0)
-			return ret;
+						     I915_DEPENDENCY_WEAK);
+		if (err < 0)
+			return err;
 	}
 
-	if (to->engine == from->engine)
-		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
-						       &from->submit,
-						       I915_FENCE_GFP);
-	else
-		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	return intel_timeline_sync_set_start(i915_request_timeline(to),
+					     &from->fence);
 }
 
 static void mark_external(struct i915_request *rq)
@@ -1136,23 +1190,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
 }
 
 int
-i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
+i915_request_await_execution(struct i915_request *rq,
+			     struct dma_fence *fence,
+			     void (*hook)(struct i915_request *rq,
+					  struct dma_fence *signal))
 {
 	struct dma_fence **child = &fence;
 	unsigned int nchild = 1;
 	int ret;
 
-	/*
-	 * Note that if the fence-array was created in signal-on-any mode,
-	 * we should *not* decompose it into its individual fences. However,
-	 * we don't currently store which mode the fence-array is operating
-	 * in. Fortunately, the only user of signal-on-any is private to
-	 * amdgpu and we should not see any incoming fence-array from
-	 * sync-file being in signal-on-any mode.
-	 */
 	if (dma_fence_is_array(fence)) {
 		struct dma_fence_array *array = to_dma_fence_array(fence);
 
+		/* XXX Error for signal-on-any fence arrays */
+
 		child = array->fences;
 		nchild = array->num_fences;
 		GEM_BUG_ON(!nchild);
@@ -1165,138 +1216,78 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 			continue;
 		}
 
-		/*
-		 * Requests on the same timeline are explicitly ordered, along
-		 * with their dependencies, by i915_request_add() which ensures
-		 * that requests are submitted in-order through each ring.
-		 */
 		if (fence->context == rq->fence.context)
 			continue;
 
-		/* Squash repeated waits to the same timelines */
-		if (fence->context &&
-		    intel_timeline_sync_is_later(i915_request_timeline(rq),
-						 fence))
-			continue;
+		/*
+		 * We don't squash repeated fence dependencies here as we
+		 * want to run our callback in all cases.
+		 */
 
 		if (dma_fence_is_i915(fence))
-			ret = i915_request_await_request(rq, to_request(fence));
+			ret = __i915_request_await_execution(rq,
+							     to_request(fence),
+							     hook);
 		else
 			ret = i915_request_await_external(rq, fence);
 		if (ret < 0)
 			return ret;
-
-		/* Record the latest fence used against each timeline */
-		if (fence->context)
-			intel_timeline_sync_set(i915_request_timeline(rq),
-						fence);
 	} while (--nchild);
 
 	return 0;
 }
 
-static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
-					  struct dma_fence *fence)
-{
-	return __intel_timeline_sync_is_later(tl,
-					      fence->context,
-					      fence->seqno - 1);
-}
-
-static int intel_timeline_sync_set_start(struct intel_timeline *tl,
-					 const struct dma_fence *fence)
-{
-	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
-}
-
 static int
-__i915_request_await_execution(struct i915_request *to,
-			       struct i915_request *from,
-			       void (*hook)(struct i915_request *rq,
-					    struct dma_fence *signal))
+i915_request_await_request(struct i915_request *to, struct i915_request *from)
 {
-	int err;
-
-	GEM_BUG_ON(intel_context_is_barrier(from->context));
+	int ret;
 
-	/* Submit both requests at the same time */
-	err = __await_execution(to, from, hook, I915_FENCE_GFP);
-	if (err)
-		return err;
+	GEM_BUG_ON(to == from);
+	GEM_BUG_ON(to->timeline == from->timeline);
 
-	/* Squash repeated depenendices to the same timelines */
-	if (intel_timeline_sync_has_start(i915_request_timeline(to),
-					  &from->fence))
+	if (i915_request_completed(from)) {
+		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
 		return 0;
-
-	/*
-	 * Wait until the start of this request.
-	 *
-	 * The execution cb fires when we submit the request to HW. But in
-	 * many cases this may be long before the request itself is ready to
-	 * run (consider that we submit 2 requests for the same context, where
-	 * the request of interest is behind an indefinite spinner). So we hook
-	 * up to both to reduce our queues and keep the execution lag minimised
-	 * in the worst case, though we hope that the await_start is elided.
-	 */
-	err = i915_request_await_start(to, from);
-	if (err < 0)
-		return err;
-
-	/*
-	 * Ensure both start together [after all semaphores in signal]
-	 *
-	 * Now that we are queued to the HW at roughly the same time (thanks
-	 * to the execute cb) and are ready to run at roughly the same time
-	 * (thanks to the await start), our signaler may still be indefinitely
-	 * delayed by waiting on a semaphore from a remote engine. If our
-	 * signaler depends on a semaphore, so indirectly do we, and we do not
-	 * want to start our payload until our signaler also starts theirs.
-	 * So we wait.
-	 *
-	 * However, there is also a second condition for which we need to wait
-	 * for the precise start of the signaler. Consider that the signaler
-	 * was submitted in a chain of requests following another context
-	 * (with just an ordinary intra-engine fence dependency between the
-	 * two). In this case the signaler is queued to HW, but not for
-	 * immediate execution, and so we must wait until it reaches the
-	 * active slot.
-	 */
-	if (intel_engine_has_semaphores(to->engine) &&
-	    !i915_request_has_initial_breadcrumb(to)) {
-		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
-		if (err < 0)
-			return err;
 	}
 
-	/* Couple the dependency tree for PI on this exposed to->fence */
 	if (to->engine->schedule) {
-		err = i915_sched_node_add_dependency(&to->sched,
+		ret = i915_sched_node_add_dependency(&to->sched,
 						     &from->sched,
-						     I915_DEPENDENCY_WEAK);
-		if (err < 0)
-			return err;
+						     I915_DEPENDENCY_EXTERNAL);
+		if (ret < 0)
+			return ret;
 	}
 
-	return intel_timeline_sync_set_start(i915_request_timeline(to),
-					     &from->fence);
+	if (to->engine == READ_ONCE(from->engine))
+		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
+						       &from->submit,
+						       I915_FENCE_GFP);
+	else
+		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
+	if (ret < 0)
+		return ret;
+
+	return 0;
 }
 
 int
-i915_request_await_execution(struct i915_request *rq,
-			     struct dma_fence *fence,
-			     void (*hook)(struct i915_request *rq,
-					  struct dma_fence *signal))
+i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 {
 	struct dma_fence **child = &fence;
 	unsigned int nchild = 1;
 	int ret;
 
+	/*
+	 * Note that if the fence-array was created in signal-on-any mode,
+	 * we should *not* decompose it into its individual fences. However,
+	 * we don't currently store which mode the fence-array is operating
+	 * in. Fortunately, the only user of signal-on-any is private to
+	 * amdgpu and we should not see any incoming fence-array from
+	 * sync-file being in signal-on-any mode.
+	 */
 	if (dma_fence_is_array(fence)) {
 		struct dma_fence_array *array = to_dma_fence_array(fence);
 
-		/* XXX Error for signal-on-any fence arrays */
-
 		child = array->fences;
 		nchild = array->num_fences;
 		GEM_BUG_ON(!nchild);
@@ -1309,22 +1300,31 @@ i915_request_await_execution(struct i915_request *rq,
 			continue;
 		}
 
+		/*
+		 * Requests on the same timeline are explicitly ordered, along
+		 * with their dependencies, by i915_request_add() which ensures
+		 * that requests are submitted in-order through each ring.
+		 */
 		if (fence->context == rq->fence.context)
 			continue;
 
-		/*
-		 * We don't squash repeated fence dependencies here as we
-		 * want to run our callback in all cases.
-		 */
+		/* Squash repeated waits to the same timelines */
+		if (fence->context &&
+		    intel_timeline_sync_is_later(i915_request_timeline(rq),
+						 fence))
+			continue;
 
 		if (dma_fence_is_i915(fence))
-			ret = __i915_request_await_execution(rq,
-							     to_request(fence),
-							     hook);
+			ret = i915_request_await_request(rq, to_request(fence));
 		else
 			ret = i915_request_await_external(rq, fence);
 		if (ret < 0)
 			return ret;
+
+		/* Record the latest fence used against each timeline */
+		if (fence->context)
+			intel_timeline_sync_set(i915_request_timeline(rq),
+						fence);
 	} while (--nchild);
 
 	return 0;
-- 
2.20.1