Virtual engines are fleeting. They carry a reference count and may be freed when their last request is retired. This makes them unsuitable for the task of housing engine->retire.work so assert that it is not used. Tvrtko tracked down an instance where we did indeed violate this rule. In virtual_submit_request, we flush a completed request directly with __i915_request_submit and this causes us to queue that request on the veng's breadcrumb list and signal it. Leading us down a path where we should not attach the retire. v2: Always select a physical engine before submitting, and so avoid using the veng as a signaler. Reported-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Fixes: dc93c9b69315 ("drm/i915/gt: Schedule request retirement when signaler idles") Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- drivers/gpu/drm/i915/gt/intel_engine.h | 1 + drivers/gpu/drm/i915/gt/intel_gt_requests.c | 3 +++ drivers/gpu/drm/i915/gt/intel_lrc.c | 17 ++++++++++++++--- drivers/gpu/drm/i915/i915_request.c | 2 ++ 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index b36ec1fddc3d..5b21ca5478c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -217,6 +217,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); static inline void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) { + GEM_BUG_ON(!engine->breadcrumbs.irq_work.func); irq_work_queue(&engine->breadcrumbs.irq_work); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 7ef1d37970f6..8a5054f21bf8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -99,6 +99,9 @@ static bool add_retire(struct intel_engine_cs *engine, void intel_engine_add_retire(struct intel_engine_cs *engine, struct intel_timeline *tl) { + /* We don't deal well with the engine disappearing beneath us */ + GEM_BUG_ON(intel_engine_is_virtual(engine)); + if (add_retire(engine, tl)) schedule_work(&engine->retire_work); } diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index c196fb90c59f..e2bd1c357afc 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -4883,6 +4883,18 @@ static void virtual_submission_tasklet(unsigned long data) local_irq_enable(); } +static void __ve_request_submit(const struct virtual_engine *ve, + struct i915_request *rq) +{ + /* + * Select a real engine to act as our permanent storage + * and signaler for the stale request, and prevent + * this virtual engine from leaking into the execution state. + */ + rq->engine = ve->siblings[0]; /* chosen at random! */ + __i915_request_submit(rq); +} + static void virtual_submit_request(struct i915_request *rq) { struct virtual_engine *ve = to_virtual_engine(rq->engine); @@ -4900,12 +4912,12 @@ static void virtual_submit_request(struct i915_request *rq) old = ve->request; if (old) { /* background completion event from preempt-to-busy */ GEM_BUG_ON(!i915_request_completed(old)); - __i915_request_submit(old); + __ve_request_submit(ve, old); i915_request_put(old); } if (i915_request_completed(rq)) { - __i915_request_submit(rq); + __ve_request_submit(ve, rq); ve->base.execlists.queue_priority_hint = INT_MIN; ve->request = NULL; @@ -5004,7 +5016,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); - intel_engine_init_breadcrumbs(&ve->base); intel_engine_init_execlists(&ve->base); ve->base.cops = &virtual_context_ops; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0ecc2cf64216..2c45d4b93e2c 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -358,6 +358,8 @@ bool __i915_request_submit(struct i915_request *request) GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->active.lock); + GEM_BUG_ON(intel_engine_is_virtual(engine)); + /* * With the advent of preempt-to-busy, we frequently encounter * requests that we have unsubmitted from HW, but left running -- 2.25.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx