Continuing the theme of bypassing ksoftirqd latency, also first try to directly submit from the CS interrupt handler to clear the ELSP and queue the next. In the past, we have been hesitant to do this as the context switch processing has been quite heavy, requiring forcewaked mmio. However, as we now can read the GPU state from the cacheable HWSP, it is relatively cheap! v2: Explain why we test_bit(IRQ_EXECLIST) after doing notify_ring (it's because the notify_ring() may itself trigger direct submission clearing the bit) Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_irq.c | 21 ++++++++++++++------- drivers/gpu/drm/i915/intel_guc_submission.c | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.h | 16 ++++++++++++++++ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f9bc3aaa90d0..eecfeabbb006 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1465,19 +1465,26 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) struct intel_engine_execlists * const execlists = &engine->execlists; bool tasklet = false; - if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { - if (READ_ONCE(engine->execlists.active)) - tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST, - &engine->irq_posted); - } + if (iir & GT_CONTEXT_SWITCH_INTERRUPT && READ_ONCE(execlists->active)) + tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST, + &engine->irq_posted); if (iir & GT_RENDER_USER_INTERRUPT) { notify_ring(engine); - tasklet |= USES_GUC_SUBMISSION(engine->i915); + /* + * notify_ring() may trigger direct submission onto this + * engine, clearing the ENGINE_IRQ_EXECLIST bit. In that + * case, we don't want to resubmit and so clear the tasklet + * boolean. 
GuC never sets the ENGINE_IRQ_EXECLIST bit and + * so when using the GuC this equates to an unconditional + * setting of tasklet to true. + */ + if (!test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) + tasklet = USES_GUC_SUBMISSION(engine->i915); } if (tasklet) - tasklet_hi_schedule(&execlists->tasklet); + execlists_tasklet(execlists); } static void gen8_gt_irq_ack(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 6bfe30af7826..7d4542b46f5e 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -782,6 +782,8 @@ static void guc_submission_tasklet(unsigned long data) struct execlist_port *port = execlists->port; struct i915_request *rq; + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + rq = port_request(port); while (rq && i915_request_completed(rq)) { trace_i915_request_out(rq); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index f5545391d76a..da7e00ff2c6b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -717,6 +717,22 @@ execlists_port_complete(struct intel_engine_execlists * const execlists, return port; } +static inline void +execlists_tasklet(struct intel_engine_execlists * const execlists) +{ + struct tasklet_struct * const t = &execlists->tasklet; + + if (unlikely(atomic_read(&t->count))) /* GPU reset active */ + return; + + if (tasklet_trylock(t)) { + t->func(t->data); + tasklet_unlock(t); + } else { + tasklet_hi_schedule(t); + } +} + static inline unsigned int intel_engine_flag(const struct intel_engine_cs *engine) { -- 2.17.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx