From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Currently a single request submission in execlist mode results in two emitted interrupts. First the user interrupt arrives and is then followed by the context complete interrupt. (This time delay is caused by the time taken by the GPU to write out the context and update the CSB.) This also means every individual batch submission, and also the end of the coalesced request submission, will result in two interrupts instead of potentially one. If we make sure that the last or solitary request does not emit the MI_USER_INTERRUPT, but instead relies on context complete to wake up the waiters, we can reduce the CPU time spent in interrupt handlers. In some interrupt-heavy benchmarks this can be even 50% fewer interrupts and 1-2% reduced CPU usage (it halved the time spent in interrupt handlers on an i7 BDW). But on the other hand this also has the theoretical potential of increasing the wake-up latency (see earlier reference for the delay between emitting the user interrupt and context complete). v2: Clear MI_USER_INTERRUPT instructions when applicable rather than emitting them for potentially more robustness and not breaking GuC submission. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- I failed to find a benchmark, or a test, which would reliably show a win (or a loss) with this patch. Therefore I do not think it is worth the complexity and am filing it for reference only. 
--- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_irq.c | 24 ++++++++++++------------ drivers/gpu/drm/i915/intel_lrc.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9b82c4532893..94d1084a090b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2825,6 +2825,7 @@ ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits) ibx_display_interrupt_update(dev_priv, bits, 0); } +void intel_notify_ring(struct intel_engine_cs *ring); /* i915_gem.c */ int i915_gem_create_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 3f8c753997ba..a2087bad0d64 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -994,7 +994,7 @@ static void ironlake_rps_change_irq_handler(struct drm_device *dev) return; } -static void notify_ring(struct intel_engine_cs *ring) +void intel_notify_ring(struct intel_engine_cs *ring) { if (!intel_ring_initialized(ring)) return; @@ -1291,9 +1291,9 @@ static void ilk_gt_irq_handler(struct drm_device *dev, { if (gt_iir & (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT)) - notify_ring(&dev_priv->ring[RCS]); + intel_notify_ring(&dev_priv->ring[RCS]); if (gt_iir & ILK_BSD_USER_INTERRUPT) - notify_ring(&dev_priv->ring[VCS]); + intel_notify_ring(&dev_priv->ring[VCS]); } static void snb_gt_irq_handler(struct drm_device *dev, @@ -1303,11 +1303,11 @@ static void snb_gt_irq_handler(struct drm_device *dev, if (gt_iir & (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT)) - notify_ring(&dev_priv->ring[RCS]); + intel_notify_ring(&dev_priv->ring[RCS]); if (gt_iir & GT_BSD_USER_INTERRUPT) - notify_ring(&dev_priv->ring[VCS]); + intel_notify_ring(&dev_priv->ring[VCS]); if (gt_iir & GT_BLT_USER_INTERRUPT) - notify_ring(&dev_priv->ring[BCS]); + 
intel_notify_ring(&dev_priv->ring[BCS]); if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | @@ -1322,7 +1322,7 @@ static __always_inline void gen8_cs_irq_handler(struct intel_engine_cs *ring, u32 iir, int test_shift) { if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) - notify_ring(ring); + intel_notify_ring(ring); if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) intel_lrc_irq_handler(ring); } @@ -1629,7 +1629,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) if (HAS_VEBOX(dev_priv->dev)) { if (pm_iir & PM_VEBOX_USER_INTERRUPT) - notify_ring(&dev_priv->ring[VECS]); + intel_notify_ring(&dev_priv->ring[VECS]); if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); @@ -3961,7 +3961,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) new_iir = I915_READ16(IIR); /* Flush posted writes */ if (iir & I915_USER_INTERRUPT) - notify_ring(&dev_priv->ring[RCS]); + intel_notify_ring(&dev_priv->ring[RCS]); for_each_pipe(dev_priv, pipe) { int plane = pipe; @@ -4157,7 +4157,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) new_iir = I915_READ(IIR); /* Flush posted writes */ if (iir & I915_USER_INTERRUPT) - notify_ring(&dev_priv->ring[RCS]); + intel_notify_ring(&dev_priv->ring[RCS]); for_each_pipe(dev_priv, pipe) { int plane = pipe; @@ -4387,9 +4387,9 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) new_iir = I915_READ(IIR); /* Flush posted writes */ if (iir & I915_USER_INTERRUPT) - notify_ring(&dev_priv->ring[RCS]); + intel_notify_ring(&dev_priv->ring[RCS]); if (iir & I915_BSD_USER_INTERRUPT) - notify_ring(&dev_priv->ring[VCS]); + intel_notify_ring(&dev_priv->ring[VCS]); for_each_pipe(dev_priv, pipe) { if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS && diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 27f06198a51e..173b3faf5a2a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ 
b/drivers/gpu/drm/i915/intel_lrc.c @@ -359,6 +359,20 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0, spin_unlock(&dev_priv->uncore.lock); } +static void execlists_clear_user_interrupt(struct drm_i915_gem_request *req) +{ + struct intel_ringbuffer *ringbuf = req->ringbuf; + + iowrite32(MI_NOOP, ringbuf->virtual_start + req->tail - 8); +} + +static void execlists_emit_user_interrupt(struct drm_i915_gem_request *req) +{ + struct intel_ringbuffer *ringbuf = req->ringbuf; + + iowrite32(MI_USER_INTERRUPT, ringbuf->virtual_start + req->tail - 8); +} + static int execlists_update_context(struct drm_i915_gem_request *rq) { struct intel_engine_cs *ring = rq->ring; @@ -433,6 +447,14 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) cursor->elsp_submitted = req0->elsp_submitted; list_move_tail(&req0->execlist_link, &ring->execlist_retired_req_list); + /* + * When coalescing a previously submitted request, + * it might have been the last one in the previous + * submission so had its user interrupt cleared. + * Put it back in. + */ + if (req0->elsp_submitted) + execlists_emit_user_interrupt(req0); req0 = cursor; } else { req1 = cursor; @@ -440,6 +462,15 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) } } + /* + * Last requests submitted on each port do not need to generate + * user interrupts since we will get context complete. + */ + if (req0->elsp_submitted == 0) + execlists_clear_user_interrupt(req0); + if (req1 && req1->elsp_submitted == 0) + execlists_clear_user_interrupt(req1); + if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) { /* * WaIdleLiteRestore: make sure we never cause a lite @@ -472,6 +503,12 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring, assert_spin_locked(&ring->execlist_lock); + /* + * This is effectively a context complete interrupt so wake + * up potential waiters on this ring. 
+ */ + intel_notify_ring(ring); + head_req = list_first_entry_or_null(&ring->execlist_queue, struct drm_i915_gem_request, execlist_link); -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx