From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

We can rely on the context complete interrupt to wake up waiters,
except in the case where requests are merged into a single ELSP
submission. In that case we inject MI_USER_INTERRUPT commands into
the ring buffer to ensure prompt wake-ups.

On the GLBenchmark Egypt off-screen test, for example, this
optimization decreases the number of generated interrupts per second
by a factor of two, and the number of context switches by a factor
of five to six.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/i915_irq.c  | 24 ++++++++++++------------
 drivers/gpu/drm/i915/intel_lrc.c | 26 +++++++++++++++++++++++++-
 3 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1d28d90ed901..f6fd2023aaf0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2811,6 +2811,7 @@ ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits)
 	ibx_display_interrupt_update(dev_priv, bits, 0);
 }
 
+void intel_notify_ring(struct intel_engine_cs *ring);
 
 /* i915_gem.c */
 int i915_gem_create_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3f8c753997ba..a2087bad0d64 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -994,7 +994,7 @@ static void ironlake_rps_change_irq_handler(struct drm_device *dev)
 	return;
 }
 
-static void notify_ring(struct intel_engine_cs *ring)
+void intel_notify_ring(struct intel_engine_cs *ring)
 {
 	if (!intel_ring_initialized(ring))
 		return;
@@ -1291,9 +1291,9 @@ static void ilk_gt_irq_handler(struct drm_device *dev,
 {
 	if (gt_iir &
 	    (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT))
-		notify_ring(&dev_priv->ring[RCS]);
+		intel_notify_ring(&dev_priv->ring[RCS]);
 	if (gt_iir & ILK_BSD_USER_INTERRUPT)
-		notify_ring(&dev_priv->ring[VCS]);
+		intel_notify_ring(&dev_priv->ring[VCS]);
 }
 
 static void snb_gt_irq_handler(struct drm_device *dev,
@@ -1303,11 +1303,11 @@ static void snb_gt_irq_handler(struct drm_device *dev,
 
 	if (gt_iir &
 	    (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT))
-		notify_ring(&dev_priv->ring[RCS]);
+		intel_notify_ring(&dev_priv->ring[RCS]);
 	if (gt_iir & GT_BSD_USER_INTERRUPT)
-		notify_ring(&dev_priv->ring[VCS]);
+		intel_notify_ring(&dev_priv->ring[VCS]);
 	if (gt_iir & GT_BLT_USER_INTERRUPT)
-		notify_ring(&dev_priv->ring[BCS]);
+		intel_notify_ring(&dev_priv->ring[BCS]);
 
 	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
 		      GT_BSD_CS_ERROR_INTERRUPT |
@@ -1322,7 +1322,7 @@ static __always_inline void gen8_cs_irq_handler(struct intel_engine_cs *ring,
 				       u32 iir, int test_shift)
 {
 	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
-		notify_ring(ring);
+		intel_notify_ring(ring);
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift))
 		intel_lrc_irq_handler(ring);
 }
@@ -1629,7 +1629,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 
 	if (HAS_VEBOX(dev_priv->dev)) {
 		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
-			notify_ring(&dev_priv->ring[VECS]);
+			intel_notify_ring(&dev_priv->ring[VECS]);
 
 		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
 			DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
@@ -3961,7 +3961,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 		new_iir = I915_READ16(IIR); /* Flush posted writes */
 
 		if (iir & I915_USER_INTERRUPT)
-			notify_ring(&dev_priv->ring[RCS]);
+			intel_notify_ring(&dev_priv->ring[RCS]);
 
 		for_each_pipe(dev_priv, pipe) {
 			int plane = pipe;
@@ -4157,7 +4157,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
 		new_iir = I915_READ(IIR); /* Flush posted writes */
 
 		if (iir & I915_USER_INTERRUPT)
-			notify_ring(&dev_priv->ring[RCS]);
+			intel_notify_ring(&dev_priv->ring[RCS]);
 
 		for_each_pipe(dev_priv, pipe) {
 			int plane = pipe;
@@ -4387,9 +4387,9 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 		new_iir = I915_READ(IIR); /* Flush posted writes */
 
 		if (iir & I915_USER_INTERRUPT)
-			notify_ring(&dev_priv->ring[RCS]);
+			intel_notify_ring(&dev_priv->ring[RCS]);
 		if (iir & I915_BSD_USER_INTERRUPT)
-			notify_ring(&dev_priv->ring[VCS]);
+			intel_notify_ring(&dev_priv->ring[VCS]);
 
 		for_each_pipe(dev_priv, pipe) {
 			if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS &&
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 27f06198a51e..d9be878dbde7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -359,6 +359,13 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 	spin_unlock(&dev_priv->uncore.lock);
 }
 
+static void execlists_emit_user_interrupt(struct drm_i915_gem_request *req)
+{
+	struct intel_ringbuffer *ringbuf = req->ringbuf;
+
+	iowrite32(MI_USER_INTERRUPT, ringbuf->virtual_start + req->tail - 8);
+}
+
 static int execlists_update_context(struct drm_i915_gem_request *rq)
 {
 	struct intel_engine_cs *ring = rq->ring;
@@ -433,6 +440,12 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 			cursor->elsp_submitted = req0->elsp_submitted;
 			list_move_tail(&req0->execlist_link,
 				       &ring->execlist_retired_req_list);
+			/*
+			 * When merging requests, make sure there is still
+			 * something after each batch buffer to wake up
+			 * waiters.
+			 */
+			if (cursor != req0)
+				execlists_emit_user_interrupt(req0);
 			req0 = cursor;
 		} else {
 			req1 = cursor;
@@ -472,6 +485,12 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 
 	assert_spin_locked(&ring->execlist_lock);
 
+	/*
+	 * This is effectively a context complete interrupt, so wake
+	 * up potential waiters on this ring.
+	 */
+	intel_notify_ring(ring);
+
 	head_req = list_first_entry_or_null(&ring->execlist_queue,
 					    struct drm_i915_gem_request,
 					    execlist_link);
@@ -1812,7 +1831,12 @@ static int gen8_emit_request(struct drm_i915_gem_request *request)
 				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
 	intel_logical_ring_emit(ringbuf, 0);
 	intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
-	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
+	/*
+	 * The following noop is a placeholder for an optional
+	 * MI_USER_INTERRUPT, which will be patched in at ELSP submission
+	 * time if this request will not end with a context complete interrupt.
+	 */
+	intel_logical_ring_emit(ringbuf, MI_NOOP);
 	intel_logical_ring_emit(ringbuf, MI_NOOP);
 	intel_logical_ring_advance_and_submit(request);
 
-- 
1.9.1
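
For context on the fixed offset in execlists_emit_user_interrupt():
gen8_emit_request() ends every request with two dwords, the patchable
noop followed by a padding noop, and req->tail points just past the
last emitted dword, so the placeholder is always the second-to-last
dword, i.e. 8 bytes before the tail. Below is a minimal userspace
sketch of that dword arithmetic, assuming nothing from the driver
beyond the MI_* encodings; ring, tail and emit_user_interrupt here are
illustrative stand-ins, not driver code:

    #include <stdint.h>
    #include <stdio.h>

    /* MI_INSTR(opcode, flags) encodings as in i915_reg.h. */
    #define MI_NOOP           0x00000000
    #define MI_USER_INTERRUPT (0x02 << 23)

    /*
     * 'tail' is a byte offset just past the last emitted dword; the
     * patchable placeholder is the second-to-last dword, 8 bytes back.
     */
    static void emit_user_interrupt(uint32_t *ring, uint32_t tail)
    {
            ring[(tail - 8) / 4] = MI_USER_INTERRUPT;
    }

    int main(void)
    {
            /* ..., seqno write, placeholder noop, padding noop */
            uint32_t ring[4] = { MI_NOOP, MI_NOOP, MI_NOOP, MI_NOOP };
            uint32_t tail = sizeof(ring); /* just past the last dword */

            emit_user_interrupt(ring, tail);
            printf("dword at tail-8: 0x%08x\n", ring[(tail - 8) / 4]);
            return 0;
    }

Patching the already-emitted noop in place, rather than reserving extra
ring space at submission time, keeps the per-request layout fixed; the
cost is a single posted write into the request's tail.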