From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Currently a single request submission in execlist mode results in two emitted interrupts. First the user interrupt arrives and is then followed by the context complete interrupt. (This time delay is caused by the time taken by the GPU to write out the context and update the CSB.) This also means every individual batch submission, and also the end of the coalesced request submission, will result in two interrupts instead of potentially one. If we make sure that the last or solitary request does not emit the MI_USER_INTERRUPT, but instead relies on context complete to wake up the waiters, we can reduce the CPU time spent in interrupt handlers. In some interrupt-heavy benchmarks this can be even 50% fewer interrupts and 1-2% reduced CPU usage (it halved the time spent in interrupt handlers on an i7 BDW). But on the other hand this also has the theoretical potential of increasing the wake-up latency (see earlier reference for the delay between emitting the user interrupt and context complete). v2: Clear MI_USER_INTERRUPT instructions when applicable rather than emitting them for potentially more robustness and not breaking GuC submission. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- I failed to find a benchmark, or a test, which would reliably show a win (or a loss) with this patch. Therefore I do not think it is worth the complexity and am filing it for reference only. 
--- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_irq.c | 24 ++++++++++++------------ drivers/gpu/drm/i915/intel_lrc.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9b82c4532893..94d1084a090b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2825,6 +2825,7 @@ ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits) ibx_display_interrupt_update(dev_priv, bits, 0); } +void intel_notify_ring(struct intel_engine_cs *ring); /* i915_gem.c */ int i915_gem_create_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 3f8c753997ba..a2087bad0d64 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -994,7 +994,7 @@ static void ironlake_rps_change_irq_handler(struct drm_device *dev) return; } -static void notify_ring(struct intel_engine_cs *ring) +void intel_notify_ring(struct intel_engine_cs *ring) { if (!intel_ring_initialized(ring)) return; @@ -1291,9 +1291,9 @@ static void ilk_gt_irq_handler(struct drm_device *dev, { if (gt_iir & (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT)) - notify_ring(&dev_priv->ring[RCS]); + intel_notify_ring(&dev_priv->ring[RCS]); if (gt_iir & ILK_BSD_USER_INTERRUPT) - notify_ring(&dev_priv->ring[VCS]); + intel_notify_ring(&dev_priv->ring[VCS]); } static void snb_gt_irq_handler(struct drm_device *dev, @@ -1303,11 +1303,11 @@ static void snb_gt_irq_handler(struct drm_device *dev, if (gt_iir & (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT)) - notify_ring(&dev_priv->ring[RCS]); + intel_notify_ring(&dev_priv->ring[RCS]); if (gt_iir & GT_BSD_USER_INTERRUPT) - notify_ring(&dev_priv->ring[VCS]); + intel_notify_ring(&dev_priv->ring[VCS]); if (gt_iir & GT_BLT_USER_INTERRUPT) - notify_ring(&dev_priv->ring[BCS]); + 
intel_notify_ring(&dev_priv->ring[BCS]); if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | @@ -1322,7 +1322,7 @@ static __always_inline void gen8_cs_irq_handler(struct intel_engine_cs *ring, u32 iir, int test_shift) { if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) - notify_ring(ring); + intel_notify_ring(ring); if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) intel_lrc_irq_handler(ring); } @@ -1629,7 +1629,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) if (HAS_VEBOX(dev_priv->dev)) { if (pm_iir & PM_VEBOX_USER_INTERRUPT) - notify_ring(&dev_priv->ring[VECS]); + intel_notify_ring(&dev_priv->ring[VECS]); if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); @@ -3961,7 +3961,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) new_iir = I915_READ16(IIR); /* Flush posted writes */ if (iir & I915_USER_INTERRUPT) - notify_ring(&dev_priv->ring[RCS]); + intel_notify_ring(&dev_priv->ring[RCS]); for_each_pipe(dev_priv, pipe) { int plane = pipe; @@ -4157,7 +4157,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) new_iir = I915_READ(IIR); /* Flush posted writes */ if (iir & I915_USER_INTERRUPT) - notify_ring(&dev_priv->ring[RCS]); + intel_notify_ring(&dev_priv->ring[RCS]); for_each_pipe(dev_priv, pipe) { int plane = pipe; @@ -4387,9 +4387,9 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) new_iir = I915_READ(IIR); /* Flush posted writes */ if (iir & I915_USER_INTERRUPT) - notify_ring(&dev_priv->ring[RCS]); + intel_notify_ring(&dev_priv->ring[RCS]); if (iir & I915_BSD_USER_INTERRUPT) - notify_ring(&dev_priv->ring[VCS]); + intel_notify_ring(&dev_priv->ring[VCS]); for_each_pipe(dev_priv, pipe) { if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS && diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 27f06198a51e..173b3faf5a2a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ 
b/drivers/gpu/drm/i915/intel_lrc.c @@ -359,6 +359,20 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0, spin_unlock(&dev_priv->uncore.lock); } +static void execlists_clear_user_interrupt(struct drm_i915_gem_request *req) +{ + struct intel_ringbuffer *ringbuf = req->ringbuf; + + iowrite32(MI_NOOP, ringbuf->virtual_start + req->tail - 8); +} + +static void execlists_emit_user_interrupt(struct drm_i915_gem_request *req) +{ + struct intel_ringbuffer *ringbuf = req->ringbuf; + + iowrite32(MI_USER_INTERRUPT, ringbuf->virtual_start + req->tail - 8); +} + static int execlists_update_context(struct drm_i915_gem_request *rq) { struct intel_engine_cs *ring = rq->ring; @@ -433,6 +447,14 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) cursor->elsp_submitted = req0->elsp_submitted; list_move_tail(&req0->execlist_link, &ring->execlist_retired_req_list); + /* + * When coalescing a previously submitted request, + * it might have been the last one in the previous + * submission so had its user interrupt cleared. + * Put it back in. + */ + if (req0->elsp_submitted) + execlists_emit_user_interrupt(req0); req0 = cursor; } else { req1 = cursor; @@ -440,6 +462,15 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) } } + /* + * Last requests submitted on each port do not need to generate + * user interrupts since we will get context complete. + */ + if (req0->elsp_submitted == 0) + execlists_clear_user_interrupt(req0); + if (req1 && req1->elsp_submitted == 0) + execlists_clear_user_interrupt(req1); + if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) { /* * WaIdleLiteRestore: make sure we never cause a lite @@ -472,6 +503,12 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring, assert_spin_locked(&ring->execlist_lock); + /* + * This is effectively a context complete interrupt so wake + * up potential waiters on this ring. 
+ */ + intel_notify_ring(ring); + head_req = list_first_entry_or_null(&ring->execlist_queue, struct drm_i915_gem_request, execlist_link); -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx