[PATCH 17/19] drm/i915/execlists: Process the CSB directly from inside the irq handler

Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> · Thu, 17 May 2018 08:40:53 +0100

As we now only use the cached HWSP access to read the CSB buffer and no
longer use any forcewaked mmio, processing the CSB is fast and possible
to do so directly from inside the CS interrupt handler.

We have to rearrange the irq handler slightly as we wish to preserve the
single threaded access to the event buffer and ELSP, so we need to
perform the processing with the master interrupt still disabled to avoid
concurrent CS interrupts on other CPUs. We also have to ensure that no
one else is updating the execlist->port[] simultaneously (i.e.
execlists_dequeue) and so encompass ourselves inside the
engine->timeline.lock (although we may lift this restriction in the
future by separating the two phases to operate on distinct port[]), and
similarly must flush the interrupt handler and disable further processing
during a reset.

References: https://bugs.freedesktop.org/show_bug.cgi?id=106373
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_gem.h         |  5 +++
 drivers/gpu/drm/i915/i915_irq.c         | 14 +++-----
 drivers/gpu/drm/i915/intel_engine_cs.c  |  8 ++---
 drivers/gpu/drm/i915/intel_lrc.c        | 45 +++++++++++--------------
 drivers/gpu/drm/i915/intel_lrc.h        |  1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 6 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 5bf24cfc218c..5b098acec566 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -79,4 +79,9 @@ static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
 		tasklet_unlock_wait(t);
 }
 
+static inline bool __tasklet_is_enabled(const struct tasklet_struct *t)
+{
+	return likely(!atomic_read(&t->count));
+}
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 460878572515..3f139ff64385 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1462,13 +1462,11 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 static void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 {
-	struct intel_engine_execlists * const execlists = &engine->execlists;
 	bool tasklet = false;
 
 	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
-		if (READ_ONCE(engine->execlists.active))
-			tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
-						    &engine->irq_posted);
+		intel_engine_handle_execlists_irq(engine);
+		tasklet = true;
 	}
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
@@ -1479,7 +1477,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	}
 
 	if (tasklet)
-		tasklet_hi_schedule(&execlists->tasklet);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void gen8_gt_irq_ack(struct drm_i915_private *i915,
@@ -2165,6 +2163,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
 		I915_WRITE(VLV_IER, 0);
 
 		gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
+		gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
 
 		if (iir & I915_DISPLAY_PORT_INTERRUPT)
 			hotplug_status = i9xx_hpd_irq_ack(dev_priv);
@@ -2189,8 +2188,6 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
 		I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
 		POSTING_READ(GEN8_MASTER_IRQ);
 
-		gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
-
 		if (hotplug_status)
 			i9xx_hpd_irq_handler(dev_priv, hotplug_status);
 
@@ -2761,6 +2758,7 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 
 	/* Find, clear, then process each source of interrupt */
 	gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
+	gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
 	if (master_ctl & ~GEN8_GT_IRQS) {
@@ -2771,8 +2769,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 
 	I915_WRITE_FW(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
 
-	gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
-
 	return IRQ_HANDLED;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index ad6508e991f3..7165a3d21443 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1297,12 +1297,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 		ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
 		read = GEN8_CSB_READ_PTR(ptr);
 		write = GEN8_CSB_WRITE_PTR(ptr);
-		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
+		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n",
 			   read, execlists->csb_head,
 			   write,
 			   intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
-			   yesno(test_bit(ENGINE_IRQ_EXECLIST,
-					  &engine->irq_posted)),
 			   yesno(test_bit(TASKLET_STATE_SCHED,
 					  &engine->execlists.tasklet.state)),
 			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
@@ -1483,11 +1481,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	spin_unlock(&b->rb_lock);
 	local_irq_restore(flags);
 
-	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
+	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
 		   engine->irq_posted,
 		   yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
-				  &engine->irq_posted)),
-		   yesno(test_bit(ENGINE_IRQ_EXECLIST,
 				  &engine->irq_posted)));
 
 	drm_printf(m, "HWSP:\n");
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6701778c7dc1..954eb3a71051 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -567,8 +567,10 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 {
 	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
 
+	__unwind_incomplete_requests(container_of(execlists,
+						  typeof(struct intel_engine_cs),
+						  execlists));
 	execlists_cancel_port_requests(execlists);
-	execlists_unwind_incomplete_requests(execlists);
 
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
@@ -880,14 +882,6 @@ static void reset_irq(struct intel_engine_cs *engine)
 	synchronize_hardirq(engine->i915->drm.irq);
 
 	clear_gtiir(engine);
-
-	/*
-	 * The port is checked prior to scheduling a tasklet, but
-	 * just in case we have suspended the tasklet to do the
-	 * wedging make sure that when it wakes, it decides there
-	 * is no work to do by clearing the irq_posted bit.
-	 */
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -960,7 +954,13 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	local_irq_restore(flags);
 }
 
-static void process_csb(struct intel_engine_cs *engine)
+static inline bool
+reset_in_progress(const struct intel_engine_execlists *execlists)
+{
+	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
+}
+
+void intel_engine_handle_execlists_irq(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -973,9 +973,8 @@ static void process_csb(struct intel_engine_cs *engine)
 	 */
 	GEM_BUG_ON(!engine->i915->gt.awake);
 
-	/* Clear before reading to catch new interrupts */
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-	smp_mb__after_atomic();
+	if (reset_in_progress(execlists))
+		return;
 
 	/* Note that csb_write, csb_status may be either in HWSP or mmio */
 	head = execlists->csb_head;
@@ -986,6 +985,8 @@ static void process_csb(struct intel_engine_cs *engine)
 
 	rmb(); /* Hopefully paired with a wmb() in HW */
 
+	spin_lock(&engine->timeline.lock);
+
 	do {
 		struct i915_request *rq;
 		unsigned int status;
@@ -1104,6 +1105,8 @@ static void process_csb(struct intel_engine_cs *engine)
 	writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
 	       execlists->csb_read);
 	execlists->csb_head = head;
+
+	spin_unlock(&engine->timeline.lock);
 }
 
 /*
@@ -1114,19 +1117,10 @@ static void execlists_submission_tasklet(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 
-	GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
+	GEM_TRACE("%s awake?=%d, active=%x\n",
 		  engine->name,
 		  engine->i915->gt.awake,
-		  engine->execlists.active,
-		  test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
-
-	/*
-	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
-	 * imposing the cost of a locked atomic transaction when submitting a
-	 * new request (outside of the context-switch interrupt).
-	 */
-	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
-		process_csb(engine);
+		  engine->execlists.active);
 
 	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
 		execlists_dequeue(engine);
@@ -1836,8 +1830,7 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
 	 * and avoid blaming an innocent request if the stall was due to the
 	 * preemption itself.
 	 */
-	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
-		process_csb(engine);
+	synchronize_hardirq(engine->i915->drm.irq);
 
 	/*
 	 * The last active request can then be no later than the last request
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194e930c..772ca3993e51 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -102,6 +102,7 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
 struct drm_i915_private;
 struct i915_gem_context;
 
+void intel_engine_handle_execlists_irq(struct intel_engine_cs *engine);
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0a96088f522f..42a136810e15 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -358,7 +358,6 @@ struct intel_engine_cs {
 	atomic_t irq_count;
 	unsigned long irq_posted;
 #define ENGINE_IRQ_BREADCRUMB 0
-#define ENGINE_IRQ_EXECLIST 1
 
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx