Re: [PATCH 2/2] drm/i915: Delay disabling the user interrupt for breadcrumbs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 17/02/2017 15:51, Chris Wilson wrote:
A significant cost in setting up a wait is the overhead of enabling the
interrupt. As we disable the interrupt whenever the queue of waiters is
empty, if we are frequently waiting on alternating batches, we end up
re-enabling the interrupt on a frequent basis. We do want to disable the
interrupt during normal operations as under high load it may add several
thousand interrupts/s - we have been known in the past to occupy whole
cores with our interrupt handler after accidentally leaving user
interrupts enabled. As a compromise, leave the interrupt enabled until
the next IRQ, or the system is idle. This gives a small window for a
waiter to keep the interrupt active and not be delayed by having to
re-enable the interrupt.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_gem.c          |  4 +-
 drivers/gpu/drm/i915/i915_irq.c          |  5 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 95 +++++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 +-
 4 files changed, 65 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6745dcbf3799..9c87aacce43b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3003,8 +3003,10 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	if (wait_for(intel_execlists_idle(dev_priv), 10))
 		DRM_ERROR("Timeout waiting for engines to idle\n");

-	for_each_engine(engine, dev_priv, id)
+	for_each_engine(engine, dev_priv, id) {
+		intel_engine_disarm_breadcrumbs(engine);
 		i915_gem_batch_pool_fini(&engine->batch_pool);
+	}

 	GEM_BUG_ON(!dev_priv->gt.awake);
 	dev_priv->gt.awake = false;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 0c370c687c2a..fa597a29bc1d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1036,9 +1036,6 @@ static void notify_ring(struct intel_engine_cs *engine)
 	struct drm_i915_gem_request *rq = NULL;
 	struct intel_wait *wait;

-	if (!intel_engine_has_waiter(engine))
-		return;
-
 	trace_i915_gem_request_notify(engine);
 	atomic_inc(&engine->irq_count);
 	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
@@ -1064,6 +1061,8 @@ static void notify_ring(struct intel_engine_cs *engine)
 			rq = wait->request;

 		wake_up_process(wait->tsk);
+	} else {
+		__intel_engine_disarm_breadcrumbs(engine);

So we disarm on the irq following the waiter exiting. But the overall effect depends on the timings of the wake up thread getting scheduled in combined with batch duration which I am not sure I quite like.

 	}
 	spin_unlock(&engine->breadcrumbs.lock);

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 860372653a59..94c6ce9c0a6f 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -35,8 +35,10 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
 {
 	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	struct intel_wait *wait;
+	unsigned long flags;

-	if (!b->irq_enabled)
+	if (!intel_engine_has_waiter(engine))
 		return;

 	if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) {
@@ -49,10 +51,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
 	 * to process the pending interupt (e.g, low priority task on a loaded
 	 * system) and wait until it sleeps before declaring a missed interrupt.
 	 */
-	if (!intel_engine_wakeup(engine)) {
+	spin_lock_irqsave(&b->lock, flags);
+	wait = b->first_wait;
+	if (wait && !wake_up_process(wait->tsk)) {
 		mod_timer(&b->hangcheck, wait_timeout());
-		return;
+		wait = NULL;
 	}
+	spin_unlock_irqrestore(&b->lock, flags);
+	if (!wait)
+		return;

 	DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
 	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
@@ -107,6 +114,34 @@ static void irq_disable(struct intel_engine_cs *engine)
 	spin_unlock(&engine->i915->irq_lock);
 }

+void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	assert_spin_locked(&b->lock);
+
+	if (b->irq_enabled) {
+		irq_disable(engine);
+		b->irq_enabled = false;
+	}
+
+	b->irq_armed = false;

Do we need the two level, armed & enabled scheme? I mean, why not just have irq_enabled and disable it from the timer callback? It is a bit challenging to figure out how the two level thing works.

Would a simpler scheme work where we would just bump the irq_enabled counter to N on every waiter entering, decrement by one in each user interrupt, and finally turn off on reaching zero? Maybe it would be too costly in terms of useless wake-ups. Hm, not sure.

+}
+
+void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	unsigned long flags;
+
+	if (!b->irq_armed)
+		return;
+
+	spin_lock_irqsave(&b->lock, flags);
+	if (!intel_engine_has_waiter(engine))
+		__intel_engine_disarm_breadcrumbs(engine);
+	spin_unlock_irqrestore(&b->lock, flags);
+}
+
 static bool use_fake_irq(const struct intel_breadcrumbs *b)
 {
 	const struct intel_engine_cs *engine =
@@ -131,9 +166,17 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
 	struct drm_i915_private *i915 = engine->i915;

 	assert_spin_locked(&b->lock);
-	if (b->rpm_wakelock)
+	if (b->irq_armed)
 		return;

+	/* The breadcrumb irq will be disarmed on the interrupt after the
+	 * waiters are signaled. This gives us a single interrupt window in
+	 * which we can add a new waiter and avoid the cost of re-enabling
+	 * the irq.
+	 */
+	b->irq_armed = true;
+	GEM_BUG_ON(b->irq_enabled);
+
 	if (I915_SELFTEST_ONLY(b->mock)) {
 		/* For our mock objects we want to avoid interaction
 		 * with the real hardware (which is not set up). So
@@ -142,17 +185,15 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
 		 * implementation to call intel_engine_wakeup()
 		 * itself when it wants to simulate a user interrupt,
 		 */
-		b->rpm_wakelock = true;
 		return;
 	}

 	/* Since we are waiting on a request, the GPU should be busy
-	 * and should have its own rpm reference. For completeness,
-	 * record an rpm reference for ourselves to cover the
-	 * interrupt we unmask.
+	 * and should have its own rpm reference. This is tracked
+	 * by i915->gt.awake, we can forgo holding our own wakref
+	 * for the interrupt as before i915->gt.awake is released (when
+	 * the driver is idle) we disarm the breadcrumbs.
 	 */
-	intel_runtime_pm_get_noresume(i915);
-	b->rpm_wakelock = true;

 	/* No interrupts? Kick the waiter every jiffie! */
 	if (intel_irqs_enabled(i915)) {
@@ -171,29 +212,6 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
 	}
 }

-static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
-{
-	struct intel_engine_cs *engine =
-		container_of(b, struct intel_engine_cs, breadcrumbs);
-
-	assert_spin_locked(&b->lock);
-	if (!b->rpm_wakelock)
-		return;
-
-	if (I915_SELFTEST_ONLY(b->mock)) {
-		b->rpm_wakelock = false;
-		return;
-	}
-
-	if (b->irq_enabled) {
-		irq_disable(engine);
-		b->irq_enabled = false;
-	}
-
-	intel_runtime_pm_put(engine->i915);
-	b->rpm_wakelock = false;
-}
-
 static inline struct intel_wait *to_wait(struct rb_node *node)
 {
 	return rb_entry(node, struct intel_wait, node);
@@ -413,7 +431,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
 			wake_up_process(b->first_wait->tsk);
 		} else {
 			b->first_wait = NULL;
-			__intel_breadcrumbs_disable_irq(b);
 		}
 	} else {
 		GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
@@ -707,14 +724,16 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
 	cancel_fake_irq(engine);
 	spin_lock_irq(&b->lock);

-	__intel_breadcrumbs_disable_irq(b);
+	if (b->irq_enabled)
+		irq_enable(engine);

Can't hit that GEM_BUG_ON(b->irq_enabled) in __intel_breadcrumbs_enable_irq from here?

+	else
+		irq_disable(engine);
+
 	if (intel_engine_has_waiter(engine)) {
-		__intel_breadcrumbs_enable_irq(b);
 		if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted))
 			wake_up_process(b->first_wait->tsk);
-	} else {
+		mod_timer(&b->hangcheck, wait_timeout());

If there are no waiters why schedule the timer? It won't do anything anyway AFAICS.

I need to give this some more thought tomorrow.

Regards,

Tvrtko

 		/* sanitize the IMR and unmask any auxiliary interrupts */
-		irq_disable(engine);
 	}

 	spin_unlock_irq(&b->lock);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 1271b6ebdd4d..1362169ef2b4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -246,8 +246,8 @@ struct intel_engine_cs {

 		unsigned int hangcheck_interrupts;

+		bool irq_armed : 1;
 		bool irq_enabled : 1;
-		bool rpm_wakelock : 1;
 		I915_SELFTEST_DECLARE(bool mock : 1);
 	} breadcrumbs;

@@ -621,6 +621,8 @@ static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
 	return wait;
 }

+void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux