Re: [PATCH v7] drm/i915: Slaughter the thundering i915_wait_request herd

Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> · Mon, 7 Dec 2015 15:08:28 +0000

Hi,

On 03/12/15 16:22, Chris Wilson wrote:
One particularly stressful scenario consists of many independent tasks
all competing for GPU time and waiting upon the results (e.g. realtime
transcoding of many, many streams). One bottleneck in particular is that
each client waits on its own results, but every client is woken up after
every batchbuffer - hence the thunder of hooves as then every client must
do its heavyweight dance to read a coherent seqno to see if it is the
lucky one.

Ideally, we only want one client to wake up after the interrupt and
check its request for completion. Since the requests must retire in
order, we can select the first client on the oldest request to be woken.
Once that client has completed his wait, we can then wake up the
next client and so on.

v2: Convert from a kworker per engine into a dedicated kthread for the
bottom-half.
v3: Rename request members and tweak comments.
v4: Use a per-engine spinlock in the breadcrumbs bottom-half.
v5: Fix race in locklessly checking waiter status and kicking the task on
adding a new waiter.
v6: Fix deciding when to force the timer to hide missing interrupts.
v7: Move the bottom-half from the kthread to the first client process.

I like the idea a lot so I'll make some comments even before you post v9 
or later. :)

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@xxxxxxxxx>
Cc: "Gong, Zhipeng" <zhipeng.gong@xxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx>
Cc: Dave Gordon <david.s.gordon@xxxxxxxxx>
---
  drivers/gpu/drm/i915/Makefile            |   1 +
  drivers/gpu/drm/i915/i915_debugfs.c      |   2 +-
  drivers/gpu/drm/i915/i915_drv.h          |   3 +-
  drivers/gpu/drm/i915/i915_gem.c          | 108 ++++++-----------
  drivers/gpu/drm/i915/i915_gpu_error.c    |   2 +-
  drivers/gpu/drm/i915/i915_irq.c          |  17 +--
  drivers/gpu/drm/i915/intel_breadcrumbs.c | 197 +++++++++++++++++++++++++++++++
  drivers/gpu/drm/i915/intel_lrc.c         |   5 +-
  drivers/gpu/drm/i915/intel_ringbuffer.c  |   5 +-
  drivers/gpu/drm/i915/intel_ringbuffer.h  |  47 +++++++-
  10 files changed, 297 insertions(+), 90 deletions(-)
  create mode 100644 drivers/gpu/drm/i915/intel_breadcrumbs.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0851de07bd13..d3b9d3618719 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \
  	  i915_gem_userptr.o \
  	  i915_gpu_error.o \
  	  i915_trace_points.o \
+	  intel_breadcrumbs.o \
  	  intel_lrc.o \
  	  intel_mocs.o \
  	  intel_ringbuffer.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0583eda642d7..68fbe4d1f91d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2323,7 +2323,7 @@ static int count_irq_waiters(struct drm_i915_private *i915)
  	int i;

  	for_each_ring(ring, i915, i)
-		count += ring->irq_refcount;
+		count += intel_engine_has_waiter(ring);

Don't care how many any longer? Okay in debugfs, but also in error capture?

  	return count;
  }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2acf2cf0b836..30d470934874 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1383,7 +1383,7 @@ struct i915_gpu_error {
  #define I915_STOP_RING_ALLOW_WARN      (1 << 30)

  	/* For missed irq/seqno simulation. */
-	unsigned int test_irq_rings;
+	unsigned long test_irq_rings;
  };

  enum modeset_restore {
@@ -2800,7 +2800,6 @@ ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits)
  	ibx_display_interrupt_update(dev_priv, bits, 0);
  }

-
  /* i915_gem.c */
  int i915_gem_create_ioctl(struct drm_device *dev, void *data,
  			  struct drm_file *file_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6e051402e340..8dcbac584e84 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1125,17 +1125,6 @@ i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
  	return 0;
  }

-static void fake_irq(unsigned long data)
-{
-	wake_up_process((struct task_struct *)data);
-}
-
-static bool missed_irq(struct drm_i915_private *dev_priv,
-		       struct intel_engine_cs *ring)
-{
-	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
-}
-
  static unsigned long local_clock_us(unsigned *cpu)
  {
  	unsigned long t;
@@ -1183,9 +1172,6 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
  	 * takes to sleep on a request, on the order of a microsecond.
  	 */

-	if (req->ring->irq_refcount)
-		return -EBUSY;
-
  	/* Only spin if we know the GPU is processing this request */
  	if (!i915_gem_request_started(req, false))
  		return -EAGAIN;
@@ -1204,9 +1190,6 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
  		cpu_relax_lowlatency();
  	}

-	if (i915_gem_request_completed(req, false))
-		return 0;
-
  	return -EAGAIN;
  }

@@ -1231,26 +1214,19 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
  			s64 *timeout,
  			struct intel_rps_client *rps)
  {
-	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	const bool irq_test_in_progress =
-		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
  	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
-	DEFINE_WAIT(wait);
-	unsigned long timeout_expire;
+	struct intel_breadcrumb wait;
+	unsigned long timeout_remain;
  	s64 before, now;
  	int ret;

-	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
-
  	if (list_empty(&req->list))
  		return 0;

  	if (i915_gem_request_completed(req, true))
  		return 0;

-	timeout_expire = 0;
+	timeout_remain = MAX_SCHEDULE_TIMEOUT;
  	if (timeout) {
  		if (WARN_ON(*timeout < 0))
  			return -EINVAL;
@@ -1258,34 +1234,43 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
  		if (*timeout == 0)
  			return -ETIME;

-		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
+		timeout_remain = nsecs_to_jiffies_timeout(*timeout);
  	}

-	if (INTEL_INFO(dev_priv)->gen >= 6)
-		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
-
  	/* Record current time in case interrupted by signal, or wedged */
  	trace_i915_gem_request_wait_begin(req);
  	before = ktime_get_raw_ns();

-	/* Optimistic spin for the next jiffie before touching IRQs */
-	ret = __i915_spin_request(req, state);
-	if (ret == 0)
-		goto out;
+	if (INTEL_INFO(req->i915)->gen >= 6)
+		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);

-	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
-		ret = -ENODEV;
-		goto out;
+	/* Optimistic spin for the next jiffie before touching IRQs */

Exactly the opposite! :)

+	if (intel_breadcrumbs_add_waiter(req, &wait)) {
+		ret = __i915_spin_request(req, state);
+		if (ret == 0)
+			goto out;
  	}

  	for (;;) {
-		struct timer_list timer;
-
-		prepare_to_wait(&ring->irq_queue, &wait, state);
+		set_task_state(wait.task, state);
+
+		/* Ensure our read of the seqno is coherent so that we
+		 * do not "miss an interrupt" (i.e. if this is the last
+		 * request and the seqno write from the GPU is not visible
+		 * by the time the interrupt fires, we will see that the
+		 * request is incomplete and go back to sleep awaiting
+		 * another interrupt that will never come.)
+		 *
+		 * Strictly, we only need to do this once after an interrupt,
+		 * but it is easier and safer to do it every time the waiter
+		 * is woken.
+		 */
+		if (i915_gem_request_completed(req, false)) {
+			ret = 0;
+			break;
+		}

-		/* We need to check whether any gpu reset happened in between
-		 * the caller grabbing the seqno and now ... */
-		if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) {
+		if (unlikely(req->reset_counter != i915_reset_counter(&req->i915->gpu_error))) {
  			/* As we do not requeue the request over a GPU reset,
  			 * if one does occur we know that the request is
  			 * effectively complete.
@@ -1294,46 +1279,21 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
  			break;
  		}

-		if (i915_gem_request_completed(req, false)) {
-			ret = 0;
-			break;
-		}
-
-		if (signal_pending_state(state, current)) {
+		if (signal_pending_state(state, wait.task)) {
  			ret = -ERESTARTSYS;
  			break;
  		}

-		if (timeout && time_after_eq(jiffies, timeout_expire)) {
+		timeout_remain = io_schedule_timeout(timeout_remain);
+		if (timeout_remain == 0) {
  			ret = -ETIME;
  			break;
  		}
-
-		i915_queue_hangcheck(dev_priv);
-
-		timer.function = NULL;
-		if (timeout || missed_irq(dev_priv, ring)) {
-			unsigned long expire;
-
-			setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
-			expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
-			mod_timer(&timer, expire);
-		}
-
-		io_schedule();
-
-		if (timer.function) {
-			del_singleshot_timer_sync(&timer);
-			destroy_timer_on_stack(&timer);
-		}
  	}
-	if (!irq_test_in_progress)
-		ring->irq_put(ring);
-
-	finish_wait(&ring->irq_queue, &wait);
-
+	__set_task_state(wait.task, TASK_RUNNING);
  out:
  	now = ktime_get_raw_ns();
+	intel_breadcrumbs_remove_waiter(req, &wait);
  	trace_i915_gem_request_wait_end(req);

  	if (timeout) {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 06ca4082735b..f805d117f3d1 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -900,7 +900,7 @@ static void i915_record_ring_state(struct drm_device *dev,
  		ering->instdone = I915_READ(GEN2_INSTDONE);
  	}

-	ering->waiting = waitqueue_active(&ring->irq_queue);
+	ering->waiting = intel_engine_has_waiter(ring);
  	ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
  	ering->seqno = ring->get_seqno(ring, false);
  	ering->acthd = intel_ring_get_active_head(ring);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 28c0329f8281..57b113132fd7 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -996,12 +996,11 @@ static void ironlake_rps_change_irq_handler(struct drm_device *dev)

  static void notify_ring(struct intel_engine_cs *ring)
  {
-	if (!intel_ring_initialized(ring))
+	if (ring->i915 == NULL)

I glanced at something about this in another thread; maybe best to keep it 
consolidated in one patch?

  		return;

  	trace_i915_gem_request_notify(ring);
-
-	wake_up_all(&ring->irq_queue);
+	intel_engine_wakeup(ring);
  }

  static void vlv_c0_read(struct drm_i915_private *dev_priv,
@@ -1083,7 +1082,7 @@ static bool any_waiters(struct drm_i915_private *dev_priv)
  	int i;

  	for_each_ring(ring, dev_priv, i)
-		if (ring->irq_refcount)
+		if (intel_engine_has_waiter(ring))
  			return true;

  	return false;
@@ -2411,7 +2410,7 @@ static void i915_error_wake_up(struct drm_i915_private *dev_priv,

  	/* Wake up __wait_seqno, potentially holding dev->struct_mutex. */
  	for_each_ring(ring, dev_priv, i)
-		wake_up_all(&ring->irq_queue);
+		intel_engine_wakeup(ring);

  	/* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */
  	wake_up_all(&dev_priv->pending_flip_queue);
@@ -2986,16 +2985,18 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
  			if (ring_idle(ring, seqno)) {
  				ring->hangcheck.action = HANGCHECK_IDLE;

-				if (waitqueue_active(&ring->irq_queue)) {
+				if (intel_engine_has_waiter(ring)) {
  					/* Issue a wake-up to catch stuck h/w. */
  					if (!test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
-						if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)))
+						if (!test_bit(ring->id, &dev_priv->gpu_error.test_irq_rings))
  							DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
  								  ring->name);
  						else
  							DRM_INFO("Fake missed irq on %s\n",
  								 ring->name);
-						wake_up_all(&ring->irq_queue);
+						mod_delayed_work(system_highpri_wq,
+								 &ring->breadcrumbs.irq,
+								 0);
  					}
  					/* Safeguard against driver failure */
  					ring->hangcheck.score += BUSY;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
new file mode 100644
index 000000000000..17d51269b774
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kthread.h>

Not any more.

+
+#include "i915_drv.h"
+
+static bool irq_get(struct intel_engine_cs *engine)
+{
+	if (test_bit(engine->id, &engine->i915->gpu_error.test_irq_rings))
+		return false;

I don't understand why knowledge of this debugfs interface needs to be 
sprinkled around? I mean, I am not even 100% sure what the debugfs 
interface is for. But it looks to be about software masking user 
interrupts on a ring? Why not just mask the interrupt delivery based on 
this mask at the source?

+
+	if (!intel_irqs_enabled(engine->i915))
+		return false;
+
+	return engine->irq_get(engine);
+}
+
+static void intel_breadcrumbs_irq(struct work_struct *work)
+{
+	struct intel_breadcrumbs *b =
+		container_of(work, struct intel_breadcrumbs, irq.work);
+	struct intel_engine_cs *engine =
+		container_of(b, struct intel_engine_cs, breadcrumbs);
+
+	/* We unmask the user-interrupt in the background so that the
+	 * first-waiter can do a little busywaiting before sleeping and
+	 * not incur additional latency from setting up the IRQ.
+	 *
+	 * The worker also persists in case we cannot enable interrupts,
+	 * or if we have previously seen seqno/interrupt incoherency
+	 * ("missed interrupt" syndrome). Here the worker will wake up
+	 * every jiffie in order to kick the original waiter to do the
+	 * coherent seqno check. (Not as efficient as having a timer
+	 * source kick the waiter directly, but this should only have to
+	 * be used in rare cases where we try to workaround some
+	 * hardware/driver failures.)
+	 */
+
+	spin_lock(&b->lock);
+	if (b->first_waiter) {
+		if (!b->rpm_wakelock) {
+			intel_runtime_pm_get(engine->i915);
+			b->rpm_wakelock = true;
+		}
+
+		if (!b->irq_enabled) {
+			b->irq_enabled = irq_get(engine);
+			/* If we were delayed, the original waiter may have
+			 * gone to sleep already and we may have missed the
+			 * interrupt. So after enabling the interrupt,
+			 * kick the waiter.
+			 */
+			wake_up_process(b->first_waiter);
+		}
+
+		/* No interrupts? Kick the waiter every jiffie! */
+		if (!b->irq_enabled ||
+		    test_bit(engine->id,
+			     &engine->i915->gpu_error.missed_irq_rings)) {
+			wake_up_process(b->first_waiter);
+			queue_delayed_work(system_highpri_wq, &b->irq, 1);
+		}
+	} else {
+		if (b->irq_enabled) {
+			engine->irq_put(engine);
+			b->irq_enabled = false;
+		}
+		if (b->rpm_wakelock) {
+			intel_runtime_pm_put(engine->i915);
+			b->rpm_wakelock = false;
+		}
+	}
+	spin_unlock(&b->lock);
+}
+
+inline struct intel_breadcrumb *to_wait(struct rb_node *node)
+{
+	if (node == NULL)
+		return NULL;
+
+	return container_of(node, struct intel_breadcrumb, node);
+}
+
+bool intel_breadcrumbs_add_waiter(struct drm_i915_gem_request *request,
+				  struct intel_breadcrumb *wait)
+{
+	struct intel_breadcrumbs *b = &request->ring->breadcrumbs;
+	struct rb_node **p, *parent;
+	bool first = false;
+
+	wait->task = current;
+	wait->seqno = request->seqno;
+
+	spin_lock(&b->lock);
+
+	/* First? Unmask the user interrupt */
+	if (b->first_waiter == NULL)
+		queue_delayed_work(system_highpri_wq, &b->irq, 0);

Who enables interrupts if the worker gets scheduled and completes before 
setting b->first_waiter below?

+
+	/* Insert the request into the retirment ordered list
+	 * of waiters by walking the rbtree. If we are the oldest
+	 * seqno in the tree (the first to be retired), then
+	 * set ourselves as the bottom-half.
+	 */
+	first = true;
+	parent = NULL;
+	p = &b->requests.rb_node;
+	while (*p) {
+		parent = *p;
+		if (i915_seqno_passed(wait->seqno, to_wait(parent)->seqno)) {
+			p = &parent->rb_right;
+			first = false;
+		} else
+			p = &parent->rb_left;
+	}
+	rb_link_node(&wait->node, parent, p);
+	rb_insert_color(&wait->node, &b->requests);

WARN_ON(!first && !b->first_waiter) maybe?

+	if (first)
+		b->first_waiter = wait->task;
+
+	spin_unlock(&b->lock);
+
+	return first;
+}
+
+void intel_breadcrumbs_remove_waiter(struct drm_i915_gem_request *request,
+				     struct intel_breadcrumb *wait)
+{
+	struct intel_breadcrumbs *b = &request->ring->breadcrumbs;
+
+	spin_lock(&b->lock);
+
+	if (b->first_waiter == wait->task) {
+		struct intel_breadcrumb *next;
+		struct task_struct *task;
+
+		/* We are the current bottom-half. Find the next candidate,
+		 * the first waiter in the queue on the remaining oldest
+		 * request. As multiple seqnos may complete in the time it
+		 * takes us to wake up and find the next waiter, we have to
+		 * wake up that waiter for it to perform its own coherent
+		 * completion check.
+		 */

Equally, why wouldn't we wake up all waiters for which the requests have 
been completed?

Would be a cheap check here and it would save a cascading growing 
latency as one task wakes up the next one.

Even more benefit for multiple waiters on the same request.

+		task = NULL;
+		next = to_wait(rb_next(&wait->node));
+		if (next)
+			task = next->task;
+
+		b->first_waiter = task;
+		if (task)
+			wake_up_process(task);
+		else
+			/* Disable the user interrupts */
+			mod_delayed_work(system_highpri_wq, &b->irq, 1);
+	}
+
+	rb_erase(&wait->node, &b->requests);
+	spin_unlock(&b->lock);
+}
+
+void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	spin_lock_init(&b->lock);
+	INIT_DELAYED_WORK(&b->irq, intel_breadcrumbs_irq);
+}
+
+void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	flush_delayed_work(&b->irq);
+}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 08b982a6ae81..ce5119e2bcef 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1904,6 +1904,8 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
  	i915_cmd_parser_fini_ring(ring);
  	i915_gem_batch_pool_fini(&ring->batch_pool);

+	intel_engine_fini_breadcrumbs(ring);
+
  	if (ring->status_page.obj) {
  		kunmap(sg_page(ring->status_page.obj->pages->sgl));
  		ring->status_page.obj = NULL;
@@ -1920,10 +1922,11 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
  	ring->buffer = NULL;

  	ring->dev = dev;
+	ring->i915 = to_i915(dev);
  	INIT_LIST_HEAD(&ring->active_list);
  	INIT_LIST_HEAD(&ring->request_list);
  	i915_gem_batch_pool_init(dev, &ring->batch_pool);
-	init_waitqueue_head(&ring->irq_queue);
+	intel_engine_init_breadcrumbs(ring);

  	INIT_LIST_HEAD(&ring->buffers);
  	INIT_LIST_HEAD(&ring->execlist_queue);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 511efe556d73..8a0f02e73a4e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2157,6 +2157,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
  	WARN_ON(ring->buffer);

  	ring->dev = dev;
+	ring->i915 = to_i915(dev);
  	INIT_LIST_HEAD(&ring->active_list);
  	INIT_LIST_HEAD(&ring->request_list);
  	INIT_LIST_HEAD(&ring->execlist_queue);
@@ -2164,7 +2165,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
  	i915_gem_batch_pool_init(dev, &ring->batch_pool);
  	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));

-	init_waitqueue_head(&ring->irq_queue);
+	intel_engine_init_breadcrumbs(ring);

  	ringbuf = intel_engine_create_ringbuffer(ring, 32 * PAGE_SIZE);
  	if (IS_ERR(ringbuf))
@@ -2225,6 +2226,8 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)

  	i915_cmd_parser_fini_ring(ring);
  	i915_gem_batch_pool_fini(&ring->batch_pool);
+
+	intel_engine_fini_breadcrumbs(ring);
  }

  static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 5d1eb206151d..7dde9310df1b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -157,9 +157,31 @@ struct  intel_engine_cs {
  #define LAST_USER_RING (VECS + 1)
  	u32		mmio_base;
  	struct		drm_device *dev;
+	struct drm_i915_private *i915;
  	struct intel_ringbuffer *buffer;
  	struct list_head buffers;

+	/* Rather than have every client wait upon all user interrupts,
+	 * with the herd waking after every interrupt and each doing the
+	 * heavyweight seqno dance, we delegate the task to the first client.
+	 * After the interrupt, we wake up one client, who does the heavyweight
+	 * coherent seqno read and either goes back to sleep (if incomplete),
+	 * or wakes up the next client in the queue and so forth.
+	 *
+	 * With respect to walking the entire list of waiters in a single

s/With respect to/In contrast to/ or "Compared to"?

+	 * dedicated bottom-half, we reduce the latency of the first waiter
+	 * by avoiding a context switch, but incur additional coherent seqno
+	 * reads when following the chain of request breadcrumbs.
+	 */
+	struct intel_breadcrumbs {
+		spinlock_t lock; /* protects the lists of requests */
+		struct rb_root requests; /* sorted by retirement */
+		struct task_struct *first_waiter; /* bh for user interrupts */
+		struct delayed_work irq; /* used to enable or fake inerrupts */
+		bool irq_enabled : 1;
+		bool rpm_wakelock : 1;

Are bitfields worth it? There aren't that many engines so maybe it loses 
more in terms of instructions generated.

+	} breadcrumbs;
+
  	/*
  	 * A pool of objects to use as shadow copies of client batch buffers
  	 * when the command parser is enabled. Prevents the client from
@@ -303,8 +325,6 @@ struct  intel_engine_cs {

  	bool gpu_caches_dirty;

-	wait_queue_head_t irq_queue;
-
  	struct intel_context *default_context;
  	struct intel_context *last_context;

@@ -506,4 +526,27 @@ void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf);
  /* Legacy ringbuffer specific portion of reservation code: */
  int intel_ring_reserve_space(struct drm_i915_gem_request *request);

+/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
+struct intel_breadcrumb {
+	struct rb_node node;
+	struct task_struct *task;
+	u32 seqno;
+};
+void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
+bool intel_breadcrumbs_add_waiter(struct drm_i915_gem_request *request,
+				  struct intel_breadcrumb *wait);
+void intel_breadcrumbs_remove_waiter(struct drm_i915_gem_request *request,
+				     struct intel_breadcrumb *wait);
+static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine)
+{
+	return READ_ONCE(engine->breadcrumbs.first_waiter);
+}
+static inline void intel_engine_wakeup(struct intel_engine_cs *engine)
+{
+	struct task_struct *task = READ_ONCE(engine->breadcrumbs.first_waiter);
+	if (task)
+		wake_up_process(task);

What guarantees task is a valid task at this point?

I know it is so extremely unlikely, but it can theoretically sample the 
first_waiter which then exits and disappears before the wake_up_process 
call.

+}
+void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
+
  #endif /* _INTEL_RINGBUFFER_H_ */


Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx