Re: [PATCH i-g-t] i915/gem_exec_schedule: Trick semaphores into a GPU hang

Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> · Tue, 9 Apr 2019 16:39:43 +0100

On 09/04/2019 14:56, Chris Wilson wrote:
If we have two tasks running independently on xcs0 and xcs1, but which
both queue subsequent work onto rcs, we may insert semaphores before the
rcs work and pick unwisely which task to run first. To maximise
throughput, we want to run on rcs whichever task is ready first.
Conversely, if we pick wrongly, that can be used to trigger a GPU hang
with unaware userspace.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
  tests/i915/gem_exec_schedule.c | 61 ++++++++++++++++++++++++++++++++++
  1 file changed, 61 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 3df319bcc..d6f109540 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -404,6 +404,65 @@ static void semaphore_userlock(int i915)
  	igt_spin_batch_free(i915, spin);
  }
  
+static void semaphore_codependency(int i915)
+{
+	struct {
+		igt_spin_t *xcs, *rcs;
+	} task[2];
+	unsigned int engine;
+	int i;
+
+	/*
+	 * Consider two tasks, task A runs on (xcs0, rcs0) and task B
+	 * on (xcs1, rcs0). That is they must both run a dependent
+	 * batch on rcs0, after first running in parallel on separate
+	 * engines. To maximise throughput, we want the shorter xcs task
+	 * to start on rcs first. However, if we insert semaphores we may
+	 * pick wrongly and end up running the requests in the least
+	 * optimal order.
+	 */
+
+	i = 0;
+	for_each_physical_engine(i915, engine) {
+		uint32_t ctx;
+
+		if (engine == I915_EXEC_RENDER)
+			continue;
+
+		ctx = gem_context_create(i915);
+
+		task[i].xcs =
+			__igt_spin_batch_new(i915,
+					     .ctx = ctx,
+					     .engine = engine,
+					     .flags = IGT_SPIN_POLL_RUN);
+		igt_spin_busywait_until_running(task[i].xcs);
+
+		/* Common rcs tasks will be queued in FIFO */
+		task[i].rcs =
+			__igt_spin_batch_new(i915,
+					     .ctx = ctx,
+					     .engine = I915_EXEC_RENDER,
+					     .dependency = task[i].xcs->handle);
+
+		gem_context_destroy(i915, ctx);
+
+		if (++i == ARRAY_SIZE(task))
+			break;
+	}
+	igt_require(i == ARRAY_SIZE(task));
+
+	/* Since task[0] was queued first, it will be first in queue for rcs */
+	igt_spin_batch_end(task[1].xcs);
+	igt_spin_batch_end(task[1].rcs);
+	gem_sync(i915, task[1].rcs->handle); /* to hang if task[0] hogs rcs */
+
+	for (i = 0; i < ARRAY_SIZE(task); i++) {
+		igt_spin_batch_free(i915, task[i].xcs);
+		igt_spin_batch_free(i915, task[i].rcs);
+	}
+}
+
  static void reorder(int fd, unsigned ring, unsigned flags)
  #define EQUAL 1
  {
@@ -1393,6 +1452,8 @@ igt_main
  
  		igt_subtest("semaphore-user")
  			semaphore_userlock(fd);
+		igt_subtest("semaphore-codependency")
+			semaphore_codependency(fd);
  
  		igt_subtest("smoketest-all")
  			smoketest(fd, ALL_ENGINES, 30);


This just needs a can_store_dword check before using IGT_SPIN_POLL_RUN,
for code correctness, and with that:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx