Re: [PATCH i-g-t] i915/gem_exec_balancer: Throw a few hangs into the virtual pipelines

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 15/11/2019 14:59, Chris Wilson wrote:
Quoting Chris Wilson (2019-11-15 14:58:00)
Quoting Tvrtko Ursulin (2019-11-15 14:52:16)

On 15/11/2019 13:09, Chris Wilson wrote:
Quoting Tvrtko Ursulin (2019-11-15 13:02:24)

On 14/11/2019 19:15, Chris Wilson wrote:
Although a virtual engine itself has no hang detection — that is on the
underlying physical engines — it does provide a unique means for clients
to try and break the system. Try and break it before they do.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
    tests/i915/gem_exec_balancer.c | 105 +++++++++++++++++++++++++++++++++
    1 file changed, 105 insertions(+)

diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
index 70c4529b4..86028cfdd 100644
--- a/tests/i915/gem_exec_balancer.c
+++ b/tests/i915/gem_exec_balancer.c
@@ -24,6 +24,7 @@
    #include <sched.h>
#include "igt.h"
+#include "igt_gt.h"
    #include "igt_perf.h"
    #include "i915/gem_ring.h"
    #include "sw_sync.h"
@@ -1314,6 +1315,102 @@ static void semaphore(int i915)
        gem_quiescent_gpu(i915);
    }
+static void set_unbannable(int i915, uint32_t ctx)
+{
+     struct drm_i915_gem_context_param p = {
+             .ctx_id = ctx,
+             .param = I915_CONTEXT_PARAM_BANNABLE,
+     };
+
+     igt_assert_eq(__gem_context_set_param(i915, &p), 0);
+}
+
+static void hangme(int i915)
+{
+     struct drm_i915_gem_exec_object2 batch = {
+             .handle = batch_create(i915),
+     };
+
+     /*
+      * Fill the available engines with hanging virtual engines and verify
+      * that execution continues onto the second batch.
+      */
+
+     for (int class = 1; class < 32; class++) {
+             struct i915_engine_class_instance *ci;
+             struct client {
+                     igt_spin_t *spin[2];
+             } *client;
+             unsigned int count;
+             uint32_t bg;
+
+             ci = list_engines(i915, 1u << class, &count);
+             if (!ci)
+                     continue;
+
+             if (count < 2) {
+                     free(ci);
+                     continue;
+             }
+
+             client = malloc(sizeof(*client) * count);
+             igt_assert(client);
+
+             for (int i = 0; i < count; i++) {
+                     uint32_t ctx = gem_context_create(i915);
+                     struct client *c = &client[i];
+                     unsigned int flags;
+
+                     set_unbannable(i915, ctx);
+                     set_load_balancer(i915, ctx, ci, count, NULL);
+
+                     flags = IGT_SPIN_FENCE_OUT | IGT_SPIN_NO_PREEMPTION;
+                     for (int j = 0; j < ARRAY_SIZE(c->spin); j++)  {
+                             c->spin[j] = igt_spin_new(i915, ctx,
+                                                       .flags = flags);
+                             flags = IGT_SPIN_FENCE_OUT;
+                     }
+
+                     gem_context_destroy(i915, ctx);
+             }
+
+             /* Apply some background context to speed up hang detection */
+             bg = gem_context_create(i915);
+             set_engines(i915, bg, ci, count);
+             for (int i = 0; i < count; i++) {
+                     struct drm_i915_gem_execbuffer2 execbuf = {
+                             .buffers_ptr = to_user_pointer(&batch),
+                             .buffer_count = 1,
+                             .flags = i,
+                             .rsvd1 = bg,
+                     };
+                     gem_execbuf(i915, &execbuf);
+             }
+             gem_context_destroy(i915, bg);
+
+             for (int i = 0; i < count; i++) {
+                     struct client *c = &client[i];
+
+                     igt_debug("Waiting for client[%d].spin[%d]\n", i, 0);
+                     gem_sync(i915, c->spin[0]->handle);
+                     igt_assert_eq(sync_fence_status(c->spin[0]->out_fence),
+                                   -EIO);
+
+                     igt_debug("Waiting for client[%d].spin[%d]\n", i, 1);
+                     gem_sync(i915, c->spin[1]->handle);
+                     igt_assert_eq(sync_fence_status(c->spin[1]->out_fence),
+                                   -EIO);
+
+                     igt_spin_free(i915, c->spin[0]);
+                     igt_spin_free(i915, c->spin[1]);
+             }
+             free(client);
+     }
+
+     gem_close(i915, batch.handle);
+     gem_quiescent_gpu(i915);
+}
+
    static void smoketest(int i915, int timeout)
    {
        struct drm_i915_gem_exec_object2 batch[2] = {
@@ -1486,4 +1583,12 @@ igt_main
        igt_fixture {
                igt_stop_hang_detector();
        }
+
+     igt_subtest("hang") {
+             igt_hang_t hang = igt_allow_hang(i915, 0, 0);
+
+             hangme(i915);
+
+             igt_disallow_hang(i915, hang);
+     }
    }


Looks good. But do we need some core helpers to figure out when preempt
timeout is compiled out?

It should still work the same, but slower; 10s hang detection rather
than ~200ms.

You are talking about old hangcheck? I was thinking about all the new
Kconfigs compiled out. No heartbeats, no preemption timeout. Still works?

Works even faster. :)

The spinners then get killed when the contexts are closed (default is
non-persistent contexts if you disable heartbeats entirely). The
challenge is really on the per-engine heartbeat controls to make sure we
kick off the dead contexts, but that's for the future.

And for the other kconfig, with no preemption timeout, you just get
regular heartbeats, so roughly the 10s hangcheck timeout.

Good then. No other opens:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux