Re: [PATCH i-g-t] tests/i915/gem_ctx_switch: Update with engine discovery

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 27/06/2019 11:35, Chris Wilson wrote:
Quoting Tvrtko Ursulin (2019-06-27 11:20:19)
diff --git a/tests/i915/gem_ctx_switch.c b/tests/i915/gem_ctx_switch.c
index 647911d4c42e..de39748344b8 100644
--- a/tests/i915/gem_ctx_switch.c
+++ b/tests/i915/gem_ctx_switch.c
@@ -55,7 +55,7 @@ static double elapsed(const struct timespec *start, const struct timespec *end)
static int measure_qlen(int fd,
                         struct drm_i915_gem_execbuffer2 *execbuf,
-                       unsigned int *engine, unsigned int nengine,
+                       const struct intel_engine_data *engines,
                         int timeout)
  {
         const struct drm_i915_gem_exec_object2 * const obj =
@@ -63,15 +63,17 @@ static int measure_qlen(int fd,
         uint32_t ctx[64];
         int min = INT_MAX, max = 0;
- for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+       for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
                 ctx[i] = gem_context_create(fd);
+               gem_context_set_all_engines(fd, ctx[i]);
+       }
- for (unsigned int n = 0; n < nengine; n++) {
+       for (unsigned int n = 0; n < engines->nengines; n++) {
                 uint64_t saved = execbuf->flags;
                 struct timespec tv = {};
                 int q;
- execbuf->flags |= engine[n];
+               execbuf->flags |= engines->engines[n].flags;
for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
                         execbuf->rsvd1 = ctx[i];
@@ -90,7 +92,8 @@ static int measure_qlen(int fd,
                  * Be conservative and aim not to overshoot timeout, so scale
                  * down by 8 for hopefully a max of 12.5% error.
                  */
-               q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) / 8 + 1;
+               q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) /
+                   8 + 1;
                 if (q < min)
                         min = q;
                 if (q > max)
@@ -107,7 +110,7 @@ static int measure_qlen(int fd,
  }
static void single(int fd, uint32_t handle,
-                  const struct intel_execution_engine *e,
+                  const struct intel_execution_engine2 *e2,
                    unsigned flags,
                    const int ncpus,
                    int timeout)
@@ -125,13 +128,14 @@ static void single(int fd, uint32_t handle,
         shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
         igt_assert(shared != MAP_FAILED);
- gem_require_ring(fd, e->exec_id | e->flags);
-
         for (n = 0; n < 64; n++) {
                 if (flags & QUEUE)
                         contexts[n] = gem_queue_create(fd);
                 else
                         contexts[n] = gem_context_create(fd);
+
+               if (gem_context_has_engine_map(fd, 0))
+                       gem_context_set_all_engines(fd, contexts[n]);

That was a moment of doubt on how well setting engines on a "queue"
worked :)

Why it wouldn't work?


         }
memset(&obj, 0, sizeof(obj));
@@ -152,12 +156,12 @@ static void single(int fd, uint32_t handle,
         execbuf.buffers_ptr = to_user_pointer(&obj);
         execbuf.buffer_count = 1;
         execbuf.rsvd1 = contexts[0];
-       execbuf.flags = e->exec_id | e->flags;
+       execbuf.flags = e2->flags;
         execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
         execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
         igt_require(__gem_execbuf(fd, &execbuf) == 0);
         if (__gem_execbuf(fd, &execbuf)) {
-               execbuf.flags = e->exec_id | e->flags;
+               execbuf.flags = e2->flags;
                 reloc.target_handle = obj.handle;
                 gem_execbuf(fd, &execbuf);
         }
@@ -190,7 +194,8 @@ static void single(int fd, uint32_t handle,
                 clock_gettime(CLOCK_MONOTONIC, &now);
igt_info("[%d] %s: %'u cycles: %.3fus%s\n",
-                        child, e->name, count, elapsed(&start, &now)*1e6 / count,
+                        child, e2->name, count,
+                        elapsed(&start, &now) * 1e6 / count,
                          flags & INTERRUPTIBLE ? " (interruptible)" : "");
shared[child].elapsed = elapsed(&start, &now);
@@ -209,7 +214,7 @@ static void single(int fd, uint32_t handle,
                 }
igt_info("Total %s: %'lu cycles: %.3fus%s\n",
-                        e->name, total, max*1e6 / total,
+                        e2->name, total, max*1e6 / total,
                          flags & INTERRUPTIBLE ? " (interruptible)" : "");
         }
@@ -223,25 +228,20 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
  {
         struct drm_i915_gem_execbuffer2 execbuf;
         struct drm_i915_gem_exec_object2 obj[2];
-       unsigned int engine[16], e;
-       const char *name[16];
+       struct intel_engine_data engines = { };
         uint32_t contexts[65];
-       unsigned int nengine;
         int n, qlen;
- nengine = 0;
-       for_each_physical_engine(fd, e) {
-               engine[nengine] = e;
-               name[nengine] = e__->name;
-               nengine++;
-       }
-       igt_require(nengine);
+       engines = intel_init_engine_list(fd, 0);
+       igt_require(engines.nengines);
for (n = 0; n < ARRAY_SIZE(contexts); n++) {
                 if (flags & QUEUE)
                         contexts[n] = gem_queue_create(fd);
                 else
                         contexts[n] = gem_context_create(fd);
+
+               gem_context_set_all_engines(fd, contexts[n]);
         }
memset(obj, 0, sizeof(obj));
@@ -256,7 +256,7 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
         igt_require(__gem_execbuf(fd, &execbuf) == 0);
         gem_sync(fd, handle);
- qlen = measure_qlen(fd, &execbuf, engine, nengine, timeout);
+       qlen = measure_qlen(fd, &execbuf, &engines, timeout);
         igt_info("Using timing depth of %d batches\n", qlen);
execbuf.buffers_ptr = to_user_pointer(obj);
@@ -264,13 +264,15 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
for (int pot = 2; pot <= 64; pot *= 2) {
                 for (int nctx = pot - 1; nctx <= pot + 1; nctx++) {
-                       igt_fork(child, nengine) {
+                       igt_fork(child, engines.nengines) {
                                 struct timespec start, now;
                                 unsigned int count = 0;
obj[0].handle = gem_create(fd, 4096);
-                               execbuf.flags |= engine[child];
-                               for (int loop = 0; loop < ARRAY_SIZE(contexts); loop++) {
+                               execbuf.flags |= engines.engines[child].flags;
+                               for (int loop = 0;
+                                    loop < ARRAY_SIZE(contexts);
+                                    loop++) {
                                         execbuf.rsvd1 = contexts[loop];
                                         gem_execbuf(fd, &execbuf);
                                 }
@@ -279,7 +281,8 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
                                 clock_gettime(CLOCK_MONOTONIC, &start);
                                 do {
                                         for (int loop = 0; loop < qlen; loop++) {
-                                               execbuf.rsvd1 = contexts[loop % nctx];
+                                               execbuf.rsvd1 =
+                                                       contexts[loop % nctx];
                                                 gem_execbuf(fd, &execbuf);
                                         }
                                         count += qlen;
@@ -291,8 +294,11 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
                                 gem_close(fd, obj[0].handle);
igt_info("[%d:%d] %s: %'u cycles: %.3fus%s (elapsed: %.3fs)\n",
-                                        nctx, child, name[child], count, elapsed(&start, &now)*1e6 / count,
-                                        flags & INTERRUPTIBLE ? " (interruptible)" : "",
+                                        nctx, child,
+                                        engines.engines[child].name, count,
+                                        elapsed(&start, &now) * 1e6 / count,
+                                        flags & INTERRUPTIBLE ?
+                                        " (interruptible)" : "",
                                          elapsed(&start, &now));
                         }
                         igt_waitchildren();
@@ -306,6 +312,7 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
  igt_main
  {
         const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+       const struct intel_execution_engine2 *e2;
         const struct intel_execution_engine *e;
         static const struct {
                 const char *name;
@@ -338,7 +345,49 @@ igt_main
                 igt_fork_hang_detector(fd);
         }
+ /* Legacy testing must be first. */
         for (e = intel_execution_engines; e->name; e++) {
+               e2 = gem_eb_flags_to_engine(e->exec_id | e->flags);
+               if (!e2)
+                       continue; /* I915_EXEC_BSD with no ring selectors */
+
+               for (typeof(*phases) *p = phases; p->name; p++) {
+                       igt_subtest_group {
+                               igt_fixture {
+                                       gem_require_ring(fd, e2->flags);
+                                       if (p->require)
+                                               igt_require(p->require(fd));
+                               }
+
+                               igt_subtest_f("legacy-%s%s%s",
+                                             e->name,
+                                             *p->name ? "-" : "",
+                                             p->name)
+                                       single(fd, light, e2, p->flags, 1, 5);
+
+                               igt_skip_on_simulation();
+
+                               igt_subtest_f("legacy-%s%s-heavy%s",
+                                             e->exec_id == 0 ? "basic-" : "",
+                                             e->name,
+                                             p->name)
+                                       single(fd, heavy, e2, p->flags, 1, 5);
+                               igt_subtest_f("legacy-forked-%s%s",
+                                             e->name,
+                                             p->name)
+                                       single(fd, light, e2, p->flags, ncpus,
+                                              150);
+                               igt_subtest_f("legacy-forked-%s-heavy%s",
+                                             e->name,
+                                             p->name)
+                                       single(fd, heavy, e2, p->flags, ncpus,
+                                              150);
+                       }
+               }
+       }
+
+       /* Must come after legacy subtests. */
+       __for_each_physical_engine(fd, e2) {
                 for (typeof(*phases) *p = phases; p->name; p++) {
                         igt_subtest_group {
                                 igt_fixture {
@@ -346,33 +395,40 @@ igt_main
                                                 igt_require(p->require(fd));
                                 }
- igt_subtest_f("%s%s%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
-                                       single(fd, light, e, p->flags, 1, 5);
+                               igt_subtest_f("%s%s%s",
+                                             e2->name,
+                                             *p->name ? "-" : "",
+                                             p->name)
+                                       single(fd, light, e2, p->flags, 1, 5);
igt_skip_on_simulation(); - igt_subtest_f("%s%s-heavy%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
-                                       single(fd, heavy, e, p->flags, 1, 5);
-                               igt_subtest_f("forked-%s%s", e->name, p->name)
-                                       single(fd, light, e, p->flags, ncpus, 150);
-                               igt_subtest_f("forked-%s-heavy%s", e->name, p->name)
-                                       single(fd, heavy, e, p->flags, ncpus, 150);
+                               igt_subtest_f("%s-heavy%s", e2->name, p->name)
+                                       single(fd, heavy, e2, p->flags, 1, 5);
+                               igt_subtest_f("%s-forked%s", e2->name, p->name)
+                                       single(fd, light, e2, p->flags, ncpus,
+                                              150);
+                               igt_subtest_f("%s-forked-heavy%s",
+                                             e2->name,
+                                             p->name)
+                                       single(fd, heavy, e2, p->flags, ncpus,
+                                              150);
                         }
                 }
         }

Ok, that looks like what I would expect. For crux tests, we iterate over
both the legacy ring selector, and along the engine[] indices.

-       igt_subtest("basic-all-light")
+       igt_subtest("all-light")
                 all(fd, light, 0, 5);
-       igt_subtest("basic-all-heavy")
+       igt_subtest("all-heavy")
                 all(fd, heavy, 0, 5);

And for "all" tests where we are just trying to utilise all engines at
once, we only care about the underlying HW utilisation and so the new
query interface works best.

Cool, so no complaints?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux