Exercise the in-kernel load balancer checking that we can distribute
batches across the set of ctx->engines to avoid load.
v2: Many assorted improvements from Tvrtko.
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
tests/Makefile.am | 1 +
tests/Makefile.sources | 1 +
tests/i915/gem_exec_balancer.c | 1052 ++++++++++++++++++++++++++++++++
tests/meson.build | 7 +
4 files changed, 1061 insertions(+)
create mode 100644 tests/i915/gem_exec_balancer.c
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 5097debf6..c6af0aeaf 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -96,6 +96,7 @@ gem_close_race_LDADD = $(LDADD) -lpthread
gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
gem_ctx_thrash_LDADD = $(LDADD) -lpthread
gem_ctx_sseu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
+i915_gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
gem_exec_capture_LDADD = $(LDADD) -lz
gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
gem_exec_parallel_LDADD = $(LDADD) -lpthread
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 2ef689023..17fca6d77 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -24,6 +24,7 @@ TESTS_progs = \
i915/gem_ctx_clone \
i915/gem_ctx_engines \
i915/gem_ctx_shared \
+ i915/gem_exec_balancer \
i915/gem_vm_create \
kms_3d \
kms_addfb_basic \
diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
new file mode 100644
index 000000000..40a2719c0
--- /dev/null
+++ b/tests/i915/gem_exec_balancer.c
@@ -0,0 +1,1052 @@
+/*
+ * Copyright © 2018-2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <sched.h>
+
+#include "igt.h"
+#include "igt_perf.h"
+#include "i915/gem_ring.h"
+#include "sw_sync.h"
+
+IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
+
+#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
+
+static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
+{
+ int fd;
+
+ fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));
+ if (fd != -1) {
+ close(fd);
+ return true;
+ }
+
+ return false;
+}
+
+static struct i915_engine_class_instance *
+list_engines(int i915, uint32_t class_mask, unsigned int *out)
+{
+ unsigned int count = 0, size = 64;
+ struct i915_engine_class_instance *engines;
+
+ engines = malloc(size * sizeof(*engines));
+ if (!engines) {
+ *out = 0;
+ return NULL;
+ }
+
+ for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
+ class_mask;
+ class++, class_mask >>= 1) {
+ if (!(class_mask & 1))
+ continue;
+
+ for (unsigned int instance = 0;
+ instance < INSTANCE_COUNT;
+ instance++) {
+ if (!has_class_instance(i915, class, instance))
+ continue;
+
+ if (count == size) {
+ struct i915_engine_class_instance *e;
+
+ size *= 2;
+ e = realloc(engines, size*sizeof(*engines));
+ if (!e) {
+ *out = count;
+ return engines;
+ }
+
+ engines = e;
+ }
+
+ engines[count++] = (struct i915_engine_class_instance){
+ .engine_class = class,
+ .engine_instance = instance,
+ };
+ }
+ }
+
+ if (!count) {
+ free(engines);
+ engines = NULL;
+ }
+
+ *out = count;
+ return engines;
+}
+
+static int __set_load_balancer(int i915, uint32_t ctx,
+ const struct i915_engine_class_instance *ci,
+ unsigned int count)
+{
+ I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, count);
+ I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1 + count);
+ struct drm_i915_gem_context_param p = {
+ .ctx_id = ctx,
+ .param = I915_CONTEXT_PARAM_ENGINES,
+ .size = sizeof(engines),
+ .value = to_user_pointer(&engines)
+ };
+
+ memset(&balancer, 0, sizeof(balancer));
+ balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
+
+ igt_assert(count);
+ balancer.num_siblings = count;
+ memcpy(balancer.engines, ci, count * sizeof(*ci));
+
+ memset(&engines, 0, sizeof(engines));
+ engines.extensions = to_user_pointer(&balancer);
+ engines.engines[0].engine_class =
+ I915_ENGINE_CLASS_INVALID;
+ engines.engines[0].engine_instance =
+ I915_ENGINE_CLASS_INVALID_NONE;
+ memcpy(engines.engines + 1, ci, count * sizeof(*ci));
+
+ return __gem_context_set_param(i915, &p);
+}
+
+static void set_load_balancer(int i915, uint32_t ctx,
+ const struct i915_engine_class_instance *ci,
+ unsigned int count)
+{
+ igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
+}
+
+static uint32_t load_balancer_create(int i915,
+ const struct i915_engine_class_instance *ci,
+ unsigned int count)
+{
+ uint32_t ctx;
+
+ ctx = gem_context_create(i915);
+ set_load_balancer(i915, ctx, ci, count);
+
+ return ctx;
+}
+
+static uint32_t __batch_create(int i915, uint32_t offset)
+{
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ uint32_t handle;
+
+ handle = gem_create(i915, ALIGN(offset + 4, 4096));
+ gem_write(i915, handle, offset, &bbe, sizeof(bbe));
+
+ return handle;
+}
+
+static uint32_t batch_create(int i915)
+{
+ return __batch_create(i915, 0);
+}
+
+static void invalid_balancer(int i915)
+{
+ I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, 64);
+ I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 64);
+ struct drm_i915_gem_context_param p = {
+ .param = I915_CONTEXT_PARAM_ENGINES,
+ .value = to_user_pointer(&engines)
+ };
+ uint32_t handle;
+ void *ptr;
+
+ /*
+ * Assume that I915_CONTEXT_PARAM_ENGINE validates the array
+ * of engines[], our job is to determine if the load_balancer
+ * extension explodes.
+ */
+
+ for (int class = 0; class < 32; class++) {
+ struct i915_engine_class_instance *ci;
+ unsigned int count;
+
+ ci = list_engines(i915, 1 << class, &count);
+ if (!ci)
+ continue;
+
+ igt_assert_lte(count, 64);
+
+ p.ctx_id = gem_context_create(i915);
+ p.size = (sizeof(struct i915_context_param_engines) +
+ (count + 1) * sizeof(*engines.engines));
+
+ memset(&engines, 0, sizeof(engines));
+ engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
+ engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
+ memcpy(engines.engines + 1, ci, count * sizeof(*ci));
+ gem_context_set_param(i915, &p);
+
+ engines.extensions = -1ull;
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+ engines.extensions = 1ull;
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+ memset(&balancer, 0, sizeof(balancer));
+ balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
+ balancer.num_siblings = count;
+ memcpy(balancer.engines, ci, count * sizeof(*ci));
+
+ engines.extensions = to_user_pointer(&balancer);
+ gem_context_set_param(i915, &p);
+
+ balancer.engine_index = 1;
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
+
+ balancer.engine_index = count;
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
+
+ balancer.engine_index = count + 1;
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EINVAL);
+
+ balancer.engine_index = 0;
+ gem_context_set_param(i915, &p);
+
+ balancer.base.next_extension = to_user_pointer(&balancer);
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EEXIST);
+
+ balancer.base.next_extension = -1ull;
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+ handle = gem_create(i915, 4096 * 3);
+ ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
+ gem_close(i915, handle);
+
+ memset(&engines, 0, sizeof(engines));
+ engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
+ engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
+ engines.engines[1].engine_class = I915_ENGINE_CLASS_INVALID;
+ engines.engines[1].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
+ memcpy(engines.engines + 2, ci, count * sizeof(ci));
+ p.size = (sizeof(struct i915_context_param_engines) +
+ (count + 2) * sizeof(*engines.engines));
+ gem_context_set_param(i915, &p);
+
+ balancer.base.next_extension = 0;
+ balancer.engine_index = 1;
+ engines.extensions = to_user_pointer(&balancer);
+ gem_context_set_param(i915, &p);
+
+ memcpy(ptr + 4096 - 8, &balancer, sizeof(balancer));
+ memcpy(ptr + 8192 - 8, &balancer, sizeof(balancer));
+ balancer.engine_index = 0;
+
+ engines.extensions = to_user_pointer(ptr) + 4096 - 8;
+ gem_context_set_param(i915, &p);
+
+ balancer.base.next_extension = engines.extensions;
+ engines.extensions = to_user_pointer(&balancer);
+ gem_context_set_param(i915, &p);
+
+ munmap(ptr, 4096);
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+ engines.extensions = to_user_pointer(ptr) + 4096 - 8;
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+ engines.extensions = to_user_pointer(ptr) + 8192 - 8;
+ gem_context_set_param(i915, &p);
+
+ balancer.base.next_extension = engines.extensions;
+ engines.extensions = to_user_pointer(&balancer);
+ gem_context_set_param(i915, &p);
+
+ munmap(ptr + 8192, 4096);
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+ engines.extensions = to_user_pointer(ptr) + 8192 - 8;
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+ munmap(ptr + 4096, 4096);
+
+ gem_context_destroy(i915, p.ctx_id);
+ free(ci);
+ }
+}
+
+static void kick_kthreads(void)
+{
+ usleep(20 * 1000); /* 20ms should be enough for ksoftirqd! */
+}
+
+static double measure_load(int pmu, int period_us)
+{
+ uint64_t data[2];
+ uint64_t d_t, d_v;
+
+ kick_kthreads();
+
+ igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+ d_v = -data[0];
+ d_t = -data[1];
+
+ usleep(period_us);
+
+ igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+ d_v += data[0];
+ d_t += data[1];
+
+ return d_v / (double)d_t;
+}
+
+static double measure_min_load(int pmu, unsigned int num, int period_us)
+{
+ uint64_t data[2 + num];
+ uint64_t d_t, d_v[num];
+ uint64_t min = -1, max = 0;
+
+ kick_kthreads();
+
+ igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+ for (unsigned int n = 0; n < num; n++)
+ d_v[n] = -data[2 + n];
+ d_t = -data[1];
+
+ usleep(period_us);
+
+ igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+
+ d_t += data[1];
+ for (unsigned int n = 0; n < num; n++) {
+ d_v[n] += data[2 + n];
+ igt_debug("engine[%d]: %.1f%%\n",
+ n, d_v[n] / (double)d_t * 100);
+ if (d_v[n] < min)
+ min = d_v[n];
+ if (d_v[n] > max)
+ max = d_v[n];
+ }
+
+ igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
+ d_t, min / (double)d_t * 100, max / (double)d_t * 100);
+
+ return min / (double)d_t;
+}
+
+static void check_individual_engine(int i915,
+ uint32_t ctx,
+ const struct i915_engine_class_instance *ci,
+ int idx)
+{
+ igt_spin_t *spin;
+ double load;
+ int pmu;
+
+ pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].engine_class,
+ ci[idx].engine_instance));
+
+ spin = igt_spin_new(i915, .ctx = ctx, .engine = idx + 1);
+ load = measure_load(pmu, 10000);
+ igt_spin_free(i915, spin);
+
+ close(pmu);
+
+ igt_assert_f(load > 0.90,
+ "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
+ idx, ci[idx].engine_class, ci[idx].engine_instance, load*100);
+}
+
+static void individual(int i915)
+{
+ uint32_t ctx;
+
+ /*
+ * I915_CONTEXT_PARAM_ENGINE allows us to index into the user
+ * supplied array from gem_execbuf(). Our check is to build the
+ * ctx->engine[] with various different engine classes, feed in
+ * a spinner and then ask pmu to confirm it the expected engine
+ * was busy.
+ */
+
+ ctx = gem_context_create(i915);
+
+ for (int class = 0; class < 32; class++) {
+ struct i915_engine_class_instance *ci;
+ unsigned int count;
+
+ ci = list_engines(i915, 1u << class, &count);
+ if (!ci)
+ continue;
+
+ for (int pass = 0; pass < count; pass++) { /* approx. count! */
+ igt_assert(sizeof(*ci) == sizeof(int));
+ igt_permute_array(ci, count, igt_exchange_int);
+ set_load_balancer(i915, ctx, ci, count);
+ for (unsigned int n = 0; n < count; n++)
+ check_individual_engine(i915, ctx, ci, n);
+ }
+
+ free(ci);
+ }
+
+ gem_context_destroy(i915, ctx);
+ gem_quiescent_gpu(i915);
+}
+
+static void indices(int i915)
+{
+ I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
+ struct drm_i915_gem_context_param p = {
+ .ctx_id = gem_context_create(i915),
+ .param = I915_CONTEXT_PARAM_ENGINES,
+ .value = to_user_pointer(&engines)
+ };
+
+ struct drm_i915_gem_exec_object2 batch = {
+ .handle = batch_create(i915),
+ };
+
+ unsigned int nengines = 0;
+ void *balancers = NULL;
+
+ /*
+ * We can populate our engine map with multiple virtual engines.
+ * Do so.
+ */
+
+ for (int class = 0; class < 32; class++) {
+ struct i915_engine_class_instance *ci;
+ unsigned int count;
+
+ ci = list_engines(i915, 1u << class, &count);
+ if (!ci)
+ continue;
+
+ for (int n = 0; n < count; n++) {
+ I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(*balancer,
+ count);
+
+ engines.engines[nengines].engine_class =
+ I915_ENGINE_CLASS_INVALID;
+ engines.engines[nengines].engine_instance =
+ I915_ENGINE_CLASS_INVALID_NONE;
+
+ balancer = calloc(sizeof(*balancer), 1);
+ igt_assert(balancer);
+
+ balancer->base.name =
+ I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
+ balancer->base.next_extension =
+ to_user_pointer(balancers);
+ balancers = balancer;
+
+ balancer->engine_index = nengines++;
+ balancer->num_siblings = count;
+
+ memcpy(balancer->engines,
+ ci, count * sizeof(*ci));
+ }
+ free(ci);
+ }
+
+ igt_require(balancers);
+ engines.extensions = to_user_pointer(balancers);
+ p.size = (sizeof(struct i915_engine_class_instance) * nengines +
+ sizeof(struct i915_context_param_engines));
+ gem_context_set_param(i915, &p);
+
+ for (unsigned int n = 0; n < nengines; n++) {
+ struct drm_i915_gem_execbuffer2 eb = {
+ .buffers_ptr = to_user_pointer(&batch),
+ .buffer_count = 1,
+ .flags = n,
+ .rsvd1 = p.ctx_id,
+ };
+ igt_debug("Executing on index=%d\n", n);
+ gem_execbuf(i915, &eb);
+ }
+ gem_context_destroy(i915, p.ctx_id);
+
+ gem_sync(i915, batch.handle);
+ gem_close(i915, batch.handle);
+
+ while (balancers) {
+ struct i915_context_engines_load_balance *b, *n;
+
+ b = balancers;
+ n = from_user_pointer(b->base.next_extension);
+ free(b);
+
+ balancers = n;
+ }
+
+ gem_quiescent_gpu(i915);
+}
+
+static void busy(int i915)
+{
+ uint32_t scratch = gem_create(i915, 4096);
+
+ /*
+ * Check that virtual engines are reported via GEM_BUSY.
+ *
+ * When running, the batch will be on the real engine and report
+ * the actual class.
+ *
+ * Prior to running, if the load-balancer is across multiple
+ * classes we don't know which engine the batch will
+ * execute on, so we report them all!
+ *
+ * However, as we only support (and test) creating a load-balancer
+ * from engines of only one class, that can be propagated accurately
+ * through to GEM_BUSY.
+ */
+
+ for (int class = 0; class < 16; class++) {
+ struct drm_i915_gem_busy busy;
+ struct i915_engine_class_instance *ci;
+ unsigned int count;
+ igt_spin_t *spin[2];
+ uint32_t ctx;
+
+ ci = list_engines(i915, 1u << class, &count);
+ if (!ci)
+ continue;
+
+ ctx = load_balancer_create(i915, ci, count);
+ free(ci);
+
+ spin[0] = __igt_spin_new(i915,
+ .ctx = ctx,
+ .flags = IGT_SPIN_POLL_RUN);
+ spin[1] = __igt_spin_new(i915,
+ .ctx = ctx,
+ .dependency = scratch);
+
+ igt_spin_busywait_until_started(spin[0]);
+
+ /* Running: actual class */
+ busy.handle = spin[0]->handle;
+ do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+ igt_assert_eq_u32(busy.busy, 1u << (class + 16));
+
+ /* Queued(read): expected class */
+ busy.handle = spin[1]->handle;
+ do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+ igt_assert_eq_u32(busy.busy, 1u << (class + 16));
+
+ /* Queued(write): expected class */
+ busy.handle = scratch;
+ do_ioctl(i915, DRM_IOCTL_I915_GEM_BUSY, &busy);
+ igt_assert_eq_u32(busy.busy,
+ (1u << (class + 16)) | (class + 1));
+
+ igt_spin_free(i915, spin[1]);
+ igt_spin_free(i915, spin[0]);
+
+ gem_context_destroy(i915, ctx);
+ }
+
+ gem_close(i915, scratch);
+ gem_quiescent_gpu(i915);
+}
+
+static int add_pmu(int pmu, const struct i915_engine_class_instance *ci)
+{
+ return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->engine_class,
+ ci->engine_instance),
+ pmu);
+}
+
+static void full(int i915, unsigned int flags)
+#define PULSE 0x1
+#define LATE 0x2
+{
+ struct drm_i915_gem_exec_object2 batch = {
+ .handle = batch_create(i915),
+ };
+
+ if (flags & LATE)
+ igt_require_sw_sync();
+
+ /*
+ * I915_CONTEXT_PARAM_ENGINE changes the meaning of engine selector in
+ * execbuf to utilize our own map, into which we replace I915_EXEC_DEFAULT
+ * to provide an automatic selection from the other ctx->engine[]. It
+ * employs load-balancing to evenly distribute the workload the
+ * array. If we submit N spinners, we expect them to be simultaneously
+ * running across N engines and use PMU to confirm that the entire
+ * set of engines are busy.
+ *
+ * We complicate matters by interspersing short-lived tasks to
+ * challenge the kernel to search for space in which to insert new
+ * batches.
+ */
+
+ for (int class = 0; class < 32; class++) {
+ struct i915_engine_class_instance *ci;
+ igt_spin_t *spin = NULL;
+ IGT_CORK_FENCE(cork);
+ unsigned int count;
+ double load;
+ int fence = -1;
+ int *pmu;
+
+ ci = list_engines(i915, 1u << class, &count);
+ if (!ci)
+ continue;
+
+ pmu = malloc(sizeof(*pmu) * count);
+ igt_assert(pmu);
+
+ if (flags & LATE)
+ fence = igt_cork_plug(&cork, i915);
+
+ pmu[0] = -1;
+ for (unsigned int n = 0; n < count; n++) {
+ uint32_t ctx;
+
+ pmu[n] = add_pmu(pmu[0], &ci[n]);
+
+ if (flags & PULSE) {
+ struct drm_i915_gem_execbuffer2 eb = {
+ .buffers_ptr = to_user_pointer(&batch),
+ .buffer_count = 1,
+ .rsvd2 = fence,
+ .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
+ };
+ gem_execbuf(i915, &eb);
+ }
+
+ /*
+ * Each spinner needs to be one a new timeline,
+ * otherwise they will just sit in the single queue
+ * and not run concurrently.
+ */
+ ctx = load_balancer_create(i915, ci, count);
+
+ if (spin == NULL) {
+ spin = __igt_spin_new(i915, .ctx = ctx);
+ } else {
+ struct drm_i915_gem_execbuffer2 eb = {
+ .buffers_ptr = spin->execbuf.buffers_ptr,
+ .buffer_count = spin->execbuf.buffer_count,
+ .rsvd1 = ctx,
+ .rsvd2 = fence,
+ .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
+ };
+ gem_execbuf(i915, &eb);
+ }
+
+ gem_context_destroy(i915, ctx);
+ }
+
+ if (flags & LATE) {
+ igt_cork_unplug(&cork);
+ close(fence);
+ }
+
+ load = measure_min_load(pmu[0], count, 10000);
+ igt_spin_free(i915, spin);
+
+ close(pmu[0]);
+ free(pmu);
+
+ free(ci);
+
+ igt_assert_f(load > 0.90,
+ "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
+ count, class, load*100);
+ gem_quiescent_gpu(i915);
+ }
+
+ gem_close(i915, batch.handle);
+ gem_quiescent_gpu(i915);
+}
+
+static const char *class_to_str(int class)
+{
+ const char *str[] = {
+ [I915_ENGINE_CLASS_RENDER] = "rcs",
+ [I915_ENGINE_CLASS_COPY] = "bcs",
+ [I915_ENGINE_CLASS_VIDEO] = "vcs",
+ [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "vecs",
+ };
+
+ if (class < ARRAY_SIZE(str))
+ return str[class];
+
+ return "unk";
+}
+
+static void nop(int i915)
+{
+ struct drm_i915_gem_exec_object2 batch = {
+ .handle = batch_create(i915),
+ };
+
+ for (int class = 0; class < 32; class++) {
+ struct i915_engine_class_instance *ci;
+ unsigned int count;
+ uint32_t ctx;
+
+ ci = list_engines(i915, 1u << class, &count);
+ if (!ci)
+ continue;
+
+ ctx = load_balancer_create(i915, ci, count);
+
+ for (int n = 0; n < count; n++) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&batch),
+ .buffer_count = 1,
+ .flags = n + 1,
+ .rsvd1 = ctx,
+ };
+ struct timespec tv = {};
+ unsigned long nops;
+ double t;
+
+ igt_nsec_elapsed(&tv);
+ nops = 0;
+ do {
+ for (int r = 0; r < 1024; r++)
+ gem_execbuf(i915, &execbuf);
+ nops += 1024;
+ } while (igt_seconds_elapsed(&tv) < 2);
+ gem_sync(i915, batch.handle);
+
+ t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
+ igt_info("%s:%d %.3fus\n", class_to_str(class), n, t);
+ }
+
+ {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&batch),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ };
+ struct timespec tv = {};
+ unsigned long nops;
+ double t;
+
+ igt_nsec_elapsed(&tv);
+ nops = 0;
+ do {
+ for (int r = 0; r < 1024; r++)
+ gem_execbuf(i915, &execbuf);
+ nops += 1024;
+ } while (igt_seconds_elapsed(&tv) < 2);
+ gem_sync(i915, batch.handle);
+
+ t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
+ igt_info("%s:* %.3fus\n", class_to_str(class), t);
+ }
+
+
+ igt_fork(child, count) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&batch),
+ .buffer_count = 1,
+ .flags = child + 1,
+ .rsvd1 = gem_context_clone(i915, ctx,
+ I915_CONTEXT_CLONE_ENGINES, 0),
+ };
+ struct timespec tv = {};
+ unsigned long nops;
+ double t;
+
+ igt_nsec_elapsed(&tv);
+ nops = 0;
+ do {
+ for (int r = 0; r < 1024; r++)
+ gem_execbuf(i915, &execbuf);
+ nops += 1024;
+ } while (igt_seconds_elapsed(&tv) < 2);
+ gem_sync(i915, batch.handle);
+
+ t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
+ igt_info("[%d] %s:%d %.3fus\n",
+ child, class_to_str(class), child, t);
+
+ memset(&tv, 0, sizeof(tv));
+ execbuf.flags = 0;
+
+ igt_nsec_elapsed(&tv);
+ nops = 0;
+ do {
+ for (int r = 0; r < 1024; r++)
+ gem_execbuf(i915, &execbuf);
+ nops += 1024;
+ } while (igt_seconds_elapsed(&tv) < 2);
+ gem_sync(i915, batch.handle);
+
+ t = igt_nsec_elapsed(&tv) * 1e-3 / nops;
+ igt_info("[%d] %s:* %.3fus\n",
+ child, class_to_str(class), t);
+
+ gem_context_destroy(i915, execbuf.rsvd1);
+ }
+
+ igt_waitchildren();
+
+ gem_context_destroy(i915, ctx);
+ free(ci);
+ }
+
+ gem_close(i915, batch.handle);
+ gem_quiescent_gpu(i915);
+}
+
+static void ping(int i915, uint32_t ctx, unsigned int engine)
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = batch_create(i915),
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .flags = engine,
+ .rsvd1 = ctx,
+ };
+ gem_execbuf(i915, &execbuf);
+ gem_sync(i915, obj.handle);
+ gem_close(i915, obj.handle);
+}
+
+static void semaphore(int i915)
+{
+ uint32_t block[2], scratch;
+ igt_spin_t *spin[3];
+
+ /*
+ * If we are using HW semaphores to launch serialised requests
+ * on different engine concurrently, we want to verify that real
+ * work is unimpeded.
+ */
+ igt_require(gem_scheduler_has_preemption(i915));
+
+ block[0] = gem_context_create(i915);
+ block[1] = gem_context_create(i915);
+
+ scratch = gem_create(i915, 4096);
+ spin[2] = igt_spin_new(i915, .dependency = scratch);
+ for (int class = 1; class < 32; class++) {
+ struct i915_engine_class_instance *ci;
+ unsigned int count;
+ uint32_t vip;
+
+ ci = list_engines(i915, 1u << class, &count);
+ if (!ci)
+ continue;
+
+ if (count < ARRAY_SIZE(block))
+ continue;
+
+ /* Ensure that we completely occupy all engines in this group */
+ count = ARRAY_SIZE(block);
+
+ for (int i = 0; i < count; i++) {
+ set_load_balancer(i915, block[i], ci, count);
+ spin[i] = __igt_spin_new(i915,
+ .ctx = block[i],
+ .dependency = scratch);
+ }
+
+ /*
+ * Either we haven't blocked both engines with semaphores,
+ * or we let the vip through. If not, we hang.
+ */
+ vip = gem_context_create(i915);
+ set_load_balancer(i915, vip, ci, count);
+ ping(i915, vip, 0);
+ gem_context_destroy(i915, vip);
+
+ for (int i = 0; i < count; i++)
+ igt_spin_free(i915, spin[i]);
+
+ free(ci);
+ }
+ igt_spin_free(i915, spin[2]);
+ gem_close(i915, scratch);
+
+ gem_context_destroy(i915, block[1]);
+ gem_context_destroy(i915, block[0]);
+
+ gem_quiescent_gpu(i915);
+}
+
+static void smoketest(int i915, int timeout)
+{
+ struct drm_i915_gem_exec_object2 batch[2] = {
+ { .handle = __batch_create(i915, 16380) }
+ };
+ unsigned int ncontext = 0;
+ uint32_t *contexts = NULL;
+ uint32_t *handles = NULL;
+
+ igt_require_sw_sync();
+
+ for (int class = 0; class < 32; class++) {
+ struct i915_engine_class_instance *ci;
+ unsigned int count = 0;
+
+ ci = list_engines(i915, 1u << class, &count);
+ if (!ci || count < 2) {
+ free(ci);
+ continue;
+ }
+
+ ncontext += 128;
+ contexts = realloc(contexts, sizeof(*contexts) * ncontext);
+ igt_assert(contexts);
+
+ for (unsigned int n = ncontext - 128; n < ncontext; n++) {
+ contexts[n] = load_balancer_create(i915, ci, count);
+ igt_assert(contexts[n]);
+ }
+
+ free(ci);
+ }
+ igt_debug("Created %d virtual engines (one per context)\n", ncontext);
+ igt_require(ncontext);
+
+ contexts = realloc(contexts, sizeof(*contexts) * ncontext * 4);
+ igt_assert(contexts);
+ memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
+ ncontext *= 2;
+ memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
+ ncontext *= 2;
+
+ handles = malloc(sizeof(*handles) * ncontext);
+ igt_assert(handles);
+ for (unsigned int n = 0; n < ncontext; n++)
+ handles[n] = gem_create(i915, 4096);
+
+ igt_until_timeout(timeout) {
+ unsigned int count = 1 + (rand() % (ncontext - 1));
+ IGT_CORK_FENCE(cork);
+ int fence = igt_cork_plug(&cork, i915);
+
+ for (unsigned int n = 0; n < count; n++) {
+ struct drm_i915_gem_execbuffer2 eb = {
+ .buffers_ptr = to_user_pointer(batch),
+ .buffer_count = ARRAY_SIZE(batch),
+ .rsvd1 = contexts[n],
+ .rsvd2 = fence,
+ .flags = I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_IN,
+ };
+ batch[1].handle = handles[n];
+ gem_execbuf(i915, &eb);
+ }
+ igt_permute_array(handles, count, igt_exchange_int);
+
+ igt_cork_unplug(&cork);
+ for (unsigned int n = 0; n < count; n++)
+ gem_sync(i915, handles[n]);
+
+ close(fence);
+ }
+
+ for (unsigned int n = 0; n < ncontext; n++) {
+ gem_close(i915, handles[n]);
+ __gem_context_destroy(i915, contexts[n]);
+ }
+ free(handles);
+ free(contexts);
+ gem_close(i915, batch[0].handle);
+}
+
+static bool has_context_engines(int i915)
+{
+ struct drm_i915_gem_context_param p = {
+ .param = I915_CONTEXT_PARAM_ENGINES,
+ };
+
+ return __gem_context_set_param(i915, &p) == 0;
+}
+
+static bool has_load_balancer(int i915)
+{
+ struct i915_engine_class_instance ci = {};
+ uint32_t ctx;
+ int err;
+
+ ctx = gem_context_create(i915);
+ err = __set_load_balancer(i915, ctx, &ci, 1);
+ gem_context_destroy(i915, ctx);
+
+ return err == 0;
+}
+
+igt_main
+{
+ int i915 = -1;
+
+ igt_skip_on_simulation();
+
+ igt_fixture {
+ i915 = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(i915);
+
+ gem_require_contexts(i915);
+ igt_require(has_context_engines(i915));
+ igt_require(has_load_balancer(i915));
+
+ igt_fork_hang_detector(i915);
+ }
+
+ igt_subtest("invalid-balancer")
+ invalid_balancer(i915);
+
+ igt_subtest("individual")
+ individual(i915);
+
+ igt_subtest("indices")
+ indices(i915);
+
+ igt_subtest("busy")
+ busy(i915);
+
+ igt_subtest_group {
+ static const struct {
+ const char *name;
+ unsigned int flags;
+ } phases[] = {
+ { "", 0 },
+ { "-pulse", PULSE },
+ { "-late", LATE },
+ { "-late-pulse", PULSE | LATE },
+ { }
+ };
+ for (typeof(*phases) *p = phases; p->name; p++)
+ igt_subtest_f("full%s", p->name)
+ full(i915, p->flags);
+ }
+
+ igt_subtest("nop")
+ nop(i915);
+
+ igt_subtest("semaphore")
+ semaphore(i915);
+
+ igt_subtest("smoke")
+ smoketest(i915, 20);
+
+ igt_fixture {
+ igt_stop_hang_detector();
+ }
+}
diff --git a/tests/meson.build b/tests/meson.build
index b7e56103d..20366b68b 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -293,6 +293,13 @@ test_executables += executable('gem_eio',
install : true)
test_list += 'gem_eio'
+test_executables += executable('gem_exec_balancer', 'i915/gem_exec_balancer.c',
+ dependencies : test_deps + [ lib_igt_perf ],
+ install_dir : libexecdir,
+ install_rpath : libexecdir_rpathdir,
+ install : true)
+test_progs += 'gem_exec_balancer'
+
test_executables += executable('gem_mocs_settings',
join_paths('i915', 'gem_mocs_settings.c'),
dependencies : test_deps + [ lib_igt_perf ],