The submit-fence + load_balancing APIs allow us to execute a named
pair of engines in parallel; that is, by submitting a request to one
engine, we can then use the generated submit-fence to submit a second
request to another engine and have it execute at the same time.
Furthermore, by specifying bonded pairs, we can direct the virtual
engine to use a particular engine in parallel to the first request.
v2: Measure load across all bonded siblings to check we don't
miss an accidental execution on another sibling.
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
tests/i915/gem_exec_balancer.c | 277 +++++++++++++++++++++++++++++++--
1 file changed, 262 insertions(+), 15 deletions(-)
diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
index 40a2719c0..c76113476 100644
--- a/tests/i915/gem_exec_balancer.c
+++ b/tests/i915/gem_exec_balancer.c
@@ -98,9 +98,35 @@ list_engines(int i915, uint32_t class_mask, unsigned int *out)
return engines;
}
+static int __set_engines(int i915, uint32_t ctx,
+ const struct i915_engine_class_instance *ci,
+ unsigned int count)
+{
+ I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, count);
+ struct drm_i915_gem_context_param p = {
+ .ctx_id = ctx,
+ .param = I915_CONTEXT_PARAM_ENGINES,
+ .size = sizeof(engines),
+ .value = to_user_pointer(&engines)
+ };
+
+ engines.extensions = 0;
+ memcpy(engines.engines, ci, sizeof(engines.engines));
+
+ return __gem_context_set_param(i915, &p);
+}
+
+static void set_engines(int i915, uint32_t ctx,
+ const struct i915_engine_class_instance *ci,
+ unsigned int count)
+{
+ igt_assert_eq(__set_engines(i915, ctx, ci, count), 0);
+}
+
static int __set_load_balancer(int i915, uint32_t ctx,
const struct i915_engine_class_instance *ci,
- unsigned int count)
+ unsigned int count,
+ void *ext)
{
I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balancer, count);
I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1 + count);
@@ -113,6 +139,7 @@ static int __set_load_balancer(int i915, uint32_t ctx,
memset(&balancer, 0, sizeof(balancer));
balancer.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
+ balancer.base.next_extension = to_user_pointer(ext);
igt_assert(count);
balancer.num_siblings = count;
@@ -131,9 +158,10 @@ static int __set_load_balancer(int i915, uint32_t ctx,
static void set_load_balancer(int i915, uint32_t ctx,
const struct i915_engine_class_instance *ci,
- unsigned int count)
+ unsigned int count,
+ void *ext)
{
- igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
+ igt_assert_eq(__set_load_balancer(i915, ctx, ci, count, ext), 0);
}
static uint32_t load_balancer_create(int i915,
@@ -143,7 +171,7 @@ static uint32_t load_balancer_create(int i915,
uint32_t ctx;
ctx = gem_context_create(i915);
- set_load_balancer(i915, ctx, ci, count);
+ set_load_balancer(i915, ctx, ci, count, NULL);
return ctx;
}
@@ -287,6 +315,74 @@ static void invalid_balancer(int i915)
}
}
+static void invalid_bonds(int i915)
+{
+ I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
+ I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1);
+ struct drm_i915_gem_context_param p = {
+ .ctx_id = gem_context_create(i915),
+ .param = I915_CONTEXT_PARAM_ENGINES,
+ .value = to_user_pointer(&engines),
+ .size = sizeof(engines),
+ };
+ uint32_t handle;
+ void *ptr;
+
+ memset(&engines, 0, sizeof(engines));
+ gem_context_set_param(i915, &p);
+
+ memset(bonds, 0, sizeof(bonds));
+ for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
+ bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
+ bonds[n].base.next_extension =
+ n ? to_user_pointer(&bonds[n - 1]) : 0;
+ bonds[n].num_bonds = 1;
+ }
+ engines.extensions = to_user_pointer(&bonds);
+ gem_context_set_param(i915, &p);
+
+ bonds[0].base.next_extension = -1ull;
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+ bonds[0].base.next_extension = to_user_pointer(&bonds[0]);
+ igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);
+
+ engines.extensions = to_user_pointer(&bonds[1]);
+ igt_assert_eq(__gem_context_set_param(i915, &p), -E2BIG);
+ bonds[0].base.next_extension = 0;
+ gem_context_set_param(i915, &p);
+
+ handle = gem_create(i915, 4096 * 3);
+ ptr = gem_mmap__gtt(i915, handle, 4096 * 3, PROT_WRITE);
+ gem_close(i915, handle);
+
+ memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
+ engines.extensions = to_user_pointer(ptr) + 4096;
+ gem_context_set_param(i915, &p);
+
+ memcpy(ptr, &bonds[0], sizeof(bonds[0]));
+ bonds[0].base.next_extension = to_user_pointer(ptr);
+ memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
+ gem_context_set_param(i915, &p);
+
+ munmap(ptr, 4096);
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+ bonds[0].base.next_extension = 0;
+ memcpy(ptr + 8192, &bonds[0], sizeof(bonds[0]));
+ bonds[0].base.next_extension = to_user_pointer(ptr) + 8192;
+ memcpy(ptr + 4096, &bonds[0], sizeof(bonds[0]));
+ gem_context_set_param(i915, &p);
+
+ munmap(ptr + 8192, 4096);
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+ munmap(ptr + 4096, 4096);
+ igt_assert_eq(__gem_context_set_param(i915, &p), -EFAULT);
+
+ gem_context_destroy(i915, p.ctx_id);
+}
+
static void kick_kthreads(void)
{
usleep(20 * 1000); /* 20ms should be enough for ksoftirqd! */
@@ -346,6 +442,38 @@ static double measure_min_load(int pmu, unsigned int num, int period_us)
return min / (double)d_t;
}
+static void measure_all_load(int pmu, double *v, unsigned int num, int period_us)
+{
+ uint64_t data[2 + num];
+ uint64_t d_t, d_v[num];
+
+ kick_kthreads();
+
+ igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+ for (unsigned int n = 0; n < num; n++)
+ d_v[n] = -data[2 + n];
+ d_t = -data[1];
+
+ usleep(period_us);
+
+ igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+
+ d_t += data[1];
+ for (unsigned int n = 0; n < num; n++) {
+ d_v[n] += data[2 + n];
+ igt_debug("engine[%d]: %.1f%%\n",
+ n, d_v[n] / (double)d_t * 100);
+ v[n] = d_v[n] / (double)d_t;
+ }
+}
+
+static int add_pmu(int pmu, const struct i915_engine_class_instance *ci)
+{
+ return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->engine_class,
+ ci->engine_instance),
+ pmu);
+}
+
static void check_individual_engine(int i915,
uint32_t ctx,
const struct i915_engine_class_instance *ci,
@@ -394,7 +522,7 @@ static void individual(int i915)
for (int pass = 0; pass < count; pass++) { /* approx. count! */
igt_assert(sizeof(*ci) == sizeof(int));
igt_permute_array(ci, count, igt_exchange_int);
- set_load_balancer(i915, ctx, ci, count);
+ set_load_balancer(i915, ctx, ci, count, NULL);
for (unsigned int n = 0; n < count; n++)
check_individual_engine(i915, ctx, ci, n);
}
@@ -406,6 +534,123 @@ static void individual(int i915)
gem_quiescent_gpu(i915);
}
+static void bonded(int i915, unsigned int flags)
+#define CORK 0x1
+{
+ I915_DEFINE_CONTEXT_ENGINES_BOND(bonds[16], 1);
+ struct i915_engine_class_instance *master_engines;
+ uint32_t master;
+
+ /*
+ * I915_CONTEXT_PARAM_ENGINES provides an extension that allows us
+ * to specify which engine(s) to pair with a parallel (EXEC_SUBMIT)
+ * request submitted to another engine.
+ */
+
+ master = gem_queue_create(i915);
+
+ memset(bonds, 0, sizeof(bonds));
+ for (int n = 0; n < ARRAY_SIZE(bonds); n++) {
+ bonds[n].base.name = I915_CONTEXT_ENGINES_EXT_BOND;
+ bonds[n].base.next_extension =
+ n ? to_user_pointer(&bonds[n - 1]) : 0;
+ bonds[n].num_bonds = 1;
+ }
+
+ for (int class = 0; class < 32; class++) {
+ struct i915_engine_class_instance *siblings;
+ unsigned int count, limit;
+ uint32_t ctx;
+ int pmu[16];
+ int n;
+
+ siblings = list_engines(i915, 1u << class, &count);
+ if (!siblings)
+ continue;
+
+ if (count < 2) {
+ free(siblings);
+ continue;
+ }
+
+ master_engines = list_engines(i915, ~(1u << class), &limit);
+ set_engines(i915, master, master_engines, limit);
+
+ limit = min(count, limit);
+ igt_assert(limit <= ARRAY_SIZE(bonds));
+ for (n = 0; n < limit; n++) {
+ bonds[n].master = master_engines[n];
+ bonds[n].engines[0] = siblings[n];
+ }
+
+ ctx = gem_context_clone(i915,
+ master, I915_CONTEXT_CLONE_VM,
+ I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE);
+ set_load_balancer(i915, ctx, siblings, count, &bonds[limit - 1]);
+
+ pmu[0] = -1;
+ for (n = 0; n < limit; n++)
+ pmu[n] = add_pmu(pmu[0], &siblings[n]);
+
+ for (n = 0; n < limit; n++) {
+ struct drm_i915_gem_execbuffer2 eb;
+ igt_spin_t *spin, *plug;
+ IGT_CORK_HANDLE(cork);
+ double v[limit];
+
+ igt_assert(siblings[n].engine_class != master_engines[n].engine_class);
+
+ plug = NULL;
+ if (flags & CORK) {
+ plug = __igt_spin_new(i915,
+ .ctx = master,
+ .engine = n,
+ .dependency = igt_cork_plug(&cork, i915));
+ }
+
+ spin = __igt_spin_new(i915,
+ .ctx = master,
+ .engine = n,
+ .flags = IGT_SPIN_FENCE_OUT);
+
+ eb = spin->execbuf;
+ eb.rsvd1 = ctx;
+ eb.rsvd2 = spin->out_fence;
+ eb.flags = I915_EXEC_FENCE_SUBMIT;
+ gem_execbuf(i915, &eb);
+
+ if (plug) {
+ igt_cork_unplug(&cork);
+ igt_spin_free(i915, plug);
+ }
+
+ measure_all_load(pmu[0], v, limit, 10000);
+ igt_spin_free(i915, spin);
+
+ igt_assert_f(v[n] > 0.90,
+ "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
+ n, siblings[n].engine_class, siblings[n].engine_instance,
+ 100 * v[n]);
+ for (int other = 0; other < limit; other++) {
+ if (other == n)
+ continue;
+
+ igt_assert_f(v[other] == 0,
+ "engine %d (class:instance %d:%d) was not idle, and actually %.1f%% busy\n",
+ other, siblings[other].engine_class, siblings[other].engine_instance,
+ 100 * v[other]);
+ }
+ }
+
+ close(pmu[0]);
+ gem_context_destroy(i915, ctx);
+ free(master_engines);
+ free(siblings);
+ }
+
+ gem_context_destroy(i915, master);
+}
+
static void indices(int i915)
{
I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
@@ -564,13 +809,6 @@ static void busy(int i915)
gem_quiescent_gpu(i915);
}
-static int add_pmu(int pmu, const struct i915_engine_class_instance *ci)
-{
- return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->engine_class,
- ci->engine_instance),
- pmu);
-}
-
static void full(int i915, unsigned int flags)
#define PULSE 0x1
#define LATE 0x2
@@ -860,7 +1098,7 @@ static void semaphore(int i915)
count = ARRAY_SIZE(block);
for (int i = 0; i < count; i++) {
- set_load_balancer(i915, block[i], ci, count);
+ set_load_balancer(i915, block[i], ci, count, NULL);
spin[i] = __igt_spin_new(i915,
.ctx = block[i],
.dependency = scratch);
@@ -871,7 +1109,7 @@ static void semaphore(int i915)
* or we let the vip through. If not, we hang.
*/
vip = gem_context_create(i915);
- set_load_balancer(i915, vip, ci, count);
+ set_load_balancer(i915, vip, ci, count, NULL);
ping(i915, vip, 0);
gem_context_destroy(i915, vip);
@@ -986,7 +1224,7 @@ static bool has_load_balancer(int i915)
int err;
ctx = gem_context_create(i915);
- err = __set_load_balancer(i915, ctx, &ci, 1);
+ err = __set_load_balancer(i915, ctx, &ci, 1, NULL);
gem_context_destroy(i915, ctx);
return err == 0;
@@ -1012,6 +1250,9 @@ igt_main
igt_subtest("invalid-balancer")
invalid_balancer(i915);
+ igt_subtest("invalid-bonds")
+ invalid_bonds(i915);
+
igt_subtest("individual")
individual(i915);
@@ -1046,6 +1287,12 @@ igt_main
igt_subtest("smoke")
smoketest(i915, 20);
+ igt_subtest("bonded-imm")
+ bonded(i915, 0);
+
+ igt_subtest("bonded-cork")
+ bonded(i915, CORK);
+
igt_fixture {
igt_stop_hang_detector();
}