Exercise the in-kernel load balancer checking that we can distribute batches across the set of ctx->engines to avoid load. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- tests/Makefile.am | 1 + tests/Makefile.sources | 1 + tests/gem_exec_balancer.c | 469 ++++++++++++++++++++++++++++++++++++++ tests/meson.build | 7 + 4 files changed, 478 insertions(+) create mode 100644 tests/gem_exec_balancer.c diff --git a/tests/Makefile.am b/tests/Makefile.am index ee5a7c5e8..71cdcaaaa 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -107,6 +107,7 @@ gem_close_race_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) gem_close_race_LDADD = $(LDADD) -lpthread gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) gem_ctx_thrash_LDADD = $(LDADD) -lpthread +gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) gem_exec_parallel_LDADD = $(LDADD) -lpthread gem_fence_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) diff --git a/tests/Makefile.sources b/tests/Makefile.sources index 4e42a4f05..70a144798 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -67,6 +67,7 @@ TESTS_progs = \ gem_exec_async \ gem_exec_await \ gem_exec_bad_domains \ + gem_exec_balancer \ gem_exec_basic \ gem_exec_big \ gem_exec_blt \ diff --git a/tests/gem_exec_balancer.c b/tests/gem_exec_balancer.c new file mode 100644 index 000000000..954696cb8 --- /dev/null +++ b/tests/gem_exec_balancer.c @@ -0,0 +1,469 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <sched.h>
+
+#include "igt.h"
+#include "igt_perf.h"
+#include "i915/gem_ring.h"
+#include "sw_sync.h"
+
+IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
+
+#define I915_CONTEXT_PARAM_ENGINES 0x7
+
+/*
+ * One engine, named by its (class, instance) pair.
+ *
+ * The struct is two u32 and is copied verbatim into the 64-bit slots of the
+ * I915_CONTEXT_PARAM_ENGINES payload and shuffled with igt_exchange_int64(),
+ * so it must stay exactly 8 bytes.
+ */
+struct class_instance {
+	uint32_t class;
+	uint32_t instance;
+};
+#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
+
+/*
+ * Probe for an engine by trying to open its PMU busy counter.
+ *
+ * @i915 is unused; the probe goes through perf, not the DRM fd.
+ * Returns true if the perf event could be opened (it is closed again
+ * immediately), false otherwise.
+ */
+static bool has_class_instance(int i915, uint32_t class, uint32_t instance)
+{
+	int fd;
+
+	fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));
+	if (fd != -1) {
+		close(fd);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Enumerate every engine whose class bit is set in @class_mask.
+ *
+ * Bit 0 of @class_mask corresponds to I915_ENGINE_CLASS_RENDER, bit 1 to the
+ * next class value, and so on. For each selected class all INSTANCE_COUNT
+ * possible instances are probed with has_class_instance().
+ *
+ * Returns a malloc()ed array that the caller must free(), with the number of
+ * valid entries stored in *@out. Returns NULL (and *@out == 0) if no engine
+ * was found or the initial allocation failed. If growing the array fails the
+ * engines found so far are returned as a (truncated) best-effort result.
+ */
+static struct class_instance *
+list_engines(int i915, uint32_t class_mask, unsigned int *out)
+{
+	unsigned int count = 0, size = 64;
+	struct class_instance *engines;
+
+	engines = malloc(size * sizeof(*engines));
+	if (!engines) {
+		*out = 0;
+		return NULL;
+	}
+
+	for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
+	     class_mask;
+	     class++, class_mask >>= 1) {
+		if (!(class_mask & 1))
+			continue;
+
+		for (unsigned int instance = 0;
+		     instance < INSTANCE_COUNT;
+		     instance++) {
+			if (!has_class_instance(i915, class, instance))
+				continue;
+
+			if (count == size) {
+				struct class_instance *e;
+
+				size *= 2;
+				e = realloc(engines, size * sizeof(*engines));
+				if (!e) {
+					/* Keep what we have; engines is still valid. */
+					*out = count;
+					return engines;
+				}
+
+				engines = e;
+			}
+
+			engines[count++] = (struct class_instance){
+				.class = class,
+				.instance = instance,
+			};
+		}
+	}
+
+	if (!count) {
+		free(engines);
+		engines = NULL;
+	}
+
+	*out = count;
+	return engines;
+}
+
+/*
+ * Install @ci[0..@count) as the engine map of context @ctx, chained to a
+ * load-balance extension whose mask selects every entry (~0).
+ *
+ * Returns the result of __gem_context_set_param(): 0 on success, otherwise
+ * an error value (no assertion is raised here so callers can probe for
+ * kernel support).
+ */
+static int __set_load_balancer(int i915, uint32_t ctx,
+			       const struct class_instance *ci,
+			       unsigned int count)
+{
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
+	struct balancer {
+		uint64_t next_extension;
+		uint64_t name;
+
+		uint64_t flags;
+		uint64_t mask;
+
+		uint64_t mbz[4];
+	} balancer = {
+		.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
+		.mask = ~0ull,
+	};
+	/*
+	 * Parameter payload: slot 0 holds the pointer to the first extension,
+	 * slots 1..count hold one class_instance each. A plain VLA is used
+	 * rather than a variable-length array inside a struct, as the latter
+	 * is a GCC-only extension; the layout and size are the same.
+	 */
+	uint64_t engines[1 + count];
+	struct drm_i915_gem_context_param p = {
+		.ctx_id = ctx,
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.size = sizeof(engines),
+		.value = to_user_pointer(engines),
+	};
+
+	engines[0] = to_user_pointer(&balancer);
+	/* Relies on sizeof(struct class_instance) == sizeof(uint64_t). */
+	memcpy(&engines[1], ci, count * sizeof(*ci));
+
+	return __gem_context_set_param(i915, &p);
+}
+
+/* As __set_load_balancer(), but fails the (sub)test on error. */
+static void set_load_balancer(int i915, uint32_t ctx,
+			      const struct class_instance *ci,
+			      unsigned int count)
+{
+	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
+}
+
+/*
+ * Create a new context with gem_queue_create() and give it the engine map
+ * @ci[0..@count). The caller owns the returned context id and must destroy
+ * it with gem_context_destroy().
+ */
+static uint32_t load_balancer_create(int i915,
+				     const struct class_instance *ci,
+				     unsigned int count)
+{
+	uint32_t ctx;
+
+	ctx = gem_queue_create(i915);
+	set_load_balancer(i915, ctx, ci, count);
+
+	return ctx;
+}
+
+/*
+ * Yield the CPU and sleep for @period_us microseconds before sampling.
+ * Presumably this gives kernel threads a chance to act on the work that was
+ * just submitted -- the intent is inferred from the name only.
+ */
+static void kick_kthreads(int period_us)
+{
+	sched_yield();
+	usleep(period_us);
+}
+
+/*
+ * Sample a single perf event twice, @period_us apart, and return the ratio
+ * of the change in data[0] to the change in data[1].
+ *
+ * data[0] is treated as the busy counter and data[1] as the elapsed time;
+ * this presumably matches the read format perf_i915_open() configures
+ * (TODO confirm against igt_perf). The first sample is stored negated so
+ * that adding the second sample yields the delta (unsigned wrap-around).
+ */
+static double measure_load(int pmu, int period_us)
+{
+	uint64_t data[2];
+	uint64_t d_t, d_v;
+
+	kick_kthreads(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	d_v = -data[0];
+	d_t = -data[1];
+
+	usleep(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	d_v += data[0];
+	d_t += data[1];
+
+	return d_v / (double)d_t;
+}
+
+/*
+ * Sample a perf event group of @num counters twice, @period_us apart, and
+ * return the smallest per-counter load (delta value / delta time).
+ *
+ * @pmu is the group leader. Each read is expected to return 2 + @num u64:
+ * data[1] is used as the elapsed time and data[2 + n] as the value of the
+ * n-th counter; data[0] is not used (presumably the number of events in the
+ * group read format -- TODO confirm against igt_perf).
+ */
+static double measure_min_load(int pmu, unsigned int num, int period_us)
+{
+	uint64_t data[2 + num];
+	uint64_t d_t, d_v[num];
+	uint64_t min = -1, max = 0;
+
+	kick_kthreads(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	for (unsigned int n = 0; n < num; n++)
+		d_v[n] = -data[2 + n];
+	d_t = -data[1];
+
+	usleep(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+
+	d_t += data[1];
+	for (unsigned int n = 0; n < num; n++) {
+		d_v[n] += data[2 + n];
+		igt_debug("engine[%u]: %.1f%%\n",
+			  n, d_v[n] / (double)d_t * 100);
+		if (d_v[n] < min)
+			min = d_v[n];
+		if (d_v[n] > max)
+			max = d_v[n];
+	}
+
+	igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
+		  d_t, min / (double)d_t * 100, max / (double)d_t * 100);
+
+	return min / (double)d_t;
+}
+
+/*
+ * Run a spinner on slot @idx + 1 of @ctx and check, via the PMU busy counter
+ * of @ci[@idx], that the corresponding engine is more than 90% busy.
+ *
+ * Slot 0 is skipped; full() below describes I915_EXEC_DEFAULT as the
+ * automatically balanced selection, so the explicit entries presumably start
+ * at index 1.
+ */
+static void check_individual_engine(int i915,
+				    uint32_t ctx,
+				    const struct class_instance *ci,
+				    int idx)
+{
+	igt_spin_t *spin;
+	double load;
+	int pmu;
+
+	pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].class,
+						  ci[idx].instance));
+	/* Fail here with a clear message rather than in a later read(). */
+	igt_assert_f(pmu >= 0,
+		     "failed to open PMU for engine %d (class:instance %u:%u)\n",
+		     idx, ci[idx].class, ci[idx].instance);
+
+	spin = igt_spin_batch_new(i915, .ctx = ctx, .engine = idx + 1);
+	load = measure_load(pmu, 10000);
+	igt_spin_batch_free(i915, spin);
+
+	close(pmu);
+
+	igt_assert_f(load > 0.90,
+		     "engine %d (class:instance %u:%u) was found to be only %.1f%% busy\n",
+		     idx, ci[idx].class, ci[idx].instance, load*100);
+}
+
+/* Subtest: every explicit slot of the engine map maps to the right engine. */
+static void individual(int i915)
+{
+	uint32_t ctx;
+
+	/*
+	 * I915_CONTEXT_PARAM_ENGINES allows us to index into the user
+	 * supplied array from gem_execbuf(). Our check is to build the
+	 * ctx->engine[] with various different engine classes, feed in
+	 * a spinner and then ask pmu to confirm that the expected engine
+	 * was busy.
+	 */
+
+	ctx = gem_queue_create(i915);
+
+	for (int mask = 0; mask < 32; mask++) {
+		struct class_instance *ci;
+		unsigned int count;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %u engines of class %d\n", count, mask);
+
+		for (unsigned int pass = 0; pass < count; pass++) { /* approx. count! */
+			/* Entries are 8 bytes, so the int64 exchanger fits. */
+			igt_permute_array(ci, count, igt_exchange_int64);
+			set_load_balancer(i915, ctx, ci, count);
+			for (unsigned int n = 0; n < count; n++)
+				check_individual_engine(i915, ctx, ci, n);
+		}
+
+		free(ci);
+	}
+
+	gem_context_destroy(i915, ctx);
+}
+
+/*
+ * Open the busy counter of @ci as a member of the perf group led by @pmu
+ * (pass -1 to start a new group). Returns the value of
+ * perf_i915_open_group(), i.e. a new file descriptor the caller must close.
+ */
+static int add_pmu(int pmu, const struct class_instance *ci)
+{
+	return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->class,
+							 ci->instance),
+				    pmu);
+}
+
+/*
+ * Create a 4KiB buffer object containing just MI_BATCH_BUFFER_END, i.e. a
+ * batch that completes immediately. The caller owns the returned handle.
+ */
+static uint32_t batch_create(int i915)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	uint32_t handle;
+
+	handle = gem_create(i915, 4096);
+	gem_write(i915, handle, 0, &bbe, sizeof(bbe));
+
+	return handle;
+}
+
+/*
+ * Subtest: N spinners submitted to N load-balanced contexts must keep all N
+ * engines of a class busy.
+ *
+ * @flags:
+ *   PULSE - submit a trivial batch before each spinner.
+ *   LATE  - hold every submission behind a cork fence and release them all
+ *           together just before measuring (requires sw_sync).
+ */
+static void full(int i915, unsigned int flags)
+#define PULSE 0x1
+#define LATE 0x2
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+
+	if (flags & LATE)
+		igt_require_sw_sync();
+
+	/*
+	 * I915_CONTEXT_PARAM_ENGINES changes the meaning of I915_EXEC_DEFAULT
+	 * to provide an automatic selection from the ctx->engine[]. It
+	 * employs load-balancing to evenly distribute the workload across the
+	 * array. If we submit N spinners, we expect them to be simultaneously
+	 * running across N engines and use PMU to confirm that the entire
+	 * set of engines are busy.
+	 *
+	 * We complicate matters by interspersing short-lived tasks to
+	 * challenge the kernel to search for space in which to insert new
+	 * batches.
+	 */
+
+	for (int mask = 0; mask < 32; mask++) {
+		struct class_instance *ci;
+		igt_spin_t *spin = NULL;
+		unsigned int count;
+		IGT_CORK_FENCE(cork);
+		double load;
+		int fence = -1;
+		int *pmu;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %u engines of class %d\n", count, mask);
+
+		pmu = malloc(sizeof(*pmu) * count);
+		igt_assert(pmu);
+
+		if (flags & LATE)
+			fence = igt_cork_plug(&cork, i915);
+
+		/* -1 makes the first add_pmu() call create the group leader. */
+		pmu[0] = -1;
+		for (unsigned int n = 0; n < count; n++) {
+			uint32_t ctx;
+
+			pmu[n] = add_pmu(pmu[0], &ci[n]);
+			igt_assert_f(pmu[n] >= 0,
+				     "failed to open PMU for class:instance %u:%u\n",
+				     ci[n].class, ci[n].instance);
+
+			if (flags & PULSE) {
+				struct drm_i915_gem_execbuffer2 eb = {
+					.buffers_ptr = to_user_pointer(&batch),
+					.buffer_count = 1,
+					.rsvd2 = fence,
+					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
+				};
+
+				gem_execbuf(i915, &eb);
+			}
+
+			/*
+			 * Each spinner needs to be on a new timeline,
+			 * otherwise they will just sit in the single queue
+			 * and not run concurrently.
+			 */
+			ctx = load_balancer_create(i915, ci, count);
+
+			if (spin == NULL) {
+				spin = __igt_spin_batch_new(i915, ctx, 0, 0);
+			} else {
+				/* Resubmit the same spinning batch on the new context. */
+				struct drm_i915_gem_exec_object2 obj = {
+					.handle = spin->handle,
+				};
+				struct drm_i915_gem_execbuffer2 eb = {
+					.buffers_ptr = to_user_pointer(&obj),
+					.buffer_count = 1,
+					.rsvd1 = ctx,
+					.rsvd2 = fence,
+					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
+				};
+
+				gem_execbuf(i915, &eb);
+			}
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		if (flags & LATE) {
+			igt_cork_unplug(&cork);
+			close(fence);
+		}
+
+		load = measure_min_load(pmu[0], count, 10000);
+		igt_spin_batch_free(i915, spin);
+
+		/*
+		 * Every add_pmu() call returned its own file descriptor, so
+		 * close all of them (leader last), not just the group leader.
+		 */
+		for (unsigned int n = count; n--; )
+			close(pmu[n]);
+		free(pmu);
+
+		free(ci);
+
+		igt_assert_f(load > 0.90,
+			     "minimum load for %u x class:%d was found to be only %.1f%% busy\n",
+			     count, mask, load*100);
+	}
+
+	gem_close(i915, batch.handle);
+}
+
+/*
+ * Probe for I915_CONTEXT_PARAM_ENGINES by setting it with an otherwise
+ * zeroed parameter block (ctx_id 0, no payload). True if the call succeeds.
+ */
+static bool has_context_engines(int i915)
+{
+	struct drm_i915_gem_context_param p = {
+		.param = I915_CONTEXT_PARAM_ENGINES,
+	};
+
+	return __gem_context_set_param(i915, &p) == 0;
+}
+
+/*
+ * Probe for the load-balance extension by installing a single-entry engine
+ * map (class 0, instance 0) on a temporary context.
+ */
+static bool has_load_balancer(int i915)
+{
+	struct class_instance ci = { 0 };
+	uint32_t ctx;
+	int err;
+
+	ctx = gem_queue_create(i915);
+	err = __set_load_balancer(i915, ctx, &ci, 1);
+	gem_context_destroy(i915, ctx);
+
+	return err == 0;
+}
+
+igt_main
+{
+	int i915 = -1;
+
+	igt_skip_on_simulation();
+
+	igt_fixture {
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+
+		gem_require_contexts(i915);
+		igt_require(has_context_engines(i915));
+		igt_require(has_load_balancer(i915));
+
+		igt_fork_hang_detector(i915);
+	}
+
+	igt_subtest("individual")
+		individual(i915);
+
+	igt_subtest_group {
+		/* Table of "full" variants, terminated by a NULL name. */
+		static const struct {
+			const char *name;
+			unsigned int flags;
+		} phases[] = {
+			{ "", 0 },
+			{ "-pulse", PULSE },
+			{ "-late", LATE },
+			{ "-late-pulse", PULSE | LATE },
+			{ NULL, 0 }
+		};
+		for (typeof(*phases) *p = phases; p->name; p++)
+			igt_subtest_f("full%s", p->name)
+				full(i915, p->flags);
+	}
+
+	igt_fixture {
+		igt_stop_hang_detector();
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build index b6da4f479..420d626fb 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -256,6 +256,13 @@ test_executables += executable('gem_eio', 'gem_eio.c', install : true) test_progs += 'gem_eio' +test_executables += executable('gem_exec_balancer', 'gem_exec_balancer.c', + dependencies : test_deps + [ lib_igt_perf
], + install_dir : libexecdir, + install_rpath : libexecdir_rpathdir, + install : true) +test_progs += 'gem_exec_balancer' + test_executables += executable('gem_mocs_settings', 'gem_mocs_settings.c', dependencies : test_deps + [ lib_igt_perf ], install_dir : libexecdir, -- 2.19.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx