Quoting Tvrtko Ursulin (2020-12-14 15:51:24)
> 
> On 14/12/2020 10:51, Chris Wilson wrote:
> > Oversaturate the virtual engines on the system and check that each
> > workload receives a fair share of the available GPU time.
> 
> A simpler one to start with, I support that.
> 
> > 
> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> > ---
> >  tests/i915/gem_exec_balancer.c | 154 +++++++++++++++++++++++++++++++++
> >  1 file changed, 154 insertions(+)
> > 
> > diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> > index 35a032ccb..5efd586ad 100644
> > --- a/tests/i915/gem_exec_balancer.c
> > +++ b/tests/i915/gem_exec_balancer.c
> > @@ -2763,6 +2763,157 @@ static void smoketest(int i915, int timeout)
> >  	gem_close(i915, batch[0].handle);
> >  }
> > 
> > +static uint32_t read_ctx_timestamp(int i915, uint32_t ctx)
> > +{
> > +	struct drm_i915_gem_relocation_entry reloc;
> > +	struct drm_i915_gem_exec_object2 obj = {
> > +		.handle = gem_create(i915, 4096),
> > +		.offset = 32 << 20,
> > +		.relocs_ptr = to_user_pointer(&reloc),
> > +		.relocation_count = 1,
> > +	};
> > +	struct drm_i915_gem_execbuffer2 execbuf = {
> > +		.buffers_ptr = to_user_pointer(&obj),
> > +		.buffer_count = 1,
> > +		.rsvd1 = ctx,
> > +	};
> > +	uint32_t *map, *cs;
> > +	uint32_t ts;
> > +
> > +	cs = map = gem_mmap__device_coherent(i915, obj.handle,
> > +					     0, 4096, PROT_WRITE);
> > +
> > +	*cs++ = 0x24 << 23 | 1 << 19 | 2; /* relative SRM */
> > +	*cs++ = 0x3a8; /* CTX_TIMESTAMP */
> > +	memset(&reloc, 0, sizeof(reloc));
> > +	reloc.target_handle = obj.handle;
> > +	reloc.presumed_offset = obj.offset;
> > +	reloc.offset = offset_in_page(cs);
> > +	reloc.delta = 4000;
> > +	*cs++ = obj.offset + 4000;
> > +	*cs++ = obj.offset >> 32;
> > +
> > +	*cs++ = MI_BATCH_BUFFER_END;
> > +
> > +	gem_execbuf(i915, &execbuf);
> > +	gem_sync(i915, obj.handle);
> > +	gem_close(i915, obj.handle);
> > +
> > +	ts = map[1000];
> > +	munmap(map, 4096);
> > +
> > +	return ts;
> > +}
> > +
> > +static int cmp_u32(const void *A, const void *B)
> > +{
> > +	const uint32_t *a = A, *b = B;
> > +
> > +	if (*a < *b)
> > +		return -1;
> > +	else if (*a > *b)
> > +		return 1;
> > +	else
> > +		return 0;
> > +}
> > +
> > +static int read_ctx_timestamp_frequency(int i915)
> > +{
> > +	int value = 12500000; /* icl!!! are you feeling alright? CTX vs CS */
> > +	drm_i915_getparam_t gp = {
> > +		.value = &value,
> > +		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
> > +	};
> > +	if (intel_gen(intel_get_drm_devid(i915)) != 11)
> > +		ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
> > +	return value;
> > +}
> > +
> > +static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
> > +{
> > +	return (x + y - 1) / y;
> > +}
> > +
> > +static uint64_t ticks_to_ns(int i915, uint64_t ticks)
> > +{
> > +	return div64_u64_round_up(ticks * NSEC_PER_SEC,
> > +				  read_ctx_timestamp_frequency(i915));
> > +}
> > +
> > +static void __fairslice(int i915,
> > +			const struct i915_engine_class_instance *ci,
> > +			unsigned int count)
> > +{
> > +	igt_spin_t *spin = NULL;
> > +	uint32_t ctx[count + 1];
> > +	uint32_t ts[count + 1];
> > +
> > +	igt_debug("Launching %zd spinners on %s\n",
> > +		  ARRAY_SIZE(ctx), class_to_str(ci->engine_class));
> > +	igt_assert(ARRAY_SIZE(ctx) >= 3);
> > +
> > +	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
> > +		ctx[i] = load_balancer_create(i915, ci, count);
> > +		if (spin == NULL) {
> > +			spin = __igt_spin_new(i915, .ctx = ctx[i]);
> > +		} else {
> > +			struct drm_i915_gem_execbuffer2 eb = {
> > +				.buffer_count = 1,
> > +				.buffers_ptr = to_user_pointer(&spin->obj[IGT_SPIN_BATCH]),
> > +				.rsvd1 = ctx[i],
> > +			};
> > +			gem_execbuf(i915, &eb);
> > +		}
> > +	}
> > +
> > +	sleep(2); /* over the course of many timeslices */
> > +
> > +	igt_assert(gem_bo_busy(i915, spin->handle));
> > +	igt_spin_end(spin);
> > +	igt_debug("Cancelled spinners\n");
> > +
> > +	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
> > +		ts[i] = read_ctx_timestamp(i915, ctx[i]);
> 
> Now if we had context stats ioctl. :)
> 
> > +
> > +	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
> > +		gem_context_destroy(i915, ctx[i]);
> > +	igt_spin_free(i915, spin);
> > +
> > +	qsort(ts, ARRAY_SIZE(ctx), sizeof(*ts), cmp_u32);
> > +	igt_info("%s: [%.1f, %.1f, %.1f] ms, expect %1.fms\n",
> > +		 class_to_str(ci->engine_class),
> > +		 1e-6 * ticks_to_ns(i915, ts[0]),
> > +		 1e-6 * ticks_to_ns(i915, ts[(count + 1) / 2]),
> > +		 1e-6 * ticks_to_ns(i915, ts[count]),
> > +		 2e3 * count / ARRAY_SIZE(ctx));
> > +
> > +	igt_assert_f(ts[count], "CTX_TIMESTAMP not reported!\n");
> > +	igt_assert_f((ts[count] - ts[0]) * 6 < ts[(count + 1) / 2],
> > +		     "Range of timeslices greater than tolerable: %.2fms > %.2fms; unfair!\n",
> > +		     1e-6 * ticks_to_ns(i915, ts[count] - ts[0]),
> > +		     1e-6 * ticks_to_ns(i915, ts[(count + 1) / 2]) / 6);
> 
> Just put a comment saying what is the criteria please. Six ranges vs
> median, must be some statistical thing but that's not my forte.

The usual ballpark estimate for a test that should be reliable in CI.

I guess if we model it as a drunken walk, the range would scale with the
square-root of the number of timeslices. That would suggest we should not
expect deviations more than +-50ms. I plucked ~200ms out of the air, which
turns out to be not too terrible a safety factor. :)
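To put rough numbers on that ballpark (purely illustrative, not taken from
the patch: the 1ms quantum is only the usual DRM_I915_TIMESLICE_DURATION
default, and the two-engine count is just an example):

/*
 * Back-of-the-envelope sketch of the fairness tolerance.
 * Assumes a 1ms timeslice, the 2s runtime from sleep(2) in __fairslice(),
 * and count = 2 physical engines behind each virtual engine.
 */
#include <math.h>
#include <stdio.h>

int main(void)
{
	const double timeslice_ms = 1.0;   /* assumed scheduler quantum */
	const double runtime_ms = 2000.0;  /* sleep(2) */
	const int count = 2;               /* example engine count */

	/* drunken-walk model: deviation grows as sqrt(#timeslices) */
	double slices = runtime_ms / timeslice_ms;
	double drift_ms = sqrt(slices) * timeslice_ms;

	/* what the test tolerates: range must stay below median / 6 */
	double median_ms = runtime_ms * count / (count + 1);
	double allowed_ms = median_ms / 6;

	printf("expected drift ~ +-%.0fms, allowed range %.0fms\n",
	       drift_ms, allowed_ms);
	return 0;
}

That works out to roughly +-45ms of expected drift against a ~220ms
allowance, i.e. a 4-5x margin, which is where the +-50ms and ~200ms
figures above come from.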
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx