Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > An important property for multi-client systems is that each client gets > a 'fair' allotment of system time. (Where fairness is at the whim of the > context properties, such as priorities.) This test forks N independent > clients (albeit they happen to share a single vm), and does an equal > amount of work in each client and asserts that they take an equal amount of > time. > > Though we have never claimed to have a completely fair scheduler, that > is what is expected. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > Cc: Ramalingam C <ramalingam.c@xxxxxxxxx> > --- > tests/i915/gem_exec_schedule.c | 418 +++++++++++++++++++++++++++++++++ > 1 file changed, 418 insertions(+) > > diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c > index 56c638833..d1121ecd2 100644 > --- a/tests/i915/gem_exec_schedule.c > +++ b/tests/i915/gem_exec_schedule.c > @@ -2495,6 +2495,417 @@ static void measure_semaphore_power(int i915) > rapl_close(&pkg); > } > > +static int read_timestamp_frequency(int i915) > +{ > + int value = 0; > + drm_i915_getparam_t gp = { > + .value = &value, > + .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY, > + }; > + ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp); > + return value; > +} > + > +static uint64_t div64_u64_round_up(uint64_t x, uint64_t y) > +{ > + return (x + y - 1) / y; > +} > + > +static uint64_t ns_to_ticks(int i915, uint64_t ns) > +{ > + return div64_u64_round_up(ns * read_timestamp_frequency(i915), > + NSEC_PER_SEC); > +} > + > +static uint64_t ticks_to_ns(int i915, uint64_t ticks) > +{ > + return div64_u64_round_up(ticks * NSEC_PER_SEC, > + read_timestamp_frequency(i915)); > +} > + > +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) > + > +#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1) > +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2)) > +/* Opcodes for MI_MATH_INSTR */ > +#define 
MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0) > +#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2) > +#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2) > +#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1) > +#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1) > +#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0) > +#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0) > +#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0) > +#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0) > +#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0) > +#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2) > +#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2) > +/* Registers used as operands in MI_MATH_INSTR */ > +#define MI_MATH_REG(x) (x) > +#define MI_MATH_REG_SRCA 0x20 > +#define MI_MATH_REG_SRCB 0x21 > +#define MI_MATH_REG_ACCU 0x31 > +#define MI_MATH_REG_ZF 0x32 > +#define MI_MATH_REG_CF 0x33 Are you thinking that we should just pull in the driver gpu_commands.h as is into lib? 
-Mika > + > +#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1) > + > +static void delay(int i915, > + const struct intel_execution_engine2 *e, > + uint32_t handle, > + uint64_t addr, > + uint64_t ns) > +{ > + const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8; > + const uint32_t base = gem_engine_mmio_base(i915, e->name); > +#define CS_GPR(x) (base + 0x600 + 8 * (x)) > +#define TIMESTAMP (base + 0x3a8) > + enum { START_TS, NOW_TS }; > + uint32_t *map, *cs, *jmp; > + > + igt_require(base); > + > + cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE); > + > + *cs++ = MI_LOAD_REGISTER_IMM; > + *cs++ = CS_GPR(START_TS) + 4; > + *cs++ = 0; > + *cs++ = MI_LOAD_REGISTER_REG; > + *cs++ = TIMESTAMP; > + *cs++ = CS_GPR(START_TS); > + > + if (offset_in_page(cs) & 4) > + *cs++ = 0; > + jmp = cs; > + > + *cs++ = 0x5 << 23; /* MI_ARB_CHECK */ > + > + *cs++ = MI_LOAD_REGISTER_IMM; > + *cs++ = CS_GPR(NOW_TS) + 4; > + *cs++ = 0; > + *cs++ = MI_LOAD_REGISTER_REG; > + *cs++ = TIMESTAMP; > + *cs++ = CS_GPR(NOW_TS); > + > + *cs++ = MI_MATH(4); > + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS)); > + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS)); > + *cs++ = MI_MATH_SUB; > + *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU); > + > + *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */ > + *cs++ = CS_GPR(NOW_TS); > + *cs++ = addr + 4000; > + *cs++ = addr >> 32; > + > + *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b); > + *cs++ = ~ns_to_ticks(i915, ns); > + *cs++ = addr + 4000; > + *cs++ = addr >> 32; > + > + *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b; > + *cs++ = addr + offset_in_page(jmp); > + *cs++ = addr >> 32; > + > + munmap(map, 4096); > +} > + > +static struct drm_i915_gem_exec_object2 > +delay_create(int i915, uint32_t ctx, > + const struct intel_execution_engine2 *e, > + uint64_t target_ns) > +{ > + struct drm_i915_gem_exec_object2 obj = { > + .handle = batch_create(i915), > + .flags = 
EXEC_OBJECT_SUPPORTS_48B_ADDRESS, > + }; > + struct drm_i915_gem_execbuffer2 execbuf = { > + .buffers_ptr = to_user_pointer(&obj), > + .buffer_count = 1, > + .rsvd1 = ctx, > + .flags = e->flags, > + }; > + > + gem_execbuf(i915, &execbuf); > + gem_sync(i915, obj.handle); > + > + delay(i915, e, obj.handle, obj.offset, target_ns); > + > + obj.flags |= EXEC_OBJECT_PINNED; > + return obj; > +} > + > +static void tslog(int i915, > + const struct intel_execution_engine2 *e, > + uint32_t handle, > + uint64_t addr) > +{ > + const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8; > + const uint32_t base = gem_engine_mmio_base(i915, e->name); > +#define CS_GPR(x) (base + 0x600 + 8 * (x)) > +#define CS_TIMESTAMP (base + 0x358) > + enum { ONE, MASK, ADDR }; > + uint32_t *timestamp_lo, *addr_lo; > + uint32_t *map, *cs; > + > + igt_require(base); > + > + map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE); > + cs = map + 512; > + > + *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */ > + *cs++ = CS_TIMESTAMP; > + timestamp_lo = cs; > + *cs++ = addr; > + *cs++ = addr >> 32; > + > + *cs++ = MI_LOAD_REGISTER_IMM; > + *cs++ = CS_GPR(ADDR); > + addr_lo = cs; > + *cs++ = addr; > + *cs++ = MI_LOAD_REGISTER_IMM; > + *cs++ = CS_GPR(ADDR) + 4; > + *cs++ = addr >> 32; > + > + *cs++ = MI_LOAD_REGISTER_IMM; > + *cs++ = CS_GPR(ONE); > + *cs++ = 4; > + *cs++ = MI_LOAD_REGISTER_IMM; > + *cs++ = CS_GPR(ONE) + 4; > + *cs++ = 0; > + > + *cs++ = MI_LOAD_REGISTER_IMM; > + *cs++ = CS_GPR(MASK); > + *cs++ = 0xfffff7ff; > + *cs++ = MI_LOAD_REGISTER_IMM; > + *cs++ = CS_GPR(MASK) + 4; > + *cs++ = 0xffffffff; > + > + *cs++ = MI_MATH(8); > + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ONE)); > + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR)); > + *cs++ = MI_MATH_ADD; > + *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU); > + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR)); > + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK)); > + *cs++ = 
MI_MATH_AND; > + *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU); > + > + *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */ > + *cs++ = CS_GPR(ADDR); > + *cs++ = addr + offset_in_page(timestamp_lo); > + *cs++ = addr >> 32; > + *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */ > + *cs++ = CS_GPR(ADDR); > + *cs++ = addr + offset_in_page(addr_lo); > + *cs++ = addr >> 32; > + > + *cs++ = MI_BATCH_BUFFER_END; > + > + munmap(map, 4096); > +} > + > +static struct drm_i915_gem_exec_object2 > +tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e) > +{ > + struct drm_i915_gem_exec_object2 obj = { > + .handle = batch_create(i915), > + .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS, > + }; > + struct drm_i915_gem_execbuffer2 execbuf = { > + .buffers_ptr = to_user_pointer(&obj), > + .buffer_count = 1, > + .rsvd1 = ctx, > + .flags = e->flags, > + }; > + > + gem_execbuf(i915, &execbuf); > + gem_sync(i915, obj.handle); > + > + tslog(i915, e, obj.handle, obj.offset); > + > + obj.flags |= EXEC_OBJECT_PINNED; > + return obj; > +} > + > +static int cmp_u32(const void *A, const void *B) > +{ > + const unsigned long *a = A, *b = B; > + > + if (*a < *b) > + return -1; > + else if (*a > *b) > + return 1; > + else > + return 0; > +} > + > +static void fair_child(int i915, uint32_t ctx, > + const struct intel_execution_engine2 *e, > + uint64_t frame_ns, > + int timeout, > + int timeline, > + unsigned int flags, > + unsigned long *ctl, > + unsigned long *out) > +#define F_PACING 0x1 > +#define F_EXTERNAL 0x2 > +{ > + const int batches_per_frame = 3; > + struct drm_i915_gem_exec_object2 prev = > + delay_create(i915, ctx, e, frame_ns / batches_per_frame); > + struct drm_i915_gem_exec_object2 next = > + delay_create(i915, ctx, e, frame_ns / batches_per_frame); > + struct drm_i915_gem_exec_object2 ts = tslog_create(i915, ctx, e); > + struct timespec tv = {}; > + unsigned long count = 0; > + int p_fence = -1, n_fence = -1; > + uint32_t *map; > + int n; > + > + 
igt_nsec_elapsed(&tv); > + while (!READ_ONCE(*ctl)) { > + struct drm_i915_gem_execbuffer2 execbuf = { > + .buffers_ptr = to_user_pointer(&next), > + .buffer_count = 1, > + .rsvd1 = ctx, > + .rsvd2 = -1, > + .flags = e->flags, > + }; > + > + if (flags & F_EXTERNAL) { > + execbuf.rsvd2 = > + sw_sync_timeline_create_fence(timeline, count); > + execbuf.flags |= I915_EXEC_FENCE_IN; > + } > + > + execbuf.flags |= I915_EXEC_FENCE_OUT; > + gem_execbuf_wr(i915, &execbuf); > + n_fence = execbuf.rsvd2 >> 32; > + execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN); > + for (n = 1; n < batches_per_frame; n++) > + gem_execbuf(i915, &execbuf); > + > + execbuf.buffers_ptr = to_user_pointer(&ts); > + execbuf.batch_start_offset = 2048; > + gem_execbuf(i915, &execbuf); > + > + if (flags & F_PACING && p_fence != -1) { > + struct pollfd pfd = { > + .fd = p_fence, > + .events = POLLIN, > + }; > + poll(&pfd, 1, -1); > + } > + close(p_fence); > + close(execbuf.rsvd2); > + > + igt_swap(prev, next); > + igt_swap(p_fence, n_fence); > + count++; > + } > + gem_sync(i915, prev.handle); > + close(p_fence); > + > + gem_close(i915, next.handle); > + gem_close(i915, prev.handle); > + > + map = gem_mmap__device_coherent(i915, ts.handle, 0, 4096, PROT_WRITE); > + for (n = 1; n < min(count, 512); n++) > + map[n - 1] = map[n] - map[n - 1]; > + qsort(map, --n, sizeof(*map), cmp_u32); > + *out = ticks_to_ns(i915, map[n / 2]); > + munmap(map, 4096); > + > + gem_close(i915, ts.handle); > +} > + > +static int cmp_ul(const void *A, const void *B) > +{ > + const unsigned long *a = A, *b = B; > + > + if (*a < *b) > + return -1; > + else if (*a > *b) > + return 1; > + else > + return 0; > +} > + > +static void fairness(int i915, > + const struct intel_execution_engine2 *e, > + int timeout, unsigned int flags) > +{ > + const int frame_ns = 16666 * 1000; > + unsigned long *result; > + > + igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8); > + igt_require(gem_class_has_mutable_submission(i915, 
e->class)); > + > + result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); > + > + for (int n = 2; n <= 16; n <<= 1) { > + int timeline = sw_sync_timeline_create(); > + int nframes = timeout * NSEC_PER_SEC / frame_ns + 1; > + const int nchild = n - 1; /* odd for easy medians */ > + const int lo = nchild / 4; > + const int hi = (3 * nchild + 3) / 4 - 1; > + struct igt_mean m; > + > + memset(result, 0, (nchild + 1) * sizeof(result[0])); > + igt_fork(child, nchild) { > + uint32_t ctx = gem_context_clone_with_engines(i915, 0); > + > + fair_child(i915, ctx, e, frame_ns / nchild, > + timeout, timeline, flags, > + &result[nchild], > + &result[child]); > + > + gem_context_destroy(i915, ctx); > + } > + > + while (nframes--) { > + struct timespec tv = { .tv_nsec = frame_ns }; > + nanosleep(&tv, NULL); > + sw_sync_timeline_inc(timeline, 1); > + } > + result[nchild] = 1; > + for (int child = 0; child < nchild; child++) { > + while (!READ_ONCE(result[child])) { > + struct timespec tv = { .tv_nsec = frame_ns }; > + nanosleep(&tv, NULL); > + sw_sync_timeline_inc(timeline, 1); > + } > + } > + igt_waitchildren(); > + close(timeline); > + > + igt_mean_init(&m); > + for (int child = 0; child < nchild; child++) > + igt_mean_add(&m, result[child]); > + > + qsort(result, nchild, sizeof(*result), cmp_ul); > + igt_info("%d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n", > + nchild, > + 1e-6 * result[0], 1e-6 * result[nchild - 1], > + 1e-6 * result[lo], 1e-6 * result[hi], > + 1e-6 * result[nchild / 2], > + 1e-6 * igt_mean_get(&m), > + 1e-6 * sqrt(igt_mean_get_variance(&m))); > + > +#if 0 > + /* Mean within 10% of target */ > + igt_assert( 9 * igt_mean_get(&m) > 10 * frame_ns && > + 10 * igt_mean_get(&m) < 9 * frame_ns); > + > + /* Variance [inter-quartile range] is less than 33% of median */ > + igt_assert(3 * result[hi] - result[lo] < result[nchild / 2]); > +#endif > + } > + > + munmap(result, 4096); > +} > + > #define 
test_each_engine(T, i915, e) \ > igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \ > igt_dynamic_f("%s", e->name) > @@ -2589,6 +3000,13 @@ igt_main > test_each_engine_store("promotion", fd, e) > promotion(fd, e->flags); > > + test_each_engine_store("fair-none", fd, e) > + fairness(fd, e, 2, 0); > + test_each_engine_store("fair-pace", fd, e) > + fairness(fd, e, 2, F_PACING); > + test_each_engine_store("fair-sync", fd, e) > + fairness(fd, e, 2, F_PACING | F_EXTERNAL); > + > igt_subtest_group { > igt_fixture { > igt_require(gem_scheduler_has_preemption(fd)); > -- > 2.27.0.rc2 > > _______________________________________________ > igt-dev mailing list > igt-dev@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/igt-dev _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx