Quoting Tvrtko Ursulin (2018-04-17 09:41:11)
> From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
>
> We want to make sure RT tasks which use a lot of CPU time can submit
> batch buffers with roughly the same latency (and certainly not worse)
> compared to normal tasks.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> ---
>  tests/gem_exec_latency.c | 176 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 176 insertions(+)
>
> diff --git a/tests/gem_exec_latency.c b/tests/gem_exec_latency.c
> index 9498c0921e60..420ede0f83a0 100644
> --- a/tests/gem_exec_latency.c
> +++ b/tests/gem_exec_latency.c
> @@ -36,11 +36,15 @@
>  #include <sys/time.h>
>  #include <sys/signal.h>
>  #include <time.h>
> +#include <sched.h>
>
>  #include "drm.h"
>
>  #include "igt_sysfs.h"
>  #include "igt_vgem.h"
> +#include "igt_dummyload.h"
> +#include "igt_stats.h"
> +
>  #include "i915/gem_ring.h"
>
>  #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
> @@ -351,6 +355,172 @@ static void latency_from_ring(int fd,
>  	}
>  }
>
> +static void __rearm_spin_batch(igt_spin_t *spin)
> +{
> +	const uint32_t mi_arb_chk = 0x5 << 23;
> +
> +	*spin->batch = mi_arb_chk;
> +	*spin->running = 0;
> +	__sync_synchronize();
> +}
> +
> +static void
> +__submit_spin_batch(int fd, igt_spin_t *spin, unsigned int flags)
> +{
> +	struct drm_i915_gem_execbuffer2 eb = spin->execbuf;
> +
> +	eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
> +	eb.flags |= flags | I915_EXEC_NO_RELOC;
> +
> +	gem_execbuf(fd, &eb);
> +}
> +
> +struct rt_pkt
> +{
> +#define RT_OK (0)
> +#define RT_FAIL (1)
> +#define RT_TIMEOUT (2)
> +	int status;
> +	struct igt_mean mean;
> +	double max;
> +};
> +
> +static void __spin_wait(struct rt_pkt *msg, igt_spin_t *spin, double *t_wait)
> +{
> +	struct timespec ts = { };
> +	uint64_t t_last = 0;
> +
> +	igt_nsec_elapsed(&ts);
> +
> +	while (!READ_ONCE(*spin->running)) {
> +		uint64_t t = igt_nsec_elapsed(&ts);
> +
> +		if ((t - t_last) > 5UL * NSEC_PER_SEC) {
> +			/* Absolute timeout to save time. */
> +			msg->status = RT_TIMEOUT;
> +		} else if ((t - t_last) > NSEC_PER_SEC / 10) {
> +			/* Backoff every 100ms to give it chance to complete. */
> +			t_last = t;
> +			usleep(1);
> +		}
> +	}
> +
> +	*t_wait = igt_nsec_elapsed(&ts) / 1e9;

(I would keep this as nanoseconds in the double until we need to convert
into human-readable units.)
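Untested sketch, reusing the names from the patch:

	*t_wait = igt_nsec_elapsed(&ts); /* keep ns in the double */

with the conversion deferred to the report at the end, e.g.

	igt_info("%10s: mean=%.2fus\n",
		 passname[pass], igt_mean_get(&msg.mean) / 1e3);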
> +	msg->status = RT_OK;
> +}
> +
> +/*
> + * Test whether RT thread which hogs the CPU a lot can submit work with
> + * reasonable latency.
> + */
> +static void
> +rthog_latency_on_ring(int fd, unsigned int ring, const char *name)
> +{
> +	const char *passname[] = { "warmup", "normal", "rt" };
> +	struct rt_pkt res[3];
> +	unsigned int i;
> +	int link[2];
> +	int ret;
> +
> +	igt_require(gem_can_store_dword(fd, ring));
> +
> +	igt_assert(pipe(link) == 0);
> +
> +	memset(res, 0, sizeof(res));
> +
> +	igt_fork(child, 1) {
> +		unsigned int pass = 0; /* Three passes: warmup, normal, rt. */
> +
> +		do {
> +			struct rt_pkt msg = { };
> +			igt_spin_t *spin;
> +
> +			igt_mean_init(&msg.mean);
> +
> +			if (pass == 2) {
> +				struct sched_param rt =
> +					{ .sched_priority = 99 };
> +
> +				ret = sched_setscheduler(0,
> +							 SCHED_FIFO | SCHED_RESET_ON_FORK,
> +							 &rt);
> +				if (ret) {
> +					igt_warn("Failed to set scheduling policy!\n");
> +					msg.status = RT_FAIL;
> +					write(link[1], &msg, sizeof(msg));
> +					exit(1);
> +				}
> +			}
> +
> +			spin = __igt_spin_batch_new_poll(fd, 0, ring);
> +			if (!spin) {
> +				igt_warn("Failed to create spinner! (%s)\n",
> +					 passname[pass]);
> +				msg.status = RT_FAIL;
> +				write(link[1], &msg, sizeof(msg));
> +				exit(1);
> +			}
> +			igt_spin_busywait_until_running(spin);
> +
> +			igt_until_timeout(pass > 0 ? 5 : 2) {
> +				double t;
> +
> +				igt_spin_batch_end(spin);
> +				gem_sync(fd, spin->handle);
> +
> +				__rearm_spin_batch(spin);
> +				__submit_spin_batch(fd, spin, ring);
> +
> +				__spin_wait(&msg, spin, &t);
> +				if (msg.status != RT_OK) {
> +					igt_warn("Wait timeout! (%s)\n",
> +						 passname[pass]);
> +					write(link[1], &msg, sizeof(msg));
> +					exit(1);
> +				}
> +
> +				if (t > msg.max)
> +					msg.max = t;
> +
> +				igt_mean_add(&msg.mean, t);

I think I would include a background load (a normal-priority process
submitting overlapping nops from alternate contexts, just so that we know
we should be saturating execlists_submission_tasklet) and then have the RT
process compete with a MAX_PRIO timed request.

As I understand it, the problem is a priority inversion: the RT hog
spinning on the breadcrumb prevents ksoftirqd from running, so the
challenge in the test setup is tricking ksoftirqd into running on the same
CPU as the hog. I think we need a cpuset to run both the RT and background
processes on the same CPU.

> +			}
> +
> +			igt_spin_batch_free(fd, spin);
> +
> +			igt_info("%10s: mean=%.2fus variance=%.2fus max=%.2fus (n=%lu)\n",
> +				 passname[pass],
> +				 igt_mean_get(&msg.mean) * 1e6,
> +				 igt_mean_get_variance(&msg.mean) * 1e6,
> +				 msg.max * 1e6,
> +				 msg.mean.count);
> +
> +			write(link[1], &msg, sizeof(msg));
> +		} while (++pass < 3);
> +
> +		exit(0);
> +	}
> +
> +	for (i = 0; i < 3; i++) {
> +		ret = read(link[0], &res[i], sizeof(res[0]));
> +		igt_assert_eq(ret, sizeof(res[0]));
> +
> +		igt_assert_eq(res[i].status, RT_OK);
> +	}
> +
> +	close(link[0]);
> +	close(link[1]);
> +
> +	igt_waitchildren();
> +
> +	/*
> +	 * Check that the submission latency variance for a task with RT
> +	 * priority is no larger than three times the same of a normal task.
> +	 */
> +	igt_assert(igt_mean_get_variance(&res[2].mean) <
> +		   igt_mean_get_variance(&res[1].mean) * 3);

With a sample period of 5s we expect to sample many, many requests, so the
mean/variance should be reasonably stable even on a noisy system.

Please make this an igt_assert_f() and explain the failure in the error
message. Even a s/2/RT/; s/1/NORMAL/ would help the stringify.
-Chris
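P.S. Untested sketches of the above, in case they help. For the cpuset,
pinning both children to one CPU from inside igt_fork(), before the
sched_setscheduler() (CPU 0 picked arbitrarily):

	static void pin_to_cpu(int cpu)
	{
		cpu_set_t mask;

		CPU_ZERO(&mask);
		CPU_SET(cpu, &mask);

		/* pid 0 == the calling task */
		igt_assert_eq(sched_setaffinity(0, sizeof(mask), &mask), 0);
	}

For the background load, something along these lines in a second child,
ping-ponging between two contexts so the nops overlap and keep
execlists_submission_tasklet busy (pin_to_cpu() and the stop flag are
mine, not from the patch):

	static void background_nops(int fd, unsigned int ring, bool *stop)
	{
		const uint32_t bbe = MI_BATCH_BUFFER_END;
		struct drm_i915_gem_exec_object2 obj = { };
		struct drm_i915_gem_execbuffer2 eb = { };
		uint32_t ctx[2];
		unsigned long n = 0;

		pin_to_cpu(0); /* same CPU as the RT hog */

		obj.handle = gem_create(fd, 4096);
		gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));

		eb.buffers_ptr = to_user_pointer(&obj);
		eb.buffer_count = 1;
		eb.flags = ring;

		ctx[0] = gem_context_create(fd);
		ctx[1] = gem_context_create(fd);

		while (!READ_ONCE(*stop)) {
			eb.rsvd1 = ctx[n++ & 1]; /* alternate contexts */
			gem_execbuf(fd, &eb);
		}

		gem_context_destroy(fd, ctx[0]);
		gem_context_destroy(fd, ctx[1]);
		gem_close(fd, obj.handle);
	}

And for the final check, with an enum { WARMUP, NORMAL, RT } for the
passes:

	igt_assert_f(igt_mean_get_variance(&res[RT].mean) <
		     3 * igt_mean_get_variance(&res[NORMAL].mean),
		     "RT variance (%.2fus) exceeds 3x normal variance (%.2fus)\n",
		     igt_mean_get_variance(&res[RT].mean) * 1e6,
		     igt_mean_get_variance(&res[NORMAL].mean) * 1e6);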