Quoting Tvrtko Ursulin (2018-03-15 12:56:17)
> From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
>
> More than one test assumes that the spinner is running pretty much
> immediately after we have created or submitted it.
>
> In actuality there is a variable delay, especially on execlists platforms,
> between submission and the spin batch starting to run on the hardware.
>
> To enable tests which care about this level of timing to account for it,
> we add a new spin batch constructor which provides an output field which
> can be polled to determine when the batch actually started running.
>
> This is implemented via MI_STORE_DWORD_IMM from the spin batch, writing
> into a memory-mapped page shared with userspace.
>
> Using this facility from perf_pmu, where applicable, should fix the very
> occasional test failures seen across the suite and platforms.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> Suggested-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> ---
>  lib/igt_dummyload.c |  99 +++++++++++++++++++++++++++++++----
>  lib/igt_dummyload.h |   9 ++++
>  tests/perf_pmu.c    | 145 +++++++++++++++++++++++++++++++-----------------
>  3 files changed, 196 insertions(+), 57 deletions(-)
>
> diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
> index 4b20f23dfe26..0447d2f14d57 100644
> --- a/lib/igt_dummyload.c
> +++ b/lib/igt_dummyload.c
> @@ -74,9 +74,12 @@ fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
>  	reloc->write_domain = write_domains;
>  }
>
> -static int emit_recursive_batch(igt_spin_t *spin,
> -				int fd, uint32_t ctx, unsigned engine,
> -				uint32_t dep, bool out_fence)
> +#define OUT_FENCE	(1 << 0)
> +#define POLL_RUN	(1 << 1)
> +
> +static int
> +emit_recursive_batch(igt_spin_t *spin, int fd, uint32_t ctx, unsigned engine,
> +		     uint32_t dep, unsigned int flags)
>  {
>  #define SCRATCH 0
>  #define BATCH 1
> @@ -116,6 +119,8 @@ static int emit_recursive_batch(igt_spin_t *spin,
>  	execbuf.buffer_count++;
>
>  	if (dep) {
> +		igt_assert(!(flags & POLL_RUN));
> +

Challenge left to the reader :)

>  		/* dummy write to dependency */
>  		obj[SCRATCH].handle = dep;
>  		fill_reloc(&relocs[obj[BATCH].relocation_count++],
> @@ -123,6 +128,41 @@
>  			   I915_GEM_DOMAIN_RENDER,
>  			   I915_GEM_DOMAIN_RENDER);
>  		execbuf.buffer_count++;
> +	} else if (flags & POLL_RUN) {
> +		unsigned int offset;
> +
> +		igt_assert(!dep);
> +
> +		spin->poll_handle = gem_create(fd, 4096);
> +		spin->running = __gem_mmap__wc(fd, spin->poll_handle,
> +					       0, 4096, PROT_READ | PROT_WRITE);

Use gem_mmap__cpu() and gem_set_caching().

> +		igt_assert(spin->running);
> +		igt_assert_eq(*spin->running, 0);
> +
> +		*batch++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);

Hmm, have we forgotten the (len-2), or is this an unusual command that knows
its own length?

> +
> +		if (gen >= 8) {
> +			offset = sizeof(uint32_t);
> +			*batch++ = 0;
> +			*batch++ = 0;
> +		} else if (gen >= 4) {
> +			offset = 2 * sizeof(uint32_t);
> +			*batch++ = 0;
> +			*batch++ = 0;
> +		} else {
> +			offset = sizeof(uint32_t);
> +			batch[-1]--;
> +			*batch++ = 0;
> +		}
> +
> +		*batch++ = 1;
> +
> +		obj[SCRATCH].handle = spin->poll_handle;
> +		fill_reloc(&relocs[obj[BATCH].relocation_count++],
> +			   spin->poll_handle, offset,
> +			   I915_GEM_DOMAIN_INSTRUCTION,
> +			   I915_GEM_DOMAIN_INSTRUCTION);

DOMAIN_RENDER preferably; you don't need the w/a. Could we not lie about the
write-hazard? That removes the need for EXEC_OBJECT_ASYNC and opens up the
possibility of using different dwords for different engines and then waiting
on all engines.
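Concretely, and this is just my sketch of one way to read that suggestion
(not something in the patch): declare only a read domain for the poll page,
so no write hazard is recorded against it.

	/*
	 * Hypothetical variant of the reloc above: with no write domain
	 * declared, the kernel tracks no write hazard on the poll page, so
	 * other submissions do not serialise against the spinner and
	 * EXEC_OBJECT_ASYNC becomes unnecessary.
	 */
	fill_reloc(&relocs[obj[BATCH].relocation_count++],
		   spin->poll_handle, offset,
		   I915_GEM_DOMAIN_RENDER, 0 /* no write domain */);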
> +		execbuf.buffer_count++;

gen4 and gen5 require I915_EXEC_SECURE and a DRM_MASTER fd. We can just do
something like

	if (gen == 4 || gen == 5)
		igt_require(igt_device_set_master(fd) == 0);

> +/**
> + * igt_spin_batch_new_poll:
> + * @fd: open i915 drm file descriptor
> + * @engine: Ring to execute batch OR'd with execbuf flags. If value is less
> + *          than 0, execute on all available rings.
> + *
> + * Start a recursive batch on a ring. Immediately returns a #igt_spin_t that
> + * contains the batch's handle that can be waited upon. The returned structure
> + * must be passed to igt_spin_batch_free() for post-processing.
> + *
> + * igt_spin_t->running will contain a pointer whose target will change from
> + * zero to one once the spinner actually starts executing on the GPU.
> + *
> + * Returns:
> + * Structure with helper internal state for igt_spin_batch_free().
> + */
> +igt_spin_t *
> +igt_spin_batch_new_poll(int fd, uint32_t ctx, unsigned engine)
> +{
> +	igt_spin_t *spin;
> +
> +	igt_require_gem(fd);
> +	igt_require(gem_mmap__has_wc(fd));

	igt_require(gem_can_store_dword(fd, engine));

Not all platforms have MI_STORE_DWORD/DATA_IMM (with virtual addresses, at
least) and some platforms will die (*cough* snb *cough*).

> +
> +	spin = __igt_spin_batch_new_poll(fd, ctx, engine);
> +	igt_assert(gem_bo_busy(fd, spin->handle));
> +
> +	return spin;
> +}
>  igt_spin_t *__igt_spin_batch_new(int fd,
> @@ -55,6 +57,13 @@ igt_spin_t *igt_spin_batch_new_fence(int fd,
>  				     uint32_t ctx,
>  				     unsigned engine);
>
> +igt_spin_t *__igt_spin_batch_new_poll(int fd,
> +				      uint32_t ctx,
> +				      unsigned engine);
> +igt_spin_t *igt_spin_batch_new_poll(int fd,
> +				    uint32_t ctx,
> +				    unsigned engine);
> +
>  void igt_spin_batch_set_timeout(igt_spin_t *spin, int64_t ns);
>  void igt_spin_batch_end(igt_spin_t *spin);
>  void igt_spin_batch_free(int fd, igt_spin_t *spin);
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index 19fcc95ffc7f..d1b7b23bc646 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -184,6 +184,38 @@ static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
>  	usleep(batch_duration_ns / 5000);
>  }
>
> +static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long flags)
> +{
> +	return __igt_spin_batch_new_poll(fd, ctx, flags);
> +}
> +
> +static unsigned long __spin_wait(igt_spin_t *spin)
> +{
> +	struct timespec start = { };
> +
> +	igt_nsec_elapsed(&start);
> +
> +	while (!spin->running);

Put the ';' on a new line so it's clearly visible.

> +
> +	return igt_nsec_elapsed(&start);
> +}
> +
> +static igt_spin_t * __spin_sync(int fd, uint32_t ctx, unsigned long flags)
> +{
> +	igt_spin_t *spin = __spin_poll(fd, ctx, flags);
> +
> +	__spin_wait(spin);
> +
> +	return spin;
> +}
> +
> +static igt_spin_t * spin_sync(int fd, uint32_t ctx, unsigned long flags)

spin_sync() has connotations with gem_sync(): gem_sync() waits for the end,
but spin_sync() waits for the start. Maybe spin_wait_for_execute()? Nah.
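In other words, for the __spin_wait() busy-wait above:

	while (!spin->running)
		; /* busy-wait until the spinner reports it is on the GPU */

The lone ';' on its own line makes the intentionally empty loop body obvious
at a glance.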
> +{
> +	igt_require_gem(fd);
> +
> +	return __spin_sync(fd, ctx, flags);
> +}
>  static void
>  __submit_spin_batch(int gem_fd,
> +		    igt_spin_t *spin,
>  		    struct drm_i915_gem_exec_object2 *obj,
>  		    const struct intel_execution_engine2 *e)
>  {
>  	struct drm_i915_gem_execbuffer2 eb = {
> -		.buffer_count = 1,
>  		.buffers_ptr = to_user_pointer(obj),
>  		.flags = e2ring(gem_fd, e),
>  	};
>
> +	if (spin->running) {
> +		obj[0].handle = spin->poll_handle;
> +		obj[0].flags = EXEC_OBJECT_ASYNC;
> +		obj[1].handle = spin->handle;
> +		eb.buffer_count = 2;
> +	} else {
> +		obj[0].handle = spin->handle;
> +		eb.buffer_count = 1;
> +	}

obj[] must be set up by the caller; the EXEC_OBJECT_PINNED flags are
essential, or else the kernel *will* move spin->poll_handle and then it is
fubar.
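For illustration only (my sketch, not the patch; the *_offset values are
hypothetical placeholders for offsets the test would record from the
spinner's first execbuf), caller-owned, pinned objects would look roughly
like:

	struct drm_i915_gem_exec_object2 obj[2] = {
		{
			.handle = spin->poll_handle,
			.offset = poll_offset,
			.flags  = EXEC_OBJECT_PINNED | EXEC_OBJECT_ASYNC,
		},
		{
			.handle = spin->handle,
			.offset = batch_offset,
			.flags  = EXEC_OBJECT_PINNED,
		},
	};
	struct drm_i915_gem_execbuffer2 eb = {
		.buffer_count = 2,
		.buffers_ptr = to_user_pointer(obj),
		.flags = e2ring(gem_fd, e),
	};

	/* Pinned at their original offsets, neither object can be moved by
	 * the kernel when the spinner is resubmitted to another engine.
	 */
	gem_execbuf(gem_fd, &eb);

-Chris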