On 2020-08-10 at 13:44:15 +0100, Chris Wilson wrote:
> Don't assume the kernel will emit a semaphore to synchronise between two
> engines; instead, emit the semaphore ourselves as the basis of our
> measurements. The purpose of the test is to ascertain the accuracy of the
> two sampling methods: semaphore busyness uses register polling, whereas
> engine busyness may use the ktime_t of the CS events.

Looks good to me.

Reviewed-by: Ramalingam C <ramalingam.c@xxxxxxxxx>

Tested on the platform too.
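For anyone following along, the CPU side of the handshake this patch adds
boils down to something like the sketch below. This is purely illustrative
(the helper name is made up; map is assumed to be the WC mmap of the object
returned by create_sema(), with the batch already submitted via
gem_execbuf()), not a drop-in for the hunks quoted below:

  #include <stdint.h>
  #include <unistd.h>

  static void release_sema_after(volatile uint32_t *map, useconds_t sema_us)
  {
          /* MI_STORE_DWORD_IMM sets the dword to 1 once the batch runs;
           * from then on the engine sits in MI_SEMAPHORE_WAIT and shows up
           * in the SEMA (and BUSY) counters. */
          while (*map != 1)
                  ;

          /* The interval we expect to be accounted to SEMA. */
          usleep(sema_us);

          /* SAD_NEQ_SDD: any value other than 1 completes the wait; flush
           * the WC write so the CS poll observes it promptly. */
          *map = 0;
          __sync_synchronize();
  }

With the 25/50 split used below, for example, roughly a quarter of the
sampled window should be accounted to SEMA and half to BUSY, since the
semaphore poll itself counts as busy time.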
> 
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Ramalingam C <ramalingam.c@xxxxxxxxx>
> ---
>  tests/i915/perf_pmu.c | 94 +++++++++++++++++++++++++++++--------------
>  1 file changed, 64 insertions(+), 30 deletions(-)
> 
> diff --git a/tests/i915/perf_pmu.c b/tests/i915/perf_pmu.c
> index 13e1bd93e..ecd4afbd6 100644
> --- a/tests/i915/perf_pmu.c
> +++ b/tests/i915/perf_pmu.c
> @@ -650,6 +650,7 @@ no_sema(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
>  #define MI_SEMAPHORE_WAIT        MI_INSTR(0x1c, 2) /* GEN8+ */
>  #define MI_SEMAPHORE_POLL        (1<<15)
>  #define MI_SEMAPHORE_SAD_GTE_SDD (1<<12)
> +#define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12)
>  
>  static void
>  sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
> @@ -751,10 +752,39 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
>          assert_within_epsilon(val[1] - val[0], slept, tolerance);
>  }
>  
> +static uint32_t
> +create_sema(int gem_fd, struct drm_i915_gem_relocation_entry *reloc)
> +{
> +        uint32_t cs[] = {
> +                /* Reset our semaphore wait */
> +                MI_STORE_DWORD_IMM,
> +                0,
> +                0,
> +                1,
> +
> +                /* Wait until the semaphore value is set to 0 [by caller] */
> +                MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_NEQ_SDD,
> +                1,
> +                0,
> +                0,
> +
> +                MI_BATCH_BUFFER_END
> +        };
> +        uint32_t handle = gem_create(gem_fd, 4096);
> +
> +        memset(reloc, 0, 2 * sizeof(*reloc));
> +        reloc[0].target_handle = handle;
> +        reloc[0].offset = 64 + 1 * sizeof(uint32_t);
> +        reloc[1].target_handle = handle;
> +        reloc[1].offset = 64 + 6 * sizeof(uint32_t);
> +
> +        gem_write(gem_fd, handle, 64, cs, sizeof(cs));
> +        return handle;
> +}
> +
>  static void
>  __sema_busy(int gem_fd, int pmu,
>              const struct intel_execution_engine2 *e,
> -            const struct intel_execution_engine2 *signal,
>              int sema_pct,
>              int busy_pct)
>  {
> @@ -764,39 +794,54 @@ __sema_busy(int gem_fd, int pmu,
>          };
>          uint64_t total, sema, busy;
>          uint64_t start[2], val[2];
> -        igt_spin_t *spin[2];
> +        struct drm_i915_gem_relocation_entry reloc[2];
> +        struct drm_i915_gem_exec_object2 obj = {
> +                .handle = create_sema(gem_fd, reloc),
> +                .relocation_count = 2,
> +                .relocs_ptr = to_user_pointer(reloc),
> +        };
> +        struct drm_i915_gem_execbuffer2 eb = {
> +                .batch_start_offset = 64,
> +                .buffer_count = 1,
> +                .buffers_ptr = to_user_pointer(&obj),
> +                .flags = e->flags,
> +        };
> +        igt_spin_t *spin;
> +        uint32_t *map;
>  
>          /* Time spent being busy includes time waiting on semaphores */
>          igt_assert(busy_pct >= sema_pct);
>  
>          gem_quiescent_gpu(gem_fd);
>  
> -        spin[0] = igt_spin_new(gem_fd,
> -                               .engine = signal->flags,
> -                               .flags = IGT_SPIN_FENCE_OUT | IGT_SPIN_POLL_RUN);
> -        spin[1] = igt_spin_new(gem_fd,
> -                               .engine = e->flags,
> -                               .fence = spin[0]->out_fence,
> -                               .flags = IGT_SPIN_FENCE_IN);
> +        map = gem_mmap__wc(gem_fd, obj.handle, 0, 4096, PROT_WRITE);
> +        gem_execbuf(gem_fd, &eb);
> +        spin = igt_spin_new(gem_fd, .engine = e->flags);
>  
> -        igt_spin_busywait_until_started(spin[0]);
> +        /* Wait until the batch is executed and the semaphore is busy-waiting */
> +        while (!READ_ONCE(*map) && gem_bo_busy(gem_fd, obj.handle))
> +                ;
> +        igt_assert(gem_bo_busy(gem_fd, obj.handle));
> +        gem_close(gem_fd, obj.handle);
>  
>          total = pmu_read_multi(pmu, 2, start);
>  
>          sema = measured_usleep(batch_duration_ns * sema_pct / 100 / 1000);
> -        igt_spin_end(spin[0]);
> +        *map = 0; __sync_synchronize();
>          busy = measured_usleep(batch_duration_ns * (busy_pct - sema_pct) / 100 / 1000);
> -        igt_spin_end(spin[1]);
> +        igt_spin_end(spin);
>          measured_usleep(batch_duration_ns * (100 - busy_pct) / 100 / 1000);
>  
>          total = pmu_read_multi(pmu, 2, val) - total;
> +        igt_spin_free(gem_fd, spin);
> +        munmap(map, 4096);
>  
>          busy += sema;
>          val[SEMA] -= start[SEMA];
>          val[BUSY] -= start[BUSY];
>  
> -        igt_info("%s<-%s, target: {%.1f%% [%d], %.1f%% [%d]}, measured: {%.1f%%, %.1f%%}\n",
> -                 e->name, signal->name,
> +        igt_info("%s, target: {%.1f%% [%d], %.1f%% [%d]}, measured: {%.1f%%, %.1f%%}\n",
> +                 e->name,
>                   sema * 100. / total, sema_pct,
>                   busy * 100. / total, busy_pct,
>                   val[SEMA] * 100. / total,
> @@ -809,8 +854,6 @@ __sema_busy(int gem_fd, int pmu,
>                   val[SEMA] * 1e-3, val[SEMA] * 100. / total,
>                   val[BUSY] * 1e-3, val[BUSY] * 100. / total);
>  
> -        igt_spin_free(gem_fd, spin[1]);
> -        igt_spin_free(gem_fd, spin[0]);
>  }
>  
>  static void
> @@ -818,25 +861,16 @@ sema_busy(int gem_fd,
>            const struct intel_execution_engine2 *e,
>            unsigned int flags)
>  {
> -        const struct intel_execution_engine2 *signal;
>          int fd;
>  
> -        igt_require(gem_scheduler_has_semaphores(gem_fd));
> -        igt_require(gem_scheduler_has_preemption(gem_fd));
> +        igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8);
>  
> -        fd = open_group(gem_fd,
> -                        I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
> +        fd = open_group(gem_fd, I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
>          open_group(gem_fd, I915_PMU_ENGINE_BUSY(e->class, e->instance), fd);
>  
> -        __for_each_physical_engine(gem_fd, signal) {
> -                if (e->class == signal->class &&
> -                    e->instance == signal->instance)
> -                        continue;
> -
> -                __sema_busy(gem_fd, fd, e, signal, 50, 100);
> -                __sema_busy(gem_fd, fd, e, signal, 25, 50);
> -                __sema_busy(gem_fd, fd, e, signal, 75, 75);
> -        }
> +        __sema_busy(gem_fd, fd, e, 50, 100);
> +        __sema_busy(gem_fd, fd, e, 25, 50);
> +        __sema_busy(gem_fd, fd, e, 75, 75);
>  
>          close(fd);
>  }
> -- 
> 2.28.0
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx