Re: [PATCH i-g-t] i915/perf_pmu: Compare semaphore and busy measurements

Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> · Wed, 25 Sep 2019 09:36:10 +0100

On 24/09/2019 23:01, Chris Wilson wrote:
Our semaphore time is measured by sampling a ring register, whereas our
busy time is measured exactly. This leaves a window of discrepancy that
we wish to keep small (at least within sample tolerance).

References: https://bugs.freedesktop.org/show_bug.cgi?id=111788
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
  tests/perf_pmu.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++-
  1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 8a06e5d44..2fcaf88de 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -130,7 +130,7 @@ static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
  #define __assert_within_epsilon(x, ref, tol_up, tol_down) \
  	igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
  		     (double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
-		     "'%s' != '%s' (%f not within +%f%%/-%f%% tolerance of %f)\n",\
+		     "'%s' != '%s' (%f not within +%.1f%%/-%.1f%% tolerance of %f)\n",\
  		     #x, #ref, (double)(x), \
  		     (tol_up) * 100.0, (tol_down) * 100.0, \
  		     (double)(ref))
@@ -744,6 +744,74 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
  	assert_within_epsilon(val[1] - val[0], slept, tolerance);
  }
  
+static void
+__sema_busy(int gem_fd, int pmu,
+	    const struct intel_execution_engine2 *e,
+	    const struct intel_execution_engine2 *signal,
+	    int sema_pct,
+	    int busy_pct)
+{
+	uint64_t total, sema, busy;
+	uint64_t start[2], end[2];
+	igt_spin_t *spin[2];
+
+	spin[0] = igt_spin_new(gem_fd,
+			       .engine = signal->flags,
+			       .flags = IGT_SPIN_FENCE_OUT);
+	spin[1] = igt_spin_new(gem_fd,
+			       .engine = e->flags,
+			       .fence = spin[0]->out_fence,
+			       .flags = IGT_SPIN_FENCE_IN);
+
+	total = pmu_read_multi(pmu, 2, start);

For result stability, it might be worth having the signaler use
IGT_SPIN_POLL_RUN and waiting for it to start running here before
proceeding with the sleeps.

+
+	sema = measured_usleep(batch_duration_ns * sema_pct / 100 / 1000);
+	igt_spin_end(spin[0]);
+	busy = measured_usleep(batch_duration_ns * (busy_pct - sema_pct) / 100 / 1000);

'busy' is only ever used together with 'sema', so for clarity it may make
sense to add 'sema' to it straight away; it would then correspond directly
to the semantics of the busy metric.

+	igt_spin_end(spin[1]);
+	measured_usleep(batch_duration_ns * (100 - busy_pct) / 100 / 1000);

The required relationship between the sema_pct and busy_pct inputs is a
bit non-obvious.

igt_assert(busy_pct >= sema_pct)?

+
+	total = pmu_read_multi(pmu, 2, end) - total;
+
+	igt_info("%s<-%s, target: {%.1f%% [%d], %.1f%% [%d]}, measured {%.1f%%, %.1f%%}\n",
+		 e->name, signal->name,
+		 sema * 100. / total, sema_pct,
+		 (sema + busy) * 100. / total, busy_pct,
+		 (end[0] - start[0]) * 100. / total,
+		 (end[1] - start[1]) * 100. / total);
+
+	assert_within_epsilon(end[0] - start[0], sema, tolerance);
+	assert_within_epsilon(end[1] - start[1], sema + busy, tolerance);
+	igt_assert((end[0] - start[0]) < (end[1] - start[1]) * (1 + tolerance));

__assert_within_epsilon with one of the tolerances zero?

+
+	igt_spin_free(gem_fd, spin[1]);
+	igt_spin_free(gem_fd, spin[0]);
+}
+
+static void
+sema_busy(int gem_fd,
+	  const struct intel_execution_engine2 *e,
+	  unsigned int flags)
+{
+	const struct intel_execution_engine2 *signal;
+	int fd;
+
+	fd = open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
+	open_group(I915_PMU_ENGINE_BUSY(e->class, e->instance), fd);
+
+	__for_each_physical_engine(gem_fd, signal) {
+		if (e->class == signal->class &&
+		    e->instance == signal->instance)
+			continue;
+
+		__sema_busy(gem_fd, fd, e, signal, 50, 100);
+		__sema_busy(gem_fd, fd, e, signal, 25, 50);
+		__sema_busy(gem_fd, fd, e, signal, 75, 75);
+	}
+
+	close(fd);
+}
+
  #define   MI_WAIT_FOR_PIPE_C_VBLANK (1<<21)
  #define   MI_WAIT_FOR_PIPE_B_VBLANK (1<<11)
  #define   MI_WAIT_FOR_PIPE_A_VBLANK (1<<3)
@@ -1774,6 +1842,9 @@ igt_main
  			sema_wait(fd, e,
  				  TEST_BUSY | TEST_TRAILING_IDLE);
  
+		igt_subtest_f("semaphore-busy-%s", e->name)
+			sema_busy(fd, e, 0);
+
  		/**
  		 * Check that two perf clients do not influence each
  		 * others observations.


Looking forward to results!

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx