Re: [PATCH i-g-t] i915/perf_pmu: Compare semaphore and busy measurements

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 24/09/2019 23:01, Chris Wilson wrote:
Our semaphore time is measured by sampling a ring register, whereas our
busy time is measured exactly. This leaves a window of discrepancy that
we wish to keep small (at least within sample tolerance).

References: https://bugs.freedesktop.org/show_bug.cgi?id=111788
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
  tests/perf_pmu.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++-
  1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 8a06e5d44..2fcaf88de 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -130,7 +130,7 @@ static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
  #define __assert_within_epsilon(x, ref, tol_up, tol_down) \
  	igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
  		     (double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
-		     "'%s' != '%s' (%f not within +%f%%/-%f%% tolerance of %f)\n",\
+		     "'%s' != '%s' (%f not within +%.1f%%/-%.1f%% tolerance of %f)\n",\
  		     #x, #ref, (double)(x), \
  		     (tol_up) * 100.0, (tol_down) * 100.0, \
  		     (double)(ref))
@@ -744,6 +744,74 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
  	assert_within_epsilon(val[1] - val[0], slept, tolerance);
  }
+static void
+__sema_busy(int gem_fd, int pmu,
+	    const struct intel_execution_engine2 *e,
+	    const struct intel_execution_engine2 *signal,
+	    int sema_pct,
+	    int busy_pct)
+{
+	uint64_t total, sema, busy;
+	uint64_t start[2], end[2];
+	igt_spin_t *spin[2];
+
+	spin[0] = igt_spin_new(gem_fd,
+			       .engine = signal->flags,
+			       .flags = IGT_SPIN_FENCE_OUT);
+	spin[1] = igt_spin_new(gem_fd,
+			       .engine = e->flags,
+			       .fence = spin[0]->out_fence,
+			       .flags = IGT_SPIN_FENCE_IN);
+
+	total = pmu_read_multi(pmu, 2, start);

For result stability, it might be worth creating the signaler with IGT_SPIN_POLL_RUN and waiting for it to start running here before proceeding with the sleeps.

+
+	sema = measured_usleep(batch_duration_ns * sema_pct / 100 / 1000);
+	igt_spin_end(spin[0]);
+	busy = measured_usleep(batch_duration_ns * (busy_pct - sema_pct) / 100 / 1000);

busy is only ever used together with sema, so for clarity it may make sense to add sema to it straight away; it would then correspond directly to the metric semantics.

+	igt_spin_end(spin[1]);
+	measured_usleep(batch_duration_ns * (100 - busy_pct) / 100 / 1000);

The required relationship between the sema_pct and busy_pct inputs is a bit non-obvious.

igt_assert(busy_pct >= sema_pct)?

+
+	total = pmu_read_multi(pmu, 2, end) - total;
+
+	igt_info("%s<-%s, target: {%.1f%% [%d], %.1f%% [%d]}, measured {%.1f%%, %.1f%%}\n",
+		 e->name, signal->name,
+		 sema * 100. / total, sema_pct,
+		 (sema + busy) * 100. / total, busy_pct,
+		 (end[0] - start[0]) * 100. / total,
+		 (end[1] - start[1]) * 100. / total);
+
+	assert_within_epsilon(end[0] - start[0], sema, tolerance);
+	assert_within_epsilon(end[1] - start[1], sema + busy, tolerance);
+	igt_assert((end[0] - start[0]) < (end[1] - start[1]) * (1 + tolerance));

__assert_within_epsilon with one of the tolerances zero?

+
+	igt_spin_free(gem_fd, spin[1]);
+	igt_spin_free(gem_fd, spin[0]);
+}
+
+static void
+sema_busy(int gem_fd,
+	  const struct intel_execution_engine2 *e,
+	  unsigned int flags)
+{
+	const struct intel_execution_engine2 *signal;
+	int fd;
+
+	fd = open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
+	open_group(I915_PMU_ENGINE_BUSY(e->class, e->instance), fd);
+
+	__for_each_physical_engine(gem_fd, signal) {
+		if (e->class == signal->class &&
+		    e->instance == signal->instance)
+			continue;
+
+		__sema_busy(gem_fd, fd, e, signal, 50, 100);
+		__sema_busy(gem_fd, fd, e, signal, 25, 50);
+		__sema_busy(gem_fd, fd, e, signal, 75, 75);
+	}
+
+	close(fd);
+}
+
  #define   MI_WAIT_FOR_PIPE_C_VBLANK (1<<21)
  #define   MI_WAIT_FOR_PIPE_B_VBLANK (1<<11)
  #define   MI_WAIT_FOR_PIPE_A_VBLANK (1<<3)
@@ -1774,6 +1842,9 @@ igt_main
  			sema_wait(fd, e,
  				  TEST_BUSY | TEST_TRAILING_IDLE);
+ igt_subtest_f("semaphore-busy-%s", e->name)
+			sema_busy(fd, e, 0);
+
  		/**
  		 * Check that two perf clients do not influence each
  		 * others observations.


Looking forward to results!

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux