From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

A subtest to verify that the engine busyness is reported with expected
accuracy on platforms where the feature is available.

We test three patterns: 2%, 50% and 98% load per engine.

v2:
 * Use spin batch instead of nop calibration.
 * Various tweaks.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
 tests/perf_pmu.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 157 insertions(+)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 935fee03b253..3ca88bcc6976 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -35,6 +35,7 @@
 #include <dirent.h>
 #include <time.h>
 #include <poll.h>
+#include <sched.h>
 
 #include "igt.h"
 #include "igt_core.h"
@@ -983,6 +984,152 @@ test_rc6(int gem_fd)
 	assert_within_epsilon(busy - prev, 0.0, tolerance);
 }
 
+/*
+ * Read one sample from a PMU fd. Returns the first u64 of the read and
+ * stores the second in *ts (presumably the time-enabled timestamp
+ * selected by open_pmu()'s read_format - TODO confirm).
+ */
+static uint64_t __pmu_read_single(int fd, uint64_t *ts)
+{
+	uint64_t data[2];
+
+	igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
+
+	*ts = data[1];
+
+	return data[0];
+}
+
+/* Deviation of val from ref, in percent of ref. */
+static double __error(double val, double ref)
+{
+	return (100.0 * val / ref) - 100.0;
+}
+
+/* Print the deviation and both raw values at debug level. */
+static void debug_error(const char *str, double val, double ref)
+{
+	igt_debug("%s=%.2f%% (%.2f/%.2f)\n", str, __error(val, ref), val, ref);
+}
+
+/* As debug_error(), plus the deviation alone at info level. */
+static void log_error(const char *str, double val, double ref)
+{
+	debug_error(str, val, ref);
+	igt_info("%s=%.2f%%\n", str, __error(val, ref));
+}
+
+/*
+ * Have a child emit a PWM-like busy/idle pattern with a duty cycle of
+ * target_busy_pct percent on engine e, sample the engine busy counter
+ * over part of the child's runtime and check the reported busyness
+ * against the target.
+ */
+static void
+accuracy(int gem_fd, const struct intel_execution_engine2 *e,
+	 unsigned long target_busy_pct)
+{
+	const unsigned int test_us = 1e6;
+	unsigned long busy_us = 2500;
+	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
+				busy_us / 100) / target_busy_pct;
+	double busy_r;
+	uint64_t val[2];
+	uint64_t ts[2];
+	int fd;
+
+	/* Sampling platforms cannot reach the high accuracy criteria. */
+	igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8);
+
+	while (idle_us < 2500) {
+		busy_us *= 2;
+		idle_us *= 2;
+	}
+
+	assert_within_epsilon((double)busy_us / (busy_us + idle_us),
+			      (double)target_busy_pct / 100.0, tolerance);
+
+	/* Emit PWM pattern on the engine from a child. */
+	igt_fork(child, 1) {
+		struct sched_param rt = { .sched_priority = 99 };
+		unsigned long overhead_ns = 0;
+		unsigned long loops;
+		unsigned long i;
+
+		/* We need the best sleep accuracy we can get. */
+		igt_require(sched_setscheduler(0,
+					       SCHED_FIFO | SCHED_RESET_ON_FORK,
+					       &rt) == 0);
+
+		/* Measure setup overhead. */
+		loops = test_us / 8000;
+		for (i = 0; i < loops; i++) {
+			struct timespec start = { };
+			igt_spin_t *spin;
+			unsigned int ns;
+
+			igt_nsec_elapsed(&start);
+			spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e),
+						  0);
+			igt_spin_batch_end(spin);
+			ns = igt_nsec_elapsed(&start);
+			gem_sync(gem_fd, spin->handle);
+			igt_spin_batch_free(gem_fd, spin);
+			overhead_ns += ns;
+			usleep(1000);
+		}
+
+		/* Average the accumulated setup cost over the iterations. */
+		overhead_ns /= loops;
+		igt_debug("spin setup overhead = %luus\n", overhead_ns / 1000);
+		igt_assert(overhead_ns < busy_us * 1000);
+
+		/* Emit PWM busy signal. */
+		loops = test_us / (busy_us + idle_us);
+		for (i = 0; i < loops; i++) {
+			struct timespec start = { };
+			igt_spin_t *spin;
+			unsigned int ns;
+
+			spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e),
+						  0);
+			ns = measured_usleep(busy_us - overhead_ns / 1000);
+			igt_spin_batch_end(spin);
+			gem_sync(gem_fd, spin->handle);
+			igt_nsec_elapsed(&start);
+			igt_spin_batch_free(gem_fd, spin);
+			debug_error("busy error", ns, busy_us * 1000);
+			ns = igt_nsec_elapsed(&start);
+
+			/* Sleep only for what remains of the idle period. */
+			if (ns > idle_us * 1000)
+				ns = 0;
+			else
+				ns = idle_us - ns / 1000;
+			ns = measured_usleep(ns);
+			debug_error("idle error", ns, idle_us * 1000);
+		}
+	}
+
+	/* Let the child run. */
+	usleep(test_us / 4);
+
+	/* Collect engine busyness for a subset of child runtime. */
+	fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
+	val[0] = __pmu_read_single(fd, &ts[0]);
+	usleep(test_us / 2);
+	val[1] = __pmu_read_single(fd, &ts[1]);
+	close(fd);
+
+	igt_waitchildren();
+
+	busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
+
+	log_error("error", busy_r, target_busy_pct / 100.0);
+
+	assert_within_epsilon(busy_r, (double)target_busy_pct / 100.0, 0.15);
+}
+
 igt_main
 {
 	const unsigned int num_other_metrics =
@@ -1011,6 +1158,8 @@ igt_main
 		invalid_init();
 
 	for_each_engine_class_instance(fd, e) {
+		const unsigned int pct[] = { 2, 50, 98 };
+
 		/**
 		 * Test that a single engine metric can be initialized.
 		 */
@@ -1077,6 +1226,14 @@ igt_main
 		 */
 		igt_subtest_f("multi-client-%s", e->name)
 			multi_client(fd, e);
+
+		/**
+		 * Check engine busyness accuracy is as expected.
+		 */
+		for (i = 0; i < ARRAY_SIZE(pct); i++) {
+			igt_subtest_f("busy-accuracy-%u-%s", pct[i], e->name)
+				accuracy(fd, e, pct[i]);
+		}
 	}
 
 	/**
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx