From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> A subtest to verify that the engine busyness is reported with expected accuracy on platforms where the feature is available. We test three patterns: 2%, 50% and 98% load per engine. v2: * Use spin batch instead of nop calibration. * Various tweaks. v3: * Change loops to be time based. * Use __igt_spin_batch_new inside timing sensitive loops. * Fixed PWM sleep handling. v4: * Use restarting spin batch. * Calibrate more carefully by looking at the real PWM loop. v5: * Made standalone. * Better info messages. * Tweak sleep compensation. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- tests/perf_pmu.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 174 insertions(+), 18 deletions(-) diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c index a7501ca5f7a4..fa9b54793a52 100644 --- a/tests/perf_pmu.c +++ b/tests/perf_pmu.c @@ -35,6 +35,7 @@ #include <dirent.h> #include <time.h> #include <poll.h> +#include <sched.h> #include "igt.h" #include "igt_core.h" @@ -385,6 +386,22 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e, gem_quiescent_gpu(gem_fd); } +static void +__submit_spin_batch(int gem_fd, igt_spin_t *spin, + const struct intel_execution_engine2 *e) +{ + struct drm_i915_gem_exec_object2 obj = { + .handle = spin->handle + }; + struct drm_i915_gem_execbuffer2 eb = { + .buffer_count = 1, + .buffers_ptr = to_user_pointer(&obj), + .flags = e2ring(gem_fd, e), + }; + + gem_execbuf(gem_fd, &eb); +} + static void most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e, const unsigned int num_engines, unsigned int flags) @@ -405,15 +422,7 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e, if (e == e_) { idle_idx = i; } else if (spin) { - struct drm_i915_gem_exec_object2 obj = { - .handle = spin->handle - }; - struct drm_i915_gem_execbuffer2 eb = { - .buffer_count = 1, - .buffers_ptr = to_user_pointer(&obj), - .flags = e2ring(gem_fd, e_), - }; - gem_execbuf(gem_fd, &eb); + __submit_spin_batch(gem_fd, spin, e_); } else { spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e_), 0); @@ -469,15 +478,7 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines, continue; if (spin) { - struct drm_i915_gem_exec_object2 obj = { - .handle = spin->handle - }; - struct drm_i915_gem_execbuffer2 eb = { - .buffer_count = 1, - .buffers_ptr = to_user_pointer(&obj), - .flags = e2ring(gem_fd, e), - }; - gem_execbuf(gem_fd, &eb); + __submit_spin_batch(gem_fd, spin, e); } else { spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0); @@ -1390,6 +1391,150 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e) gem_quiescent_gpu(gem_fd); } +static double __error(double val, double ref) +{ + igt_assert(ref != 0.0); + return (100.0 * val / ref) - 100.0; +} + +static void __rearm_spin_batch(igt_spin_t *spin) +{ + const uint32_t mi_arb_chk = 0x5 << 23; + + *spin->batch = mi_arb_chk; + __sync_synchronize(); +} + +#define div_round_up(a, b) (((a) + (b) - 1) / (b)) + +static void +accuracy(int gem_fd, const struct intel_execution_engine2 *e, + unsigned long target_busy_pct) +{ + const unsigned int min_test_loops = 7; + const unsigned long min_test_us = 1e6; + unsigned long busy_us = 2500; + unsigned long idle_us = 100 * (busy_us - target_busy_pct * + busy_us / 100) / target_busy_pct; + unsigned long pwm_calibration_us; + unsigned long test_us; + double busy_r; + uint64_t val[2]; + uint64_t ts[2]; + int fd; + + /* Sampling platforms cannot reach the high accuracy criteria. */ + igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8); + + while (idle_us < 2500) { + busy_us *= 2; + idle_us *= 2; + } + + pwm_calibration_us = min_test_loops * (busy_us + idle_us); + while (pwm_calibration_us < min_test_us) + pwm_calibration_us += busy_us + idle_us; + test_us = min_test_loops * (idle_us + busy_us); + while (test_us < min_test_us) + test_us += busy_us + idle_us; + + igt_info("calibration=%luus, test=%luus; busy=%luus, idle=%luus\n", + pwm_calibration_us, test_us, busy_us, idle_us); + + assert_within_epsilon((double)busy_us / (busy_us + idle_us), + (double)target_busy_pct / 100.0, tolerance); + + /* Emit PWM pattern on the engine from a child. */ + igt_fork(child, 1) { + struct sched_param rt = { .sched_priority = 99 }; + const unsigned long timeout[] = { pwm_calibration_us * 1000, + test_us * 2 * 1000 }; + unsigned long sleep_busy = busy_us; + unsigned long sleep_idle = idle_us; + igt_spin_t *spin; + + /* We need the best sleep accuracy we can get. */ + igt_require(sched_setscheduler(0, + SCHED_FIFO | SCHED_RESET_ON_FORK, + &rt) == 0); + + /* Allocate our spin batch and idle it. */ + spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0); + igt_spin_batch_end(spin); + gem_sync(gem_fd, spin->handle); + + /* 1st pass is calibration, second pass is the test. */ + for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) { + unsigned long busy_ns = 0, idle_ns = 0; + struct timespec test_start = { }; + unsigned long loops = 0; + double err_busy, err_idle; + + igt_nsec_elapsed(&test_start); + do { + struct timespec t_busy = { }; + + igt_nsec_elapsed(&t_busy); + + /* Restart the spinbatch. */ + __rearm_spin_batch(spin); + __submit_spin_batch(gem_fd, spin, e); + measured_usleep(sleep_busy); + igt_spin_batch_end(spin); + gem_sync(gem_fd, spin->handle); + + busy_ns += igt_nsec_elapsed(&t_busy); + + idle_ns += measured_usleep(sleep_idle); + + loops++; + } while (igt_nsec_elapsed(&test_start) < timeout[pass]); + + busy_ns = div_round_up(busy_ns, loops); + idle_ns = div_round_up(idle_ns, loops); + + err_busy = __error(busy_ns / 1000, busy_us); + err_idle = __error(idle_ns / 1000, idle_us); + + igt_info("%u: busy %lu/%lu %.2f%%, idle %lu/%lu %.2f%%\n", + pass, + busy_ns / 1000, busy_us, err_busy, + idle_ns / 1000, idle_us, err_idle); + + if (pass == 0) { + sleep_busy = (double)busy_us - + (double)busy_us * err_busy / 100.0; + sleep_idle = (double)idle_us - + (double)idle_us * err_idle / 100.0; + igt_info("calibrated sleeps: busy=%lu, idle=%lu\n", + sleep_busy, sleep_idle); + } + } + + igt_spin_batch_free(gem_fd, spin); + } + + /* Let the child run. */ + usleep(pwm_calibration_us * 2); + + /* Collect engine busyness for an interesting part of child runtime. */ + fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance)); + val[0] = __pmu_read_single(fd, &ts[0]); + usleep(test_us / 2); + val[1] = __pmu_read_single(fd, &ts[1]); + close(fd); + + igt_waitchildren(); + + busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]); + + igt_info("error=%.2f%% (%.2f%% vs %lu%%)\n", + __error(busy_r, target_busy_pct / 100.0), + busy_r * 100.0, target_busy_pct); + + assert_within_epsilon(busy_r, (double)target_busy_pct / 100.0, 0.15); +} + igt_main { const unsigned int num_other_metrics = @@ -1418,6 +1563,8 @@ igt_main invalid_init(); for_each_engine_class_instance(fd, e) { + const unsigned int pct[] = { 2, 50, 98 }; + /** * Test that a single engine metric can be initialized or it * is correctly rejected. @@ -1524,6 +1671,15 @@ igt_main */ igt_subtest_f("enable-race-%s", e->name) test_enable_race(fd, e); + + /** + * Check engine busyness accuracy is as expected. + */ + for (i = 0; i < ARRAY_SIZE(pct); i++) { + igt_subtest_f("busy-accuracy-%u-%s", + pct[i], e->name) + accuracy(fd, e, pct[i]); + } } /** -- 2.14.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx