From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> A subtest to verify that the engine busyness is reported with the expected accuracy on platforms where the feature is available. We test three patterns: 2%, 50% and 98% load per engine. The problematic part is that we also rely on scheduling latencies and on the accuracy of the no-op batch calibration. For these reasons we use a large-ish tolerance and also set the load-emitting process to SCHED_FIFO. Load calibration is also moved to a subtest group fixture so the set-up time is shared between all subtests which use it. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- tests/perf_pmu.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 115 insertions(+), 10 deletions(-) diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c index db7696115a7b..ec6b0ee1cb86 100644 --- a/tests/perf_pmu.c +++ b/tests/perf_pmu.c @@ -35,6 +35,7 @@ #include <dirent.h> #include <time.h> #include <poll.h> +#include <sched.h> #include "igt.h" #include "igt_core.h" @@ -79,6 +80,17 @@ init(int gem_fd, const struct intel_execution_engine2 *e, uint8_t sample) close(fd); } +static uint64_t __pmu_read_single(int fd, uint64_t *ts) +{ + uint64_t data[2]; + + igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data)); + + *ts = data[1]; + + return data[0]; +} + static uint64_t pmu_read_single(int fd) { uint64_t data[2]; @@ -665,6 +677,77 @@ multi_client(int gem_fd, const struct intel_execution_engine2 *e) assert_within_epsilon(val[1], slept, tolerance); } +static void +accuracy(int gem_fd, const struct intel_execution_engine2 *e, + unsigned long cal_ms_sz, unsigned long target_busy_pct) +{ + const unsigned long busy_us = 2500; + const unsigned long idle_us = 100 * (busy_us - target_busy_pct * + busy_us / 100) / target_busy_pct; + const unsigned int test_us = 1e6; + double busy_r; + uint64_t val[2]; + uint64_t ts[2]; + int fd; + + igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8); + + assert_within_epsilon((double)busy_us / (busy_us + 
idle_us), + (double)target_busy_pct / 100.0, tolerance); + + /* Emit PWM pattern on the engine from a child. */ + igt_fork(child, 1) { + struct sched_param rt = { .sched_priority = 99 }; + const uint32_t bbe = MI_BATCH_BUFFER_END; + const unsigned long loops = test_us / (busy_us + idle_us); + const unsigned long sz = ALIGN(busy_us * cal_ms_sz / 1000, + sizeof(uint32_t)); + struct drm_i915_gem_exec_object2 obj = { }; + struct drm_i915_gem_execbuffer2 eb = { + .buffers_ptr = to_user_pointer(&obj), + .buffer_count = 1, + .flags = e2ring(gem_fd, e) + }; + unsigned long i; + + /* We need the best sleep accuracy we can get. */ + igt_require(sched_setscheduler(0, + SCHED_FIFO | SCHED_RESET_ON_FORK, + &rt) == 0); + + obj.handle = gem_create(gem_fd, sz); + gem_write(gem_fd, obj.handle, sz - sizeof(bbe), &bbe, + sizeof(bbe)); + + for (i = 0; i < loops; i++) { + gem_execbuf(gem_fd, &eb); + gem_sync(gem_fd, obj.handle); + usleep(idle_us); + } + + gem_close(gem_fd, obj.handle); + } + + /* Let child run. */ + usleep(test_us / 4); + + /* Collect engine busyness for a subset of child runtime. */ + fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance)); + val[0] = __pmu_read_single(fd, &ts[0]); + usleep(test_us / 2); + val[1] = __pmu_read_single(fd, &ts[1]); + close(fd); + + igt_waitchildren(); + + busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]); + + igt_info("error=%.2f%%\n", + 100.0 - 100.0 * (busy_r / ((double)target_busy_pct / 100.0))); + + assert_within_epsilon(busy_r, (double)target_busy_pct / 100.0, 0.2); +} + /** * Tests that i915 PMU corectly errors out in invalid initialization. 
* i915 PMU is uncore PMU, thus: @@ -801,7 +884,7 @@ static void cpu_hotplug(int gem_fd) static unsigned long calibrate_nop(int fd, const uint64_t calibration_us) { - const uint64_t cal_min_us = calibration_us * 3; + const uint64_t cal_min_us = 2e6; const unsigned int tolerance_pct = 10; const uint32_t bbe = MI_BATCH_BUFFER_END; const unsigned int loops = 17; @@ -844,7 +927,7 @@ static unsigned long calibrate_nop(int fd, const uint64_t calibration_us) } static void -test_interrupts(int gem_fd) +test_interrupts(int gem_fd, unsigned long cal_ms_sz) { const uint32_t bbe = MI_BATCH_BUFFER_END; const unsigned int test_duration_ms = 1000; @@ -854,14 +937,14 @@ test_interrupts(int gem_fd) .buffer_count = 1, .flags = I915_EXEC_FENCE_OUT, }; - unsigned long sz; - igt_spin_t *spin; const int target = 30; + const unsigned long sz = ALIGN(test_duration_ms * cal_ms_sz / target, + sizeof(uint32_t)); + igt_spin_t *spin; struct pollfd pfd; uint64_t idle, busy; int fd; - sz = calibrate_nop(gem_fd, test_duration_ms * 1000 / target); gem_quiescent_gpu(gem_fd); fd = open_pmu(I915_PMU_INTERRUPTS); @@ -1178,11 +1261,33 @@ igt_main igt_subtest("frequency") test_frequency(fd); - /** - * Test interrupt count reporting. - */ - igt_subtest("interrupts") - test_interrupts(fd); + igt_subtest_group { + unsigned long cal_ms_sz; + + igt_fixture { + cal_ms_sz = calibrate_nop(fd, 1e3); + igt_debug("%lu nops for a 1ms batch\n", cal_ms_sz / 4); + } + + /** + * Test interrupt count reporting. + */ + igt_subtest("interrupts") + test_interrupts(fd, cal_ms_sz); + + for_each_engine_class_instance(fd, e) { + unsigned int pct[] = { 2, 50, 98 }; + + /** + * Check engine busyness accuracy is as expected. + */ + for (i = 0; i < ARRAY_SIZE(pct); i++) { + igt_subtest_f("busy-accuracy-%u-%s", pct[i], + e->name) + accuracy(fd, e, cal_ms_sz, pct[i]); + } + } + } /** * Test RC6 residency reporting. 
-- 2.14.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx