Simple tests for validating the PMU implementation for GT C6
residencies and frequency.

These tests validate the kernel series which is currently in review
here - https://patchwork.freedesktop.org/series/139121/

Cc: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@xxxxxxxxx>
---
 lib/igt_perf.c       |  18 ++
 lib/igt_perf.h       |   2 +
 tests/intel/xe_pmu.c | 444 +++++++++++++++++++++++++++++++++++++++++++
 tests/meson.build    |   1 +
 4 files changed, 465 insertions(+)
 create mode 100644 tests/intel/xe_pmu.c

diff --git a/lib/igt_perf.c b/lib/igt_perf.c
index 3866c6d77..88ea66ffc 100644
--- a/lib/igt_perf.c
+++ b/lib/igt_perf.c
@@ -129,6 +129,18 @@ uint64_t igt_perf_type_id(const char *device)
 	return strtoull(buf, NULL, 0);
 }
 
+int igt_xe_perf_events_dir(int xe)
+{
+	char buf[80];
+	char path[PATH_MAX];
+
+	memset(buf, 0, sizeof(buf));
+
+	xe_perf_device(xe, buf, sizeof(buf));
+	snprintf(path, sizeof(path), "/sys/bus/event_source/devices/%s/events", buf);
+	return open(path, O_RDONLY);
+}
+
 int igt_perf_events_dir(int i915)
 {
 	char buf[80];
@@ -183,6 +195,12 @@ int perf_xe_open(int xe, uint64_t config)
 			  PERF_FORMAT_TOTAL_TIME_ENABLED);
 }
 
+int perf_xe_open_group(int xe, uint64_t config, int group)
+{
+	return _perf_open(xe_perf_type_id(xe), config, group,
+			  PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP);
+}
+
 int perf_i915_open(int i915, uint64_t config)
 {
 	return _perf_open(i915_perf_type_id(i915), config, -1,
diff --git a/lib/igt_perf.h b/lib/igt_perf.h
index 3d9ba2917..8aff78d0e 100644
--- a/lib/igt_perf.h
+++ b/lib/igt_perf.h
@@ -55,6 +55,7 @@ perf_event_open(struct perf_event_attr *attr,
 
 uint64_t igt_perf_type_id(const char *device);
 int igt_perf_events_dir(int i915);
+int igt_xe_perf_events_dir(int xe);
 int igt_perf_open(uint64_t type, uint64_t config);
 int igt_perf_open_group(uint64_t type, uint64_t config, int group);
 
@@ -71,5 +72,6 @@ int perf_i915_open(int i915, uint64_t config);
 int perf_i915_open_group(int i915, uint64_t config, int group);
 
 int perf_xe_open(int xe, uint64_t config);
+int perf_xe_open_group(int xe, uint64_t config, int group);
 
 #endif /* I915_PERF_H */
diff --git a/tests/intel/xe_pmu.c b/tests/intel/xe_pmu.c
new file mode 100644
index 000000000..f5ef24757
--- /dev/null
+++ b/tests/intel/xe_pmu.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+/**
+ * TEST: Test Xe PMU functionality
+ * Category: Perf Monitoring Unit
+ * Mega feature: Perf Monitoring Unit
+ * Sub-category: Power Management
+ * Functionality: Power/Perf
+ * Test category: Functional tests
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <time.h>
+#include <errno.h>
+#include <dirent.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include "igt.h"
+#include "igt_device.h"
+#include "igt_power.h"
+#include "igt_sysfs.h"
+#include "igt_perf.h"
+
+#include "lib/igt_syncobj.h"
+#include "xe/xe_ioctl.h"
+#include "xe/xe_query.h"
+#include "xe/xe_spin.h"
+#include "xe/xe_util.h"
+
+#define SLEEP_DURATION 2 /* in seconds */
+static const double tolerance = 0.1;
+static const unsigned long batch_duration_ns = 500e6;
+static const char *no_debug_data = "\0";
+
+#define __assert_within_epsilon(x, ref, tol_up, tol_down, debug_data) \
+	igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
+		     (double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
+		     "'%s' != '%s' (%f not within +%.1f%%/-%.1f%% tolerance of %f)\n%s\n",\
+		     #x, #ref, (double)(x), \
+		     (tol_up) * 100.0, (tol_down) * 100.0, \
+		     (double)(ref), debug_data)
+
+#define assert_within_epsilon(x, ref, tolerance) \
+	__assert_within_epsilon(x, ref, tolerance, tolerance, no_debug_data)
+
+#define assert_within_epsilon_debug(x, ref, tolerance, debug_data) \
+	__assert_within_epsilon(x, ref, tolerance, tolerance, debug_data)
+
+/* State of a running spinner workload, shared by run/end_workload(). */
+struct workload {
+	struct drm_xe_sync sync[2];
+	struct drm_xe_exec exec;
+	uint64_t addr;
+	struct xe_spin_opts spin_opts;
+	struct xe_spin *spin;
+	uint32_t exec_queue;
+	uint32_t syncobj;
+	size_t bo_size;
+	uint32_t bo;
+	uint32_t vm;
+};
+
+/* Open a single Xe PMU event; skip when the open fails with ENODEV. */
+static int open_pmu(int xe, uint64_t config)
+{
+	int fd;
+
+	fd = perf_xe_open(xe, config);
+	igt_skip_on(fd < 0 && errno == ENODEV);
+	igt_assert(fd >= 0);
+
+	return fd;
+}
+
+/* Open an Xe PMU event as part of @group; callers pass -1 for the leader. */
+static int open_group(int xe, uint64_t config, int group)
+{
+	int fd;
+
+	fd = perf_xe_open_group(xe, config, group);
+	igt_skip_on(fd < 0 && errno == ENODEV);
+	igt_assert(fd >= 0);
+
+	return fd;
+}
+
+/*
+ * Read a non-grouped counter opened with PERF_FORMAT_TOTAL_TIME_ENABLED.
+ * Returns the counter value; the enabled time is stored in @ts if non-NULL.
+ */
+static uint64_t __pmu_read_single(int fd, uint64_t *ts)
+{
+	uint64_t data[2];
+
+	igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
+	if (ts)
+		*ts = data[1];
+
+	return data[0];
+}
+
+/*
+ * Read @num counters from a group leader into @val. The first two words of
+ * the read buffer precede the values; the second one is returned.
+ */
+static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
+{
+	uint64_t buf[2 + num];
+	unsigned int i;
+
+	igt_assert_eq(read(fd, buf, sizeof(buf)), sizeof(buf));
+
+	for (i = 0; i < num; i++)
+		val[i] = buf[2 + i];
+
+	return buf[1];
+}
+
+/*
+ * Look up the perf config of the event named @pmu_str in the Xe PMU events
+ * sysfs directory; the attribute is expected to read "config=0x<hex>".
+ */
+static unsigned long read_pmu_config(int fd, const char *pmu_str)
+{
+	int dir_fd;
+	int ret;
+	unsigned long config;
+	char config_str[128];
+
+	dir_fd = igt_xe_perf_events_dir(fd);
+	igt_assert(dir_fd >= 0);
+	ret = igt_sysfs_scanf(dir_fd, pmu_str, "%127s", config_str);
+	/* Close before asserting so a missing event does not leak the fd. */
+	close(dir_fd);
+	igt_assert_eq(ret, 1);
+
+	ret = sscanf(config_str, "config=0x%lx", &config);
+	igt_assert(ret == 1);
+
+	return config;
+}
+
+/* Sleep for at least @usec microseconds; returns the time slept in usec. */
+static unsigned int measured_usleep(unsigned int usec)
+{
+	struct timespec ts = { };
+	unsigned int slept;
+
+	slept = igt_nsec_elapsed(&ts);
+	igt_assert(slept == 0);
+	do {
+		usleep(usec - slept);
+		slept = igt_nsec_elapsed(&ts) / 1000;
+	} while (slept < usec);
+
+	return igt_nsec_elapsed(&ts) / 1000;
+}
+
+/* Read gtidle/idle_residency_ms of GT @gt from sysfs. */
+static unsigned long read_idle_residency(int fd, int gt)
+{
+	unsigned long residency = 0;
+	int gt_fd;
+
+	gt_fd = xe_sysfs_gt_open(fd, gt);
+	igt_assert(gt_fd >= 0);
+	igt_assert(igt_sysfs_scanf(gt_fd, "gtidle/idle_residency_ms", "%lu", &residency) == 1);
+	close(gt_fd);
+
+	return residency;
+}
+
+/**
+ * SUBTEST: c6
+ * Description: Basic residency test to validate idle residency
+ *		measured over a time interval is within the tolerance
+ */
+static void test_rc6(int xe, unsigned int gt)
+{
+	uint64_t pmu_config;
+	uint64_t ts[2];
+	uint64_t val;
+	unsigned long slept, start, end;
+	char event_str[100];
+	int pmu_fd;
+
+	snprintf(event_str, sizeof(event_str), "rc6-residency-gt%u", gt);
+	/* Keep the full-width config; an int would drop its upper bits. */
+	pmu_config = read_pmu_config(xe, event_str);
+	pmu_fd = open_pmu(xe, pmu_config);
+
+	igt_assert_f(igt_wait(xe_is_gt_in_c6(xe, gt), 3000, 1), "GT %u not in C6\n", gt);
+
+	/* While idle check full RC6. */
+	start = read_idle_residency(xe, gt);
+	val = __pmu_read_single(pmu_fd, &ts[0]);
+	slept = measured_usleep(SLEEP_DURATION * USEC_PER_SEC) / 1000;
+	end = read_idle_residency(xe, gt);
+	val = __pmu_read_single(pmu_fd, &ts[1]) - val;
+
+	igt_debug("gt%u: slept=%lu, perf=%"PRIu64"\n",
+		  gt, slept, val);
+
+	igt_debug("Start res: %lu, end_res: %lu\n", start, end);
+
+	/* Compare the counter delta with the perf timestamp delta / 1e6. */
+	assert_within_epsilon(val,
+			      (ts[1] - ts[0])/1000000,
+			      tolerance);
+	close(pmu_fd);
+}
+
+/*
+ * Write @freq to the freq0/<freq_name>_freq attribute of GT @gt_id.
+ * Returns the igt_sysfs_printf() result; callers treat > 0 as success.
+ */
+static int set_freq(int fd, int gt_id, const char *freq_name, uint32_t freq)
+{
+	int ret = -EAGAIN;
+	char freq_attr[22];
+	int gt_fd;
+
+	snprintf(freq_attr, sizeof(freq_attr), "freq0/%s_freq", freq_name);
+	gt_fd = xe_sysfs_gt_open(fd, gt_id);
+	igt_assert(gt_fd >= 0);
+
+	while (ret == -EAGAIN)
+		ret = igt_sysfs_printf(gt_fd, freq_attr, "%u", freq);
+
+	close(gt_fd);
+	return ret;
+}
+
+/* Read the freq0/<freq_name>_freq attribute of GT @gt_id. */
+static uint32_t get_freq(int fd, int gt_id, const char *freq_name)
+{
+	uint32_t freq = 0;
+	int err = -EAGAIN;
+	char freq_attr[22];
+	int gt_fd;
+
+	snprintf(freq_attr, sizeof(freq_attr), "freq0/%s_freq", freq_name);
+	gt_fd = xe_sysfs_gt_open(fd, gt_id);
+	igt_assert(gt_fd >= 0);
+
+	while (err == -EAGAIN)
+		err = igt_sysfs_scanf(gt_fd, freq_attr, "%u", &freq);
+
+	close(gt_fd);
+	/* Never hand an unparsed (previously uninitialised) value to callers. */
+	igt_assert_eq(err, 1);
+
+	igt_debug("gt%d: %s freq %u\n", gt_id, freq_name, freq);
+
+	return freq;
+}
+
+/*
+ * Submit a non-preemptible spinner on @eci and return once it has started.
+ * Everything needed to stop it is stored in @wl for end_workload().
+ */
+static void run_workload(int fd, int gt, struct drm_xe_engine_class_instance *eci,
+			 struct workload *wl)
+{
+	struct drm_xe_sync sync[2] = {
+		{ .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
+		{ .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
+	};
+	struct drm_xe_exec exec = {
+		.num_batch_buffer = 1,
+		.num_syncs = 2,
+		.syncs = to_user_pointer(sync),
+	};
+	struct xe_spin_opts spin_opts = {
+		.addr = 0x1a0000,
+		.preempt = false
+	};
+
+	wl->addr = spin_opts.addr;
+
+	wl->vm = xe_vm_create(fd, 0, 0);
+	wl->bo_size = xe_bb_size(fd, sizeof(*wl->spin));
+
+	wl->bo = xe_bo_create(fd, wl->vm, wl->bo_size,
+			      vram_if_possible(fd, eci->gt_id), 0);
+	wl->spin = xe_bo_map(fd, wl->bo, wl->bo_size);
+
+	wl->exec_queue = xe_exec_queue_create(fd, wl->vm, eci, 0);
+	wl->syncobj = syncobj_create(fd, 0);
+
+	/* sync[0] is signalled by the bind and then waited on by the exec. */
+	sync[0].handle = syncobj_create(fd, 0);
+	xe_vm_bind_async(fd, wl->vm, 0, wl->bo, 0, wl->addr, wl->bo_size, sync, 1);
+
+	xe_spin_init(wl->spin, &spin_opts);
+
+	sync[0].flags &= ~DRM_XE_SYNC_FLAG_SIGNAL;
+	sync[1].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
+	sync[1].handle = wl->syncobj;
+
+	exec.exec_queue_id = wl->exec_queue;
+	exec.address = wl->addr;
+	xe_exec(fd, &exec);
+
+	xe_spin_wait_started(wl->spin);
+	usleep(50000);
+	/* The spinner must still be busy, i.e. its out-fence is unsignalled. */
+	igt_assert(!syncobj_wait(fd, &wl->syncobj, 1, 1, 0, NULL));
+
+	igt_info("Running on GT %d Engine %s:%d\n", eci->gt_id,
+		 xe_engine_class_string(eci->engine_class), eci->engine_instance);
+
+	/* Save it for the end_workload function */
+	wl->sync[0] = sync[0];
+	wl->sync[1] = sync[1];
+}
+
+/* Stop the spinner started by run_workload() and release all of @wl. */
+static void end_workload(int fd, struct workload *wl)
+{
+	xe_spin_end(wl->spin);
+
+	igt_assert(syncobj_wait(fd, &wl->syncobj, 1, INT64_MAX, 0, NULL));
+	igt_assert(syncobj_wait(fd, &wl->sync[0].handle, 1, INT64_MAX, 0, NULL));
+
+	wl->sync[0].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
+	xe_vm_unbind_async(fd, wl->vm, 0, 0, wl->addr, wl->bo_size, wl->sync, 1);
+	igt_assert(syncobj_wait(fd, &wl->sync[0].handle, 1, INT64_MAX, 0, NULL));
+
+	syncobj_destroy(fd, wl->sync[0].handle);
+	syncobj_destroy(fd, wl->syncobj);
+	xe_exec_queue_destroy(fd, wl->exec_queue);
+
+	munmap(wl->spin, wl->bo_size);
+	gem_close(fd, wl->bo);
+	xe_vm_destroy(fd, wl->vm);
+}
+
+/**
+ * SUBTEST: frequency
+ * Description: Read requested freq and actual frequency via PMU within
+ *		specified time interval while workload runs
+ */
+static void test_frequency(int fd, int gt, struct drm_xe_engine_class_instance *eci)
+{
+	struct workload wl;
+	uint64_t val[2], start[2], slept;
+	double min[2], max[2];
+	int pmu_fd[2];
+	uint32_t orig_min = get_freq(fd, gt, "min");
+	uint32_t orig_max = get_freq(fd, gt, "max");
+	unsigned long config_rq_freq, config_act_freq;
+	char event_str[100];
+
+	snprintf(event_str, sizeof(event_str), "requested-frequency-gt%d", gt);
+	config_rq_freq = read_pmu_config(fd, event_str);
+	pmu_fd[0] = open_group(fd, config_rq_freq, -1);
+
+	snprintf(event_str, sizeof(event_str), "actual-frequency-gt%d", gt);
+	config_act_freq = read_pmu_config(fd, event_str);
+	pmu_fd[1] = open_group(fd, config_act_freq, pmu_fd[0]);
+
+	run_workload(fd, gt, eci, &wl);
+	/*
+	 * Set GPU to min frequency and read PMU counters.
+	 */
+	igt_assert(set_freq(fd, gt, "max", orig_min) > 0);
+	igt_assert(get_freq(fd, gt, "max") == orig_min);
+
+	slept = pmu_read_multi(pmu_fd[0], 2, start);
+	measured_usleep(batch_duration_ns / 1000);
+	slept = pmu_read_multi(pmu_fd[0], 2, val) - slept;
+
+	min[0] = 1e9*(val[0] - start[0]) / slept;
+	min[1] = 1e9*(val[1] - start[1]) / slept;
+
+	/*
+	 * Set GPU to max frequency and read PMU counters.
+	 */
+	igt_assert(set_freq(fd, gt, "max", orig_max) > 0);
+	igt_assert(get_freq(fd, gt, "max") == orig_max);
+	igt_assert(set_freq(fd, gt, "min", orig_max) > 0);
+	igt_assert(get_freq(fd, gt, "min") == orig_max);
+
+	slept = pmu_read_multi(pmu_fd[0], 2, start);
+	measured_usleep(batch_duration_ns / 1000);
+	slept = pmu_read_multi(pmu_fd[0], 2, val) - slept;
+
+	max[0] = 1e9*(val[0] - start[0]) / slept;
+	max[1] = 1e9*(val[1] - start[1]) / slept;
+
+	/*
+	 * Restore min/max.
+	 */
+	igt_assert(set_freq(fd, gt, "min", orig_min) > 0);
+	igt_assert(get_freq(fd, gt, "min") == orig_min);
+
+	igt_info("Minimum frequency: requested %.1f, actual %.1f\n",
+		 min[0], min[1]);
+	igt_info("Maximum frequency: requested %.1f, actual %.1f\n",
+		 max[0], max[1]);
+
+	close(pmu_fd[0]);
+	close(pmu_fd[1]);
+
+	end_workload(fd, &wl);
+
+	assert_within_epsilon(min[0], orig_min, tolerance);
+	/*
+	 * On thermally throttled devices we cannot be sure maximum frequency
+	 * can be reached so use larger tolerance downwards.
+	 */
+	__assert_within_epsilon(max[0], orig_max, tolerance, 0.15f, no_debug_data);
+}
+
+igt_main
+{
+	int fd, gt;
+	struct drm_xe_engine_class_instance *hwe;
+
+	igt_fixture {
+		fd = drm_open_driver(DRIVER_XE);
+		igt_require(!IS_PONTEVECCHIO(xe_dev_id(fd)));
+	}
+
+	igt_describe("Validate PMU C6 residency counters");
+	igt_subtest("c6")
+		xe_for_each_gt(fd, gt)
+			test_rc6(fd, gt);
+
+	igt_describe("Validate PMU GT freq measured over a time interval is within the tolerance");
+	igt_subtest("frequency")
+		xe_for_each_engine(fd, hwe)
+			test_frequency(fd, hwe->gt_id, hwe);
+
+	igt_fixture {
+		close(fd);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 34b87b125..dc84ef748 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -308,6 +308,7 @@ intel_xe_progs = [
 	'xe_pat',
 	'xe_peer2peer',
 	'xe_pm',
+	'xe_pmu',
 	'xe_pm_residency',
 	'xe_prime_self_import',
 	'xe_query',
-- 
2.38.1