i915 RFC PMU: * https://patchwork.freedesktop.org/series/27488/ * https://patchwork.freedesktop.org/series/28842/ Tests: * init: try to initialize all possible metrics exposed in i915 PMU (limited to instance 0 of each engine) * invalid_init: verify that i915 PMU correctly errors out on invalid initialization * single: verify that BUSY metrics work for each engine * parallel: verify that parallel requests for metrics do not conflict Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@xxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- tests/Makefile.sources | 1 + tests/perf_pmu.c | 546 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 547 insertions(+) create mode 100644 tests/perf_pmu.c diff --git a/tests/Makefile.sources b/tests/Makefile.sources index bb013c7..51b684b 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -215,6 +215,7 @@ TESTS_progs = \ kms_vblank \ meta_test \ perf \ + perf_pmu \ pm_backlight \ pm_lpsp \ pm_rc6_residency \ diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c new file mode 100644 index 0000000..0d025a6 --- /dev/null +++ b/tests/perf_pmu.c @@ -0,0 +1,546 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "igt.h" +#include "igt_sysfs.h" + +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <fcntl.h> +#include <inttypes.h> +#include <errno.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/poll.h> +#include <sys/time.h> +#include <time.h> +#include "drm.h" + +#define LOCAL_I915_EXEC_NO_RELOC (1<<11) +#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12) + +//////////////////////////////////////////////////////////////////////// +// This is a copy of perf.h from intel-gpu-tools/overlay +// because I am too lazy to move it to some common library +//////////////////////////////////////////////////////////////////////// + +#include <linux/perf_event.h> + +enum drm_i915_gem_engine_class { + I915_ENGINE_CLASS_OTHER = 0, + I915_ENGINE_CLASS_RENDER = 1, + I915_ENGINE_CLASS_COPY = 2, + I915_ENGINE_CLASS_VIDEO = 3, + I915_ENGINE_CLASS_VIDEO_ENHANCE = 4, + I915_ENGINE_CLASS_MAX /* non-ABI */ +}; + +enum drm_i915_pmu_engine_sample { + I915_SAMPLE_QUEUED = 0, + I915_SAMPLE_BUSY = 1, + I915_SAMPLE_WAIT = 2, + I915_SAMPLE_SEMA = 3 +}; + +#define I915_PMU_SAMPLE_BITS (4) +#define I915_PMU_SAMPLE_MASK (0xf) +#define I915_PMU_SAMPLE_INSTANCE_BITS (8) +#define I915_PMU_CLASS_SHIFT \ + (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS) + +#define __I915_PMU_ENGINE(class, instance, sample) \ + ((class) << I915_PMU_CLASS_SHIFT | \ + (instance) << I915_PMU_SAMPLE_BITS | \ + (sample)) + +#define 
I915_PMU_ENGINE_QUEUED(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED) + +#define I915_PMU_ENGINE_BUSY(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY) + +#define I915_PMU_ENGINE_WAIT(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT) + +#define I915_PMU_ENGINE_SEMA(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) + +#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) + +#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) +#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) +#define I915_PMU_ENERGY __I915_PMU_OTHER(2) +#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(3) + +#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(4) +#define I915_PMU_RC6p_RESIDENCY __I915_PMU_OTHER(5) +#define I915_PMU_RC6pp_RESIDENCY __I915_PMU_OTHER(6) + +static inline int +perf_event_open(struct perf_event_attr *attr, + pid_t pid, + int cpu, + int group_fd, + unsigned long flags) +{ +#ifndef __NR_perf_event_open +#if defined(__i386__) +#define __NR_perf_event_open 336 +#elif defined(__x86_64__) +#define __NR_perf_event_open 298 +#else +#define __NR_perf_event_open 0 +#endif +#endif + attr->size = sizeof(*attr); + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); +} + +static uint64_t i915_type_id(void) +{ + char buf[1024]; + int fd, n; + + fd = open("/sys/bus/event_source/devices/i915/type", 0); + if (fd < 0) { + n = -1; + } else { + n = read(fd, buf, sizeof(buf)-1); + close(fd); + } + if (n < 0) + return 0; + + buf[n] = '\0'; + return strtoull(buf, 0, 0); +} + +//////////////////////////////////////////////////////////////////////// + +static double elapsed(const struct timespec *start, const struct timespec *end) +{ + return ((end->tv_sec - start->tv_sec) + + (end->tv_nsec - start->tv_nsec)*1e-9); +} + +static uint64_t elapsed_ns(const struct timespec *start, const struct timespec *end) +{ + return ((end->tv_sec - start->tv_sec)*1e9 + + 
(end->tv_nsec - start->tv_nsec)); +} + +static void nop_on_ring(int fd, uint32_t handle, unsigned ring_id, int timeout) +{ + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 obj; + struct timespec start, now; + + gem_require_ring(fd, ring_id); + + memset(&obj, 0, sizeof(obj)); + obj.handle = handle; + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(&obj); + execbuf.buffer_count = 1; + execbuf.flags = ring_id; + execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; + execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; + + clock_gettime(CLOCK_MONOTONIC, &start); + if (__gem_execbuf(fd, &execbuf)) { + execbuf.flags = ring_id; + gem_execbuf(fd, &execbuf); + } + + do { + for (int loop = 0; loop < 1024; loop++) { + gem_execbuf(fd, &execbuf); + } + clock_gettime(CLOCK_MONOTONIC, &now); + } while (elapsed(&start, &now) < timeout); + gem_sync(fd, handle); +} + +static int perf_i915_open(int config, int group, int read_format) +{ + struct perf_event_attr attr; + + memset(&attr, 0, sizeof (attr)); + + attr.type = i915_type_id(); + if (attr.type == 0) + return -ENOENT; + attr.config = config; + + attr.read_format = read_format; + if (group != -1) + attr.read_format &= ~PERF_FORMAT_GROUP; + + return perf_event_open(&attr, -1, 0, group, 0); +} + +struct metric { + int config; + uint64_t old_value; + uint64_t value; +}; + +struct pmu_metrics { + int fd; + int read_format; + int num_metrics; + struct metric* metrics; +}; + +static int perf_init(struct pmu_metrics *pm, int num_configs, int* configs) +{ + int i, res; + + memset(pm, 0, sizeof(struct pmu_metrics)); + pm->fd = -1; + pm->read_format = + PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_GROUP; + pm->metrics = (struct metric*)calloc(num_configs, sizeof(struct metric)); + if (!pm->metrics) + return -1; + + for (i = 0; i < num_configs; ++i) { + if (pm->fd < 0) + res = pm->fd = perf_i915_open(configs[i], -1, pm->read_format); + else + res = perf_i915_open(configs[i], pm->fd, 
pm->read_format); + if (res >= 0) { + pm->metrics[pm->num_metrics++].config = configs[i]; + } + } + + igt_info("perf_init: enabled %d metrics from %d requested\n", + pm->num_metrics, num_configs); + + return 0; +} + +static void perf_close(struct pmu_metrics *pm) +{ + if (pm->fd != -1 ) { close(pm->fd); pm->fd = -1; } + if (pm->metrics) { free(pm->metrics); pm->metrics= NULL; } +} + +/* see 'man 2 perf_event_open' */ +struct perf_read_format { + uint64_t nr_values; /* The number of events */ + uint64_t time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ + struct { + uint64_t value; /* The value of the event */ + } values[1024]; +}; + +static int perf_read(struct pmu_metrics *pm) +{ + int read_format = + PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_GROUP; + struct perf_read_format data; + ssize_t len; + int i; + + if (pm->fd < 0) + return -1; + + if (pm->read_format != read_format) + return -1; + + len = read(pm->fd, &data, sizeof(data)); + if (len < 0) { + return -1; + } + + if (pm->num_metrics != data.nr_values) + return -1; + + for (i = 0; i < data.nr_values; ++i) { + pm->metrics[i].old_value = pm->metrics[i].value; + pm->metrics[i].value = data.values[i].value; + } + + return 0; +} + +static const char* perf_get_metric_name(int config) +{ + switch (config) { + case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0): + return "i915/rcs0-busy/"; + case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0): + return "i915/vcs0-busy/"; + case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1): + return "i915/vcs1-busy/"; + case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0): + return "i915/bcs0-busy/"; + case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0): + return "i915/vecs0-busy/"; + default: + return "i915/unknown/"; + } +} + +static uint64_t perf_elapsed(struct metric* m) +{ + return m->value - m->old_value; +} + +static void test_init(void) +{ + struct pmu_metrics pm; + unsigned int class[] = + { + I915_ENGINE_CLASS_RENDER, + I915_ENGINE_CLASS_VIDEO, + 
+ I915_ENGINE_CLASS_VIDEO, + I915_ENGINE_CLASS_COPY, + I915_ENGINE_CLASS_VIDEO_ENHANCE, + }; + int* configs = malloc(1024 * sizeof(int)); + int num_configs = 0; + + igt_assert(configs != NULL); + + for (int i=0; i < sizeof(class)/sizeof(class[0]); ++i) { + /* TODO: Metrics are added for instance 0 of each engine only. + * It would be nice to cover everything, but for that we either + * need to add per-platform checks here or use the upcoming + * engine discovery API. + */ + configs[num_configs++] = I915_PMU_ENGINE_BUSY(class[i], 0); + configs[num_configs++] = I915_PMU_ENGINE_QUEUED(class[i], 0); + configs[num_configs++] = I915_PMU_ENGINE_WAIT(class[i], 0); + configs[num_configs++] = I915_PMU_ENGINE_SEMA(class[i], 0); + } + configs[num_configs++] = I915_PMU_ACTUAL_FREQUENCY; + configs[num_configs++] = I915_PMU_REQUESTED_FREQUENCY; + configs[num_configs++] = I915_PMU_ENERGY; + configs[num_configs++] = I915_PMU_RC6_RESIDENCY; + configs[num_configs++] = I915_PMU_RC6p_RESIDENCY; + configs[num_configs++] = I915_PMU_RC6pp_RESIDENCY; + + igt_assert_eq(perf_init(&pm, num_configs, configs), 0); + igt_assert_eq(perf_read(&pm), 0); + igt_assert_eq(pm.num_metrics, num_configs); + + perf_close(&pm); +} + +/* Tests that i915 PMU correctly errors out on invalid initialization. 
+ * i915 PMU is an uncore PMU, thus: + * - sampling period is not supported + * - pid >= 0 is not supported since we can't count per-process (we count + * system-wide) + * - cpu != 0 is not supported since i915 PMU exposes cpumask for CPU0 + */ +static void test_invalid_init(void) +{ + struct perf_event_attr attr; + int pid, cpu; + +#define ATTR_INIT() \ + do { \ + memset(&attr, 0, sizeof (attr)); \ + attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \ + attr.type = i915_type_id(); \ + igt_assert(attr.type != 0); \ + } while(0) + + ATTR_INIT(); + attr.sample_period = 100; + pid = -1; + cpu = 0; + igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1); + igt_assert_eq(errno, EINVAL); + + ATTR_INIT(); + pid = 0; + cpu = 0; + igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1); + igt_assert_eq(errno, EINVAL); + + ATTR_INIT(); + pid = -1; + cpu = 1; + igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1); + igt_assert_eq(errno, ENODEV); +} + +static int test_single(int fd, uint32_t handle) +{ + struct { + const char* engine_name; + unsigned int class; + unsigned int instance; + unsigned int ring_id; + } engines[] = { + { "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER }, + { "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD | I915_EXEC_BSD_RING1 }, + { "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD | I915_EXEC_BSD_RING2 }, + { "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT }, + { "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX }, + }; + struct pmu_metrics pm; + int configs[] = { + I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0), + I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0), + I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1), + I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0), + I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0), + }; + int num_configs = sizeof(configs)/sizeof(configs[0]); + struct timespec start, now; + + igt_assert_eq(perf_init(&pm, num_configs, configs), 0); + 
igt_assert_eq(pm.num_metrics, num_configs); + + for (int i = 0; i < sizeof(engines)/sizeof(engines[0]); ++i) { + clock_gettime(CLOCK_MONOTONIC, &start); + igt_assert_eq(perf_read(&pm), 0); + + /* Create almost 100% load on the examined engine for specified time. */ + nop_on_ring(fd, handle, engines[i].ring_id, 20); + + igt_assert_eq(perf_read(&pm), 0); + clock_gettime(CLOCK_MONOTONIC, &now); + + igt_info("Executed on %s for %ldus\n", engines[i].engine_name, elapsed_ns(&start, &now)); + for (int j = 0; j < num_configs; ++j) { + igt_info(" %s: %ldus\n", perf_get_metric_name(pm.metrics[j].config), perf_elapsed(&pm.metrics[j])); + + igt_assert(perf_elapsed(&pm.metrics[j]) < elapsed_ns(&start, &now)); + + if (configs[j] == I915_PMU_ENGINE_BUSY(engines[i].class, engines[i].instance)) { + /* Check that the loaded engine had almost 100% load, we will have 1% tolerance. */ + igt_assert(perf_elapsed(&pm.metrics[j]) > 0.99 * elapsed_ns(&start, &now)); + } else if (configs[j] == I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0)) { + /* Check that BCS engine had just tiny load, we will have 1% tolerance. + * NOTE Some load on BCS is unavoidable if you run under any graphical server, + * so we can't check for zero. + */ + igt_assert(perf_elapsed(&pm.metrics[j]) < 0.01 * elapsed_ns(&start, &now)); + } else { + /* Check that other engines did not have any load. + * NOTE This may fail if you have any other workload running in parallel to this test. + */ + igt_assert_eq(perf_elapsed(&pm.metrics[j]), 0); + } + } + } + perf_close(&pm); + + /* Return how many engines we have tried. 
*/ + return sizeof(engines)/sizeof(engines[0]); +} + +static void test_parallel(int fd, uint32_t handle) +{ + struct pmu_metrics pm; + int configs[] = { + I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0), + I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0), + I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1), + I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0), + I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0), + }; + int num_configs = sizeof(configs)/sizeof(configs[0]); + int num_engines; + struct timespec start, now; + + igt_assert_eq(perf_init(&pm, num_configs, configs), 0); + igt_assert_eq(pm.num_metrics, num_configs); + + clock_gettime(CLOCK_MONOTONIC, &start); + igt_assert_eq(perf_read(&pm), 0); + + /* Create almost 100% load on the engines one by one, we will get back + * how many engines were tried. + */ + num_engines = test_single(fd, handle); + + igt_assert_eq(perf_read(&pm), 0); + clock_gettime(CLOCK_MONOTONIC, &now); + + igt_info("Executed on %d engines for %ldus\n", num_engines, elapsed_ns(&start, &now)); + for (int j = 0; j < num_configs; ++j) { + igt_info(" %s: %ldus\n", perf_get_metric_name(pm.metrics[j].config), perf_elapsed(&pm.metrics[j])); + + /* Since the engines were loaded in turn, one by one, for roughly the same time, + * each of them should have been busy for roughly the same share of the total + * time, i.e. about 1/num_engines of it. + */ + igt_assert(perf_elapsed(&pm.metrics[j]) * num_engines > 0.99 * elapsed_ns(&start, &now)); + igt_assert(perf_elapsed(&pm.metrics[j]) * num_engines < 1.01 * elapsed_ns(&start, &now)); + } + perf_close(&pm); +} + +igt_main +{ + uint32_t handle = 0; + int device = -1; + + igt_fixture { + const uint32_t bbe = MI_BATCH_BUFFER_END; + + device = drm_open_driver(DRIVER_INTEL); + igt_require_gem(device); + + handle = gem_create(device, 4096); + gem_write(device, handle, 0, &bbe, sizeof(bbe)); + + igt_fork_hang_detector(device); + } + + /* Test that we can initialize all the metrics. 
*/ + igt_subtest_f("init") + test_init(); + + /* Test that invalid initialization is rejected. */ + igt_subtest_f("invalid_init") + test_invalid_init(); + + /* Test single metrics consumer. */ + igt_subtest_f("single") + test_single(device, handle); + + /* Test parallel metrics consumers. */ + igt_subtest_f("parallel") + test_parallel(device, handle); + + igt_fixture { + igt_stop_hang_detector(); + gem_close(device, handle); + close(device); + } +} -- 1.8.3.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx