From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
 tools/.gitignore       |   1 +
 tools/Makefile.am      |   2 +
 tools/Makefile.sources |   1 +
 tools/intel_gpu_top.c  | 730 +++++++++++++++++++++++++++++++++++++++++++++++++
 tools/meson.build      |   1 +
 5 files changed, 735 insertions(+)
 create mode 100644 tools/intel_gpu_top.c

diff --git a/tools/.gitignore b/tools/.gitignore
index 19a1f7cb8e50..6e3042810176 100644
--- a/tools/.gitignore
+++ b/tools/.gitignore
@@ -17,6 +17,7 @@ intel_framebuffer_dump
 intel_gem_info
 intel_gpu_frequency
 intel_gpu_time
+intel_gpu_top
 intel_legacy_top
 intel_gtt
 intel_guc_logger
diff --git a/tools/Makefile.am b/tools/Makefile.am
index dcf282eaff4e..8f6c15791a3b 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -30,6 +30,8 @@ intel_aubdump_la_LDFLAGS = -module -avoid-version -no-undefined
 intel_aubdump_la_SOURCES = aubdump.c
 intel_aubdump_la_LIBADD = $(top_builddir)/lib/libintel_tools.la -ldl
 
+intel_gpu_top_LDADD = $(top_builddir)/lib/libigt_perf.la -lm
+
 bin_SCRIPTS = intel_aubdump
 CLEANFILES = $(bin_SCRIPTS)
 
diff --git a/tools/Makefile.sources b/tools/Makefile.sources
index 9699b7d2f737..6f8668bd4d56 100644
--- a/tools/Makefile.sources
+++ b/tools/Makefile.sources
@@ -17,6 +17,7 @@ tools_prog_lists =		\
 	intel_gpu_frequency	\
 	intel_firmware_decode	\
 	intel_gpu_time		\
+	intel_gpu_top		\
 	intel_legacy_top	\
 	intel_gtt		\
 	intel_guc_logger	\
diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c
new file mode 100644
index 000000000000..59a112240092
--- /dev/null
+++ b/tools/intel_gpu_top.c
@@ -0,0 +1,730 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdint.h>
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <math.h>
+
+#include "igt_perf.h"
+
+/* Current and previous raw reading of a counter or timestamp. */
+struct pmu_pair {
+	uint64_t cur;
+	uint64_t prev;
+};
+
+/* One perf event: its config, its slot in the group read and its samples. */
+struct pmu_counter {
+	uint64_t config;
+	unsigned int idx;
+	struct pmu_pair val;
+};
+
+#define NUM_LOADS (3)
+
+/* Value get_pmu_config() returns when the config could not be read. */
+#define PMU_CONFIG_INVALID ((uint64_t)-1)
+
+/* Per-engine counters and derived values. */
+struct engine {
+	const char *name;
+	double qd[3]; /* queued, runnable, running */
+	double load_avg[NUM_LOADS];
+	struct pmu_counter busy;
+	struct pmu_counter wait;
+	struct pmu_counter sema;
+	struct pmu_counter queued;
+	struct pmu_counter runnable;
+	struct pmu_counter running;
+};
+
+/*
+ * Global state. The allocation is grown past the end of this struct so that
+ * engine_ptr() can address num_engines entries starting at 'engine'.
+ */
+struct engines {
+	unsigned int num_engines;
+	unsigned int num_counters;
+	DIR *root;
+	int fd;
+	struct pmu_pair ts;
+
+	int rapl_fd;
+	double rapl_scale;
+
+	struct pmu_counter freq_req;
+	struct pmu_counter freq_act;
+	struct pmu_counter irq;
+	struct pmu_counter rc6;
+	struct pmu_counter rapl;
+
+	double qd_scale;
+
+	double load_exp[NUM_LOADS];
+	double load_avg[NUM_LOADS];
+
+	struct engine engine;
+};
+
+/*
+ * Read the perf config of event "<name>-<counter>" from the directory open
+ * on dirfd.
+ *
+ * Parsing starts at the first '0' character in the file, presumably the "0x"
+ * of a "config=0x..." line.
+ *
+ * Returns the config, or PMU_CONFIG_INVALID on any failure.
+ */
+static uint64_t
+get_pmu_config(int dirfd, const char *name, const char *counter)
+{
+	char buf[128], *p;
+	ssize_t len;
+	int fd, ret;
+
+	ret = snprintf(buf, sizeof(buf), "%s-%s", name, counter);
+	if (ret < 0 || (size_t)ret >= sizeof(buf))
+		return PMU_CONFIG_INVALID;
+
+	fd = openat(dirfd, buf, O_RDONLY);
+	if (fd < 0)
+		return PMU_CONFIG_INVALID;
+
+	/* read() does not terminate the buffer, so keep a byte for that. */
+	len = read(fd, buf, sizeof(buf) - 1);
+	close(fd);
+	if (len <= 0)
+		return PMU_CONFIG_INVALID;
+	buf[len] = '\0';
+
+	p = strchr(buf, '0');
+	if (!p)
+		return PMU_CONFIG_INVALID;
+
+	return strtoull(p, NULL, 0);
+}
+
+#define engine_ptr(engines, n) \
+	((struct engine *)((unsigned char *)(&(engines)->engine) + \
+			   (n) * sizeof(struct engine)))
+
+/* Free an engines allocation together with the engine names it owns. */
+static void free_engines(struct engines *engines)
+{
+	unsigned int i;
+
+	for (i = 0; i < engines->num_engines; i++)
+		free((char *)engine_ptr(engines, i)->name);
+
+	free(engines);
+}
+
+/*
+ * Scan the i915 PMU events directory for "<engine>-busy" files and build
+ * the engine list from them.
+ *
+ * Returns a heap allocated struct engines whose 'root' is the still open
+ * events directory, or NULL on failure.
+ */
+static struct engines *discover_engines(void)
+{
+	const char *sysfs_root = "/sys/devices/i915/events";
+	const char *endswith = "-busy";
+	const size_t endlen = strlen(endswith);
+	struct engines *engines;
+	struct dirent *dent;
+	int ret = 0;
+	DIR *d;
+
+	engines = calloc(1, sizeof(*engines));
+	if (!engines)
+		return NULL;
+
+	d = opendir(sysfs_root);
+	if (!d) {
+		free(engines);
+		return NULL;
+	}
+
+	while ((dent = readdir(d)) != NULL) {
+		struct engine *engine =
+			engine_ptr(engines, engines->num_engines);
+		struct engines *tmp;
+		char buf[256];
+		size_t len;
+
+		if (dent->d_type != DT_REG)
+			continue;
+
+		len = strlen(dent->d_name);
+		if (len >= sizeof(buf)) {
+			ret = -1;
+			break;
+		}
+
+		/* xxxN-busy */
+		if (len < endlen + 4)
+			continue;
+		if (strcmp(&dent->d_name[len - endlen], endswith))
+			continue;
+
+		memset(engine, 0, sizeof(*engine));
+
+		/* The engine name is the file name without the suffix. */
+		memcpy(buf, dent->d_name, len - endlen);
+		buf[len - endlen] = '\0';
+		engine->name = strdup(buf);
+		if (!engine->name) {
+			ret = -1;
+			break;
+		}
+
+		engine->busy.config = get_pmu_config(dirfd(d),
+						     engine->name,
+						     "busy");
+		if (engine->busy.config == PMU_CONFIG_INVALID) {
+			/* Not counted in num_engines yet, free it here. */
+			free((char *)engine->name);
+			ret = -1;
+			break;
+		}
+
+		engines->num_engines++;
+
+		/* Always keep one spare slot for the next engine found. */
+		tmp = realloc(engines, sizeof(struct engines) +
+			      engines->num_engines * sizeof(struct engine));
+		if (!tmp) {
+			ret = -ENOMEM;
+			break;
+		}
+		engines = tmp;
+	}
+
+	if (ret) {
+		free_engines(engines);
+		closedir(d);
+		return NULL;
+	}
+
+	engines->root = d;
+
+	return engines;
+}
+
+/*
+ * Read at most bufsize - 1 bytes of filename into buf and terminate it.
+ *
+ * Returns 0 on success, -1 if the file cannot be opened or is empty.
+ */
+static int
+filename_to_buf(const char *filename, char *buf, unsigned int bufsize)
+{
+	int fd;
+	ssize_t ret;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	ret = read(fd, buf, bufsize - 1);
+	close(fd);
+	if (ret < 1)
+		return -1;
+
+	buf[ret] = '\0';
+
+	return 0;
+}
+
+/* Parse the first integer found in filename; returns 0 if unreadable. */
+static uint64_t filename_to_u64(const char *filename, int base)
+{
+	char buf[64], *b;
+
+	if (filename_to_buf(filename, buf, sizeof(buf)))
+		return 0;
+
+	/*
+	 * Handle both single integer and key=value formats by skipping
+	 * leading non-digits.
+	 */
+	b = buf;
+	while (*b && !isdigit((unsigned char)*b))
+		b++;
+
+	return strtoull(b, NULL, base);
+}
+
+static uint64_t rapl_type_id(void)
+{
+	return filename_to_u64("/sys/devices/power/type", 10);
+}
+
+static uint64_t rapl_gpu_power(void)
+{
+	return filename_to_u64("/sys/devices/power/events/energy-gpu", 0);
+}
+
+/* Parse filename as a floating point number; returns 0 if unreadable. */
+static double filename_to_double(const char *filename)
+{
+	char buf[64];
+
+	if (filename_to_buf(filename, buf, sizeof(buf)))
+		return 0;
+
+	return strtod(buf, NULL);
+}
+
+static double rapl_gpu_power_scale(void)
+{
+	return filename_to_double("/sys/devices/power/events/energy-gpu.scale");
+}
+
+static double i915_qd_scale(void)
+{
+	return filename_to_double("/sys/devices/i915/events/rcs0-queued.scale");
+}
+
+/*
+ * Open (pmu)->config via perf_i915_open_group(), passing engines->fd as the
+ * group. The first fd obtained is stored in engines->fd. On success the
+ * counter gets the next read slot and next_idx is advanced. Evaluates to
+ * the fd, which is negative on failure.
+ */
+#define __open_pmu(engines, pmu, next_idx) \
+({ \
+	int fd__; \
+\
+	fd__ = perf_i915_open_group((pmu)->config, (engines)->fd); \
+	if (fd__ >= 0) { \
+		if ((engines)->fd == -1) \
+			(engines)->fd = fd__; \
+		(pmu)->idx = (next_idx)++; \
+		(engines)->num_counters++; \
+	} \
+\
+	fd__; \
+})
+
+/*
+ * Open all GPU counters plus the RAPL GPU energy counter.
+ *
+ * Returns 0 on success, -1 if any event cannot be resolved or opened.
+ */
+static int pmu_init(struct engines *engines)
+{
+	unsigned int idx = 0;
+	unsigned int i;
+	int fd;
+
+	engines->fd = -1;
+	engines->num_counters = 0;
+
+	engines->freq_req.config = I915_PMU_REQUESTED_FREQUENCY;
+	fd = __open_pmu(engines, &engines->freq_req, idx);
+	if (fd < 0)
+		return -1;
+
+	engines->freq_act.config = I915_PMU_ACTUAL_FREQUENCY;
+	fd = __open_pmu(engines, &engines->freq_act, idx);
+	if (fd < 0)
+		return -1;
+
+	engines->irq.config = I915_PMU_INTERRUPTS;
+	fd = __open_pmu(engines, &engines->irq, idx);
+	if (fd < 0)
+		return -1;
+
+	engines->rc6.config = I915_PMU_RC6_RESIDENCY;
+	fd = __open_pmu(engines, &engines->rc6, idx);
+	if (fd < 0)
+		return -1;
+
+	engines->qd_scale = i915_qd_scale();
+
+	for (i = 0; i < engines->num_engines; i++) {
+		struct engine *engine = engine_ptr(engines, i);
+		struct {
+			struct pmu_counter *pmu;
+			const char *counter;
+		} *cnt, counters[] = {
+			{ .pmu = &engine->busy, .counter = "busy" },
+			{ .pmu = &engine->wait, .counter = "wait" },
+			{ .pmu = &engine->sema, .counter = "sema" },
+			{ .pmu = &engine->queued, .counter = "queued" },
+			{ .pmu = &engine->runnable, .counter = "runnable" },
+			{ .pmu = &engine->running, .counter = "running" },
+			{ .pmu = NULL, .counter = NULL },
+		};
+
+		for (cnt = counters; cnt->pmu; cnt++) {
+			if (!cnt->pmu->config)
+				cnt->pmu->config =
+					get_pmu_config(dirfd(engines->root),
+						       engine->name,
+						       cnt->counter);
+			if (cnt->pmu->config == PMU_CONFIG_INVALID)
+				return -1;
+
+			fd = __open_pmu(engines, cnt->pmu, idx);
+			if (fd < 0)
+				return -1;
+		}
+	}
+
+	engines->rapl_scale = rapl_gpu_power_scale();
+	/* NaN never compares equal to anything, so it needs isnan(). */
+	if (!isnan(engines->rapl_scale))
+		engines->rapl_scale *= 1e3; /* from nano to micro */
+	engines->rapl.config = rapl_gpu_power();
+	engines->rapl_fd = igt_perf_open(rapl_type_id(), engines->rapl.config);
+	if (engines->rapl_fd < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Read one group sample from fd: the values of num counters are stored in
+ * val and the second u64 of the record is returned as the timestamp.
+ */
+static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
+{
+	uint64_t buf[2 + num];
+	unsigned int i;
+	ssize_t len;
+
+	/* Keep read() out of assert() so it still runs with NDEBUG. */
+	len = read(fd, buf, sizeof(buf));
+	assert(len == (ssize_t)sizeof(buf));
+	(void)len;
+
+	for (i = 0; i < num; i++)
+		val[i] = buf[2 + i];
+
+	return buf[1];
+}
+
+/*
+ * Rate of change of counter p: its delta divided by d and by the elapsed
+ * time t, multiplied by s. Clamped to 100 when s is 100 (a percentage).
+ */
+static double pmu_calc(struct pmu_pair *p, double d, double t, double s)
+{
+	double pct;
+
+	pct = p->cur - p->prev;
+	pct /= d;
+	pct /= t;
+	pct *= s;
+
+	if (s == 100.0 && pct > 100.0)
+		pct = 100.0;
+
+	return pct;
+}
+
+/* Read one counter from fd; optionally store the record's second u64 in ts. */
+static uint64_t __pmu_read_single(int fd, uint64_t *ts)
+{
+	uint64_t data[2];
+	ssize_t len;
+
+	/* Keep read() out of assert() so it still runs with NDEBUG. */
+	len = read(fd, data, sizeof(data));
+	assert(len == (ssize_t)sizeof(data));
+	(void)len;
+
+	if (ts)
+		*ts = data[1];
+
+	return data[0];
+}
+
+static uint64_t pmu_read_single(int fd)
+{
+	return __pmu_read_single(fd, NULL);
+}
+
+static void __update_sample(struct pmu_counter *counter, uint64_t val)
+{
+	counter->val.prev = counter->val.cur;
+	counter->val.cur = val;
+}
+
+static void update_sample(struct pmu_counter *counter, uint64_t *val)
+{
+	__update_sample(counter, val[counter->idx]);
+}
+
+/* Take a new sample of every counter, shifting current values to previous. */
+static void pmu_sample(struct engines *engines)
+{
+	const int num_val = engines->num_counters;
+	uint64_t val[num_val];
+	unsigned int i;
+
+	engines->ts.prev = engines->ts.cur;
+	engines->ts.cur = pmu_read_multi(engines->fd, num_val, val);
+
+	__update_sample(&engines->rapl, pmu_read_single(engines->rapl_fd));
+
+	update_sample(&engines->freq_req, val);
+	update_sample(&engines->freq_act, val);
+	update_sample(&engines->irq, val);
+	update_sample(&engines->rc6, val);
+
+	for (i = 0; i < engines->num_engines; i++) {
+		struct engine *engine = engine_ptr(engines, i);
+
+		update_sample(&engine->busy, val);
+		update_sample(&engine->sema, val);
+		update_sample(&engine->wait, val);
+		update_sample(&engine->queued, val);
+		update_sample(&engine->runnable, val);
+		update_sample(&engine->running, val);
+	}
+}
+
+static const char *bars[] = { " ", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█" };
+
+/*
+ * Print a bar for percent in a field max_len columns wide, including the
+ * two '|' delimiters. Prints nothing if the field cannot hold them.
+ */
+static void
+print_percentage_bar(double percent, int max_len)
+{
+	int bar_len, i;
+
+	/* A narrow terminal can leave no room, or even a negative width. */
+	if (max_len < 2)
+		return;
+
+	/* Written so that NaN is clamped too; bars[] is indexed from this. */
+	if (!(percent >= 0.0))
+		percent = 0.0;
+	else if (percent > 100.0)
+		percent = 100.0;
+
+	bar_len = percent * (8 * (max_len - 2)) / 100.0;
+
+	putchar('|');
+
+	for (i = bar_len; i >= 8; i -= 8)
+		printf("%s", bars[8]);
+	if (i)
+		printf("%s", bars[i]);
+
+	for (i = 0; i < (max_len - 2 - (bar_len + 7) / 8); i++)
+		putchar(' ');
+
+	putchar('|');
+}
+
+#define DEFAULT_PERIOD_MS (1000)
+
+static void
+usage(const char *appname)
+{
+	printf("intel_gpu_top - Display a top-like summary of Intel GPU usage\n"
+	       "\n"
+	       "Usage: %s [parameters]\n"
+	       "\n"
+	       "\tThe following parameters are optional:\n"
+	       "\t[-s <ms>] refresh period in ms (default %ums)\n"
+	       "\t[-h] show this help text\n"
+	       "\n",
+	       appname, DEFAULT_PERIOD_MS);
+}
+
+/*
+ * Parse the -s argument, a refresh period in milliseconds.
+ *
+ * Returns 0 and stores the period in microseconds in period_us, or -1 if
+ * arg is not a positive integer that fits once converted.
+ */
+static int parse_period_us(const char *arg, unsigned int *period_us)
+{
+	const unsigned long max_ms = ~0U / 1000;
+	unsigned long ms;
+	char *end;
+
+	if (!isdigit((unsigned char)arg[0]))
+		return -1;
+
+	errno = 0;
+	ms = strtoul(arg, &end, 10);
+	if (errno || *end != '\0' || ms == 0 || ms > max_ms)
+		return -1;
+
+	*period_us = ms * 1000;
+
+	return 0;
+}
+
+/* One step of an exponentially decaying average of val. */
+static double update_load(double load, double exp, double val)
+{
+	return val + exp * (load - val);
+}
+
+int main(int argc, char **argv)
+{
+	unsigned int period_us = DEFAULT_PERIOD_MS * 1000;
+	const double load_period[NUM_LOADS] = { 1.0, 30.0, 900.0 };
+	struct engines *engines;
+	int con_w = -1, con_h = -1;
+	struct winsize ws;
+	unsigned int i;
+	double period;
+	int ret, ch;
+
+	/* Parse options */
+	while ((ch = getopt(argc, argv, "s:h")) != -1) {
+		switch (ch) {
+		case 's':
+			if (parse_period_us(optarg, &period_us)) {
+				fprintf(stderr, "Invalid period '%s'!\n",
+					optarg);
+				usage(argv[0]);
+				exit(1);
+			}
+			break;
+		case 'h':
+			usage(argv[0]);
+			exit(0);
+		default:
+			fprintf(stderr, "Invalid option %c!\n", (char)optopt);
+			usage(argv[0]);
+			exit(1);
+		}
+	}
+
+	/* Get terminal size. */
+	if (ioctl(0, TIOCGWINSZ, &ws) != -1) {
+		con_w = ws.ws_col;
+		con_h = ws.ws_row;
+	}
+
+	engines = discover_engines();
+	if (!engines) {
+		fprintf(stderr, "Failed to detect engines!\n");
+		return 1;
+	}
+
+	ret = pmu_init(engines);
+	if (ret) {
+		fprintf(stderr, "Failed to initialize PMU!\n");
+		return 1;
+	}
+
+	/* Load average setup. */
+	period = (double)period_us / 1e6;
+	for (i = 0; i < NUM_LOADS; i++)
+		engines->load_exp[i] = exp(-period / load_period[i]);
+
+	pmu_sample(engines);
+
+	for (;;) {
+		double t, freq[2], irq, rc6, power;
+		double qd = 0;
+		int lines = 0;
+		unsigned int j;
+
+		usleep(period_us);
+
+		pmu_sample(engines);
+		t = (double)(engines->ts.cur - engines->ts.prev) / 1e9;
+
+		printf("\033[H\033[J");
+
+		freq[0] = pmu_calc(&engines->freq_req.val, 1.0, t, 1);
+		freq[1] = pmu_calc(&engines->freq_act.val, 1.0, t, 1);
+		irq = pmu_calc(&engines->irq.val, 1.0, t, 1);
+		rc6 = pmu_calc(&engines->rc6.val, 1e9, t, 100);
+		power = pmu_calc(&engines->rapl.val, 1.0, t, engines->rapl_scale);
+
+		for (i = 0; i < engines->num_engines; i++) {
+			struct engine *engine = engine_ptr(engines, i);
+
+			engine->qd[0] = pmu_calc(&engine->queued.val, 1, t,
+						 engines->qd_scale);
+			engine->qd[1] = pmu_calc(&engine->runnable.val, 1, t,
+						 engines->qd_scale);
+			engine->qd[2] = pmu_calc(&engine->running.val, 1, t,
+						 engines->qd_scale);
+
+			qd += engine->qd[1] + engine->qd[2];
+
+			for (j = 0; j < NUM_LOADS; j++) {
+				engine->load_avg[j] =
+					update_load(engine->load_avg[j],
+						    engines->load_exp[j],
+						    engine->qd[1] +
+						    engine->qd[2]);
+			}
+		}
+
+		for (j = 0; j < NUM_LOADS; j++) {
+			engines->load_avg[j] =
+				update_load(engines->load_avg[j],
+					    engines->load_exp[j],
+					    qd);
+		}
+
+		printf("intel-gpu-top - load avg %5.2f, %5.2f, %5.2f; %4.0f/%4.0f MHz; %3.0f%% RC6; %6.0fmW; %8.0f irqs/s\n",
+		       engines->load_avg[0],
+		       engines->load_avg[1],
+		       engines->load_avg[2],
+		       freq[0], freq[1],
+		       rc6, power, irq);
+		lines++;
+
+		printf("\n");
+		lines++;
+
+		for (i = 0; i < engines->num_engines && lines < con_h; i++) {
+			struct engine *engine = engine_ptr(engines, i);
+			int max_w = con_w - 1;
+			double val[2];
+			char buf[128];
+			int len;
+
+			val[0] = pmu_calc(&engine->wait.val, 1e9, t, 100);
+			val[1] = pmu_calc(&engine->sema.val, 1e9, t, 100);
+			len = snprintf(buf, sizeof(buf),
+				       "%6.2f%% wait, %6.2f%% sema",
+				       val[0], val[1]);
+
+			val[0] = pmu_calc(&engine->busy.val, 1e9, t, 100);
+			len += printf("%8s %6.2f%% (%5.2f/%5.2f/%5.2f) ",
+				      engine->name,
+				      val[0],
+				      engine->qd[0],
+				      engine->qd[1],
+				      engine->qd[2]);
+			/* Signed, so an overlong line gives a width < 2. */
+			print_percentage_bar(val[0], max_w - len);
+
+			printf("%s\n", buf);
+
+			lines++;
+		}
+
+		printf("\n");
+	}
+
+	return 0;
+}
diff --git a/tools/meson.build b/tools/meson.build
index ebce4e305d00..36038f7a9d22 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -23,6 +23,7 @@ tools_progs = [
 	'intel_gpu_frequency',
 	'intel_firmware_decode',
 	'intel_gpu_time',
+	'intel_gpu_top',
 	'intel_legacy_top',
 	'intel_gtt',
 	'intel_guc_logger',
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx