From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Use the i915 exported data in /proc/<pid>/fdinfo to show GPU utilization per DRM client. Example of the output: intel-gpu-top: Intel Tigerlake (Gen12) @ /dev/dri/card0 - 220/ 221 MHz 70% RC6; 0.62/ 7.08 W; 760 irqs/s ENGINES BUSY MI_SEMA MI_WAIT Render/3D 23.06% |██████▊ | 0% 0% Blitter 0.00% | | 0% 0% Video 5.40% |█▋ | 0% 0% VideoEnhance 20.67% |██████ | 0% 0% PID NAME Render/3D Blitter Video VideoEnhance 3082 mpv | || ||▌ ||██ | 3117 neverball |█▉ || || || | 1 systemd |▍ || || || | 2338 gnome-shell | || || || | Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- man/intel_gpu_top.rst | 4 + tools/intel_gpu_top.c | 801 +++++++++++++++++++++++++++++++++++++++++- tools/meson.build | 2 +- 3 files changed, 804 insertions(+), 3 deletions(-) diff --git a/man/intel_gpu_top.rst b/man/intel_gpu_top.rst index b3b765b05feb..f4dbfc5b44d9 100644 --- a/man/intel_gpu_top.rst +++ b/man/intel_gpu_top.rst @@ -56,6 +56,10 @@ Supported keys: 'q' Exit from the tool. 'h' Show interactive help. '1' Toggle between aggregated engine class and physical engine mode. + 'n' Toggle display of numeric client busyness overlay. + 's' Toggle between sort modes (runtime, total runtime, pid, client id). + 'i' Toggle display of clients which used no GPU time. + 'H' Toggle between per PID aggregation and individual clients. DEVICE SELECTION ================ diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c index bc11fce2bb1e..73815cdea8aa 100644 --- a/tools/intel_gpu_top.c +++ b/tools/intel_gpu_top.c @@ -43,8 +43,10 @@ #include <sys/types.h> #include <unistd.h> #include <termios.h> +#include <sys/sysmacros.h> #include "igt_perf.h" +#include "igt_drm_fdinfo.h" #define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0])) @@ -311,7 +313,8 @@ static int engine_cmp(const void *__a, const void *__b) return a->instance - b->instance; } -#define is_igpu_pci(x) (strcmp(x, "0000:00:02.0") == 0) +#define IGPU_PCI "0000:00:02.0" +#define is_igpu_pci(x) (strcmp(x, IGPU_PCI) == 0) #define is_igpu(x) (strcmp(x, "i915") == 0) static struct engines *discover_engines(char *device) @@ -635,6 +638,547 @@ static void pmu_sample(struct engines *engines) } } +enum client_status { + FREE = 0, /* mbz */ + ALIVE, + PROBE +}; + +struct clients; + +struct client { + struct clients *clients; + + enum client_status status; + unsigned int id; + unsigned int pid; + char name[24]; + char print_name[24]; + unsigned int samples; + unsigned long total_runtime; + unsigned long last_runtime; + unsigned long *val; + uint64_t *last; +}; + +struct clients { + unsigned int num_clients; + unsigned int active_clients; + + unsigned int num_classes; + struct engine_class *class; + + char pci_slot[64]; + + struct client *client; +}; + +#define for_each_client(clients, c, tmp) \ + for ((tmp) = (clients)->num_clients, c = (clients)->client; \ + (tmp > 0); (tmp)--, (c)++) + +static struct clients *init_clients(const char *pci_slot) +{ + struct clients *clients; + + clients = malloc(sizeof(*clients)); + if (!clients) + return NULL; + + memset(clients, 0, sizeof(*clients)); + + strncpy(clients->pci_slot, pci_slot, sizeof(clients->pci_slot)); + + return clients; +} + +static struct client * +find_client(struct clients *clients, enum client_status status, unsigned int id) +{ + unsigned int start, num; + struct client *c; + + start = status == FREE ? clients->active_clients : 0; /* Free block at the end. */ + num = clients->num_clients - start; + + for (c = &clients->client[start]; num; c++, num--) { + if (status != c->status) + continue; + + if (status == FREE || c->id == id) + return c; + } + + return NULL; +} + +static void +update_client(struct client *c, unsigned int pid, char *name, uint64_t val[16]) +{ + unsigned int i; + + if (c->pid != pid) + c->pid = pid; + + if (strcmp(c->name, name)) { + char *p; + + strncpy(c->name, name, sizeof(c->name) - 1); + strncpy(c->print_name, name, sizeof(c->print_name) - 1); + + p = c->print_name; + while (*p) { + if (!isprint(*p)) + *p = '*'; + p++; + } + } + + c->last_runtime = 0; + c->total_runtime = 0; + + for (i = 0; i < c->clients->num_classes; i++) { + if (val[i] < c->last[i]) + continue; /* It will catch up soon. */ + + c->total_runtime += val[i]; + c->val[i] = val[i] - c->last[i]; + c->last_runtime += c->val[i]; + c->last[i] = val[i]; + } + + c->samples++; + c->status = ALIVE; +} + +static void +add_client(struct clients *clients, unsigned int id, unsigned int pid, + char *name, uint64_t busy[16]) +{ + struct client *c; + + assert(!find_client(clients, ALIVE, id)); + + c = find_client(clients, FREE, 0); + if (!c) { + unsigned int idx = clients->num_clients; + + clients->num_clients += (clients->num_clients + 2) / 2; + clients->client = realloc(clients->client, + clients->num_clients * sizeof(*c)); + assert(clients->client); + + c = &clients->client[idx]; + memset(c, 0, (clients->num_clients - idx) * sizeof(*c)); + } + + c->id = id; + c->clients = clients; + c->val = calloc(clients->num_classes, sizeof(c->val)); + c->last = calloc(clients->num_classes, sizeof(c->last)); + assert(c->val && c->last); + + update_client(c, pid, name, busy); +} + +static void free_client(struct client *c) +{ + free(c->val); + free(c->last); + memset(c, 0, sizeof(*c)); +} + +static int client_last_cmp(const void *_a, const void *_b) +{ + const struct client *a = _a; + const struct client *b = _b; + long tot_a, tot_b; + + /* + * Sort clients in descending order of runtime in the previous sampling + * period for active ones, followed by inactive. Tie-breaker is client + * id. + */ + + tot_a = a->status == ALIVE ? a->last_runtime : -1; + tot_b = b->status == ALIVE ? b->last_runtime : -1; + + tot_b -= tot_a; + if (tot_b > 0) + return 1; + if (tot_b < 0) + return -1; + + return (int)b->id - a->id; +} + +static int client_total_cmp(const void *_a, const void *_b) +{ + const struct client *a = _a; + const struct client *b = _b; + long tot_a, tot_b; + + tot_a = a->status == ALIVE ? a->total_runtime : -1; + tot_b = b->status == ALIVE ? b->total_runtime : -1; + + tot_b -= tot_a; + if (tot_b > 0) + return 1; + if (tot_b < 0) + return -1; + + return (int)b->id - a->id; +} + +static int client_id_cmp(const void *_a, const void *_b) +{ + const struct client *a = _a; + const struct client *b = _b; + int id_a, id_b; + + id_a = a->status == ALIVE ? a->id : -1; + id_b = b->status == ALIVE ? b->id : -1; + + id_b -= id_a; + if (id_b > 0) + return 1; + if (id_b < 0) + return -1; + + return (int)b->id - a->id; +} + +static int client_pid_cmp(const void *_a, const void *_b) +{ + const struct client *a = _a; + const struct client *b = _b; + int pid_a, pid_b; + + pid_a = a->status == ALIVE ? a->pid : INT_MAX; + pid_b = b->status == ALIVE ? b->pid : INT_MAX; + + pid_b -= pid_a; + if (pid_b > 0) + return -1; + if (pid_b < 0) + return 1; + + return (int)a->id - b->id; +} + +static int (*client_cmp)(const void *, const void *) = client_last_cmp; + +static struct clients *sort_clients(struct clients *clients, + int (*cmp)(const void *, const void *)) +{ + unsigned int active, free; + struct client *c; + int tmp; + + if (!clients) + return clients; + + qsort(clients->client, clients->num_clients, sizeof(*clients->client), + cmp); + + /* Trim excessive array space. */ + active = 0; + for_each_client(clients, c, tmp) { + if (c->status != ALIVE) + break; /* Active clients are first in the array. */ + active++; + } + + clients->active_clients = active; + + free = clients->num_clients - active; + if (free > clients->num_clients / 2) { + active = clients->num_clients - free / 2; + if (active != clients->num_clients) { + clients->num_clients = active; + clients->client = realloc(clients->client, + clients->num_clients * + sizeof(*c)); + } + } + + return clients; +} + +static bool aggregate_pids = true; + +static struct clients *display_clients(struct clients *clients) +{ + struct client *ac, *c, *cp = NULL; + struct clients *aggregated; + int tmp, num = 0; + + if (!aggregate_pids) + goto out; + + /* Sort by pid first to make it easy to aggregate while walking. */ + sort_clients(clients, client_pid_cmp); + + aggregated = calloc(1, sizeof(*clients)); + assert(aggregated); + + ac = calloc(clients->num_clients, sizeof(*c)); + assert(ac); + + aggregated->num_classes = clients->num_classes; + aggregated->class = clients->class; + aggregated->client = ac; + + for_each_client(clients, c, tmp) { + unsigned int i; + + if (c->status == FREE) + break; + + assert(c->status == ALIVE); + + if ((cp && c->pid != cp->pid) || !cp) { + ac = &aggregated->client[num++]; + + /* New pid. */ + ac->clients = aggregated; + ac->status = ALIVE; + ac->id = -c->pid; + ac->pid = c->pid; + strcpy(ac->name, c->name); + strcpy(ac->print_name, c->print_name); + ac->val = calloc(clients->num_classes, + sizeof(ac->val[0])); + assert(ac->val); + ac->samples = 1; + } + + cp = c; + + if (c->samples < 2) + continue; + + ac->samples = 2; /* All what matters for display. */ + ac->total_runtime += c->total_runtime; + ac->last_runtime += c->last_runtime; + + for (i = 0; i < clients->num_classes; i++) + ac->val[i] += c->val[i]; + } + + aggregated->num_clients = num; + aggregated->active_clients = num; + + clients = aggregated; + +out: + return sort_clients(clients, client_cmp); +} + +static void free_clients(struct clients *clients) +{ + struct client *c; + unsigned int tmp; + + for_each_client(clients, c, tmp) { + free(c->val); + free(c->last); + } + + free(clients->client); + free(clients); +} + +static bool is_drm_fd(DIR *fd_dir, const char *name) +{ + struct stat stat; + int ret; + + ret = fstatat(dirfd(fd_dir), name, &stat, 0); + + return ret == 0 && + (stat.st_mode & S_IFMT) == S_IFCHR && + major(stat.st_rdev) == 226; +} + +static bool get_task_name(const char *buffer, char *out, unsigned long sz) +{ + char *s = index(buffer, '('); + char *e = rindex(buffer, ')'); + unsigned int len; + + if (!s || !e) + return false; + + len = --e - ++s + 1; + if(!len || (len + 1) >= sz) + return false; + + strncpy(out, s, len); + out[len] = 0; + + return true; +} + +static DIR *opendirat(DIR *at, const char *name) +{ + DIR *dir; + int fd; + + fd = openat(dirfd(at), name, O_DIRECTORY); + if (fd < 0) + return NULL; + + dir = fdopendir(fd); + if (!dir) + close(fd); + + return dir; +} + +static FILE *fropenat(DIR *at, const char *name) +{ + FILE *f; + int fd; + + fd = openat(dirfd(at), name, O_RDONLY); + if (fd < 0) + return NULL; + + f = fdopen(fd, "r"); + if (!f) + close(fd); + + return f; +} + +static size_t freadat2buf(char *buf, const size_t sz, DIR *at, const char *name) +{ + size_t count; + FILE *f; + + f = fropenat(at, name); + if (!f) + return 0; + + memset(buf, 0, sz); + count = fread(buf, 1, sz, f); + fclose(f); + + return count; +} + +static struct clients *scan_clients(struct clients *clients) +{ + struct dirent *proc_dent; + struct client *c; + DIR *proc_dir; + int tmp; + + if (!clients) + return clients; + + for_each_client(clients, c, tmp) { + assert(c->status != PROBE); + if (c->status == ALIVE) + c->status = PROBE; + else + break; /* Free block at the end of array. */ + } + + proc_dir = opendir("/proc"); + if (!proc_dir) + return clients; + + while ((proc_dent = readdir(proc_dir)) != NULL) { + DIR *pid_dir = NULL, *fd_dir = NULL, *fdinfo_dir = NULL; + struct dirent *fdinfo_dent; + char client_name[64] = { }; + unsigned int client_pid; + char buf[4096]; + size_t count; + + if (proc_dent->d_type != DT_DIR) + continue; + if (!isdigit(proc_dent->d_name[0])) + continue; + + pid_dir = opendirat(proc_dir, proc_dent->d_name); + if (!pid_dir) + continue; + + count = freadat2buf(buf, sizeof(buf), pid_dir, "stat"); + if (!count) + goto next; + + client_pid = atoi(buf); + if (!client_pid) + goto next; + + if (!get_task_name(buf, client_name, sizeof(client_name))) + goto next; + + fd_dir = opendirat(pid_dir, "fd"); + if (!fd_dir) + goto next; + + fdinfo_dir = opendirat(pid_dir, "fdinfo"); + if (!fdinfo_dir) + goto next; + + while ((fdinfo_dent = readdir(fdinfo_dir)) != NULL) { + struct drm_client_fdinfo info = { }; + + if (fdinfo_dent->d_type != DT_REG) + continue; + if (!isdigit(fdinfo_dent->d_name[0])) + continue; + + if (!is_drm_fd(fd_dir, fdinfo_dent->d_name)) + continue; + + if (!__igt_parse_drm_fdinfo(dirfd(fdinfo_dir), + fdinfo_dent->d_name, + &info)) + continue; + + if (strcmp(info.driver, "i915")) + continue; + if (strcmp(info.pdev, clients->pci_slot)) + continue; + if (find_client(clients, ALIVE, info.id)) + continue; /* Skip duplicate fds. */ + + c = find_client(clients, PROBE, info.id); + if (!c) + add_client(clients, info.id, client_pid, + client_name, info.busy); + else + update_client(c, client_pid, client_name, + info.busy); + } + +next: + if (fdinfo_dir) + closedir(fdinfo_dir); + if (fd_dir) + closedir(fd_dir); + if (pid_dir) + closedir(pid_dir); + } + + closedir(proc_dir); + + for_each_client(clients, c, tmp) { + if (c->status == PROBE) + free_client(c); + else if (c->status == FREE) + break; + } + + return display_clients(clients); +} + static const char *bars[] = { " ", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█" }; static void n_spaces(const unsigned int n) @@ -776,6 +1320,18 @@ json_close_struct(void) fflush(stdout); } +static void +__json_add_member(const char *key, const char *val) +{ + assert(json_indent_level < ARRAY_SIZE(json_indent)); + + fprintf(out, "%s%s\"%s\": \"%s\"", + json_struct_members ? ",\n" : "", + json_indent[json_indent_level], key, val); + + json_struct_members++; +} + static unsigned int json_add_member(const struct cnt_group *parent, struct cnt_item *item, unsigned int headers) @@ -1501,6 +2057,157 @@ print_engines(struct engines *engines, double t, int lines, int w, int h) return lines; } +static int +print_clients_header(struct clients *clients, int lines, + int con_w, int con_h, int *class_w) +{ + if (output_mode == INTERACTIVE) { + const char *pidname = " PID NAME "; + unsigned int num_active = 0; + int len = strlen(pidname); + + if (lines++ >= con_h) + return lines; + + printf("\033[7m"); + printf("%s", pidname); + + if (lines++ >= con_h || len >= con_w) + return lines; + + if (clients->num_classes) { + unsigned int i; + int width; + + for (i = 0; i < clients->num_classes; i++) { + if (clients->class[i].num_engines) + num_active++; + } + + *class_w = width = (con_w - len) / num_active; + + for (i = 0; i < clients->num_classes; i++) { + const char *name = clients->class[i].name; + int name_len = strlen(name); + int pad = (width - name_len) / 2; + int spaces = width - pad - name_len; + + if (!clients->class[i].num_engines) + continue; /* Assert in the ideal world. */ + + if (pad < 0 || spaces < 0) + continue; + + n_spaces(pad); + printf("%s", name); + n_spaces(spaces); + len += pad + name_len + spaces; + } + } + + n_spaces(con_w - len); + printf("\033[0m\n"); + } else { + if (clients->num_classes) + pops->open_struct("clients"); + } + + return lines; +} + +static bool numeric_clients; +static bool filter_idle; + +static int +print_client(struct client *c, struct engines *engines, double t, int lines, + int con_w, int con_h, unsigned int period_us, int *class_w) +{ + struct clients *clients = c->clients; + unsigned int i; + + if (output_mode == INTERACTIVE) { + if (filter_idle && (!c->total_runtime || c->samples < 2)) + return lines; + + lines++; + + printf("%6u %17s ", c->pid, c->print_name); + + for (i = 0; c->samples > 1 && i < clients->num_classes; i++) { + double pct; + + if (!clients->class[i].num_engines) + continue; /* Assert in the ideal world. */ + + pct = (double)c->val[i] / period_us / 1e3 * 100 / + clients->class[i].num_engines; + + /* + * Guard against possible time-drift between sampling + * client data and time we obtained our time-delta from + * PMU. + */ + if (pct > 100.0) + pct = 100.0; + + print_percentage_bar(pct, *class_w, numeric_clients); + } + + putchar('\n'); + } else if (output_mode == JSON) { + char buf[64]; + + snprintf(buf, sizeof(buf), "%u", c->id); + pops->open_struct(buf); + + __json_add_member("name", c->print_name); + + snprintf(buf, sizeof(buf), "%u", c->pid); + __json_add_member("pid", buf); + + if (c->samples > 1) { + pops->open_struct("engine-classes"); + + for (i = 0; i < clients->num_classes; i++) { + double pct; + + snprintf(buf, sizeof(buf), "%s", + clients->class[i].name); + pops->open_struct(buf); + + pct = (double)c->val[i] / period_us / 1e3 * 100; + snprintf(buf, sizeof(buf), "%f", pct); + __json_add_member("busy", buf); + + __json_add_member("unit", "%"); + + pops->close_struct(); + } + + pops->close_struct(); + } + + pops->close_struct(); + } + + return lines; +} + +static int +print_clients_footer(struct clients *clients, double t, + int lines, int con_w, int con_h) +{ + if (output_mode == INTERACTIVE) { + if (lines++ < con_h) + printf("\n"); + } else { + if (clients->num_classes) + pops->close_struct(); + } + + return lines; +} + static void restore_term(void) { tcsetattr(STDIN_FILENO, TCSANOW, &termios_orig); @@ -1565,6 +2272,31 @@ static void interactive_stdin(void) assert(ret == 0); } +static void select_client_sort(void) +{ + struct { + int (*cmp)(const void *, const void *); + const char *msg; + } cmp[] = { + { client_last_cmp, "Sorting clients by current GPU usage." }, + { client_total_cmp, "Sorting clients by accummulated GPU usage." }, + { client_pid_cmp, "Sorting clients by pid." }, + { client_id_cmp, "Sorting clients by DRM id." }, + }; + static unsigned int client_sort; + +bump: + if (++client_sort >= ARRAY_SIZE(cmp)) + client_sort = 0; + + client_cmp = cmp[client_sort].cmp; + header_msg = cmp[client_sort].msg; + + /* Sort by client id makes no sense with pid aggregation. */ + if (aggregate_pids && client_cmp == client_id_cmp) + goto bump; +} + static bool in_help; static void process_help_stdin(void) @@ -1607,9 +2339,29 @@ static void process_normal_stdin(void) else header_msg = "Showing physical engines."; break; + case 'i': + filter_idle ^= true; + if (filter_idle) + header_msg = "Hiding inactive clients."; + else + header_msg = "Showing inactive clients."; + break; + case 'n': + numeric_clients ^= true; + break; + case 's': + select_client_sort(); + break; case 'h': in_help = true; break; + case 'H': + aggregate_pids ^= true; + if (aggregate_pids) + header_msg = "Aggregating clients."; + else + header_msg = "Showing individual clients."; + break; }; } } @@ -1637,6 +2389,10 @@ static void show_help_screen(void) printf( "Help for interactive commands:\n\n" " '1' Toggle between aggregated engine class and physical engine mode.\n" +" 'n' Toggle display of numeric client busyness overlay.\n" +" 's' Toggle between sort modes (runtime, total runtime, pid, client id).\n" +" 'i' Toggle display of clients which used no GPU time.\n" +" 'H' Toggle between per PID aggregation and individual clients.\n" "\n" " 'h' or 'q' Exit interactive help.\n" "\n"); @@ -1645,6 +2401,7 @@ static void show_help_screen(void) int main(int argc, char **argv) { unsigned int period_us = DEFAULT_PERIOD_MS * 1000; + struct clients *clients = NULL; int con_w = -1, con_h = -1; char *output_path = NULL; struct engines *engines; @@ -1783,15 +2540,24 @@ int main(int argc, char **argv) ret = EXIT_SUCCESS; + clients = init_clients(card.pci_slot_name[0] ? + card.pci_slot_name : IGPU_PCI); init_engine_classes(engines); + if (clients) { + clients->num_classes = engines->num_classes; + clients->class = engines->class; + } pmu_sample(engines); + scan_clients(clients); codename = igt_device_get_pretty_name(&card, false); while (!stop_top) { + struct clients *disp_clients; bool consumed = false; + int j, lines = 0; struct winsize ws; - int lines = 0; + struct client *c; double t; /* Update terminal size. */ @@ -1810,6 +2576,8 @@ int main(int argc, char **argv) pmu_sample(engines); t = (double)(engines->ts.cur - engines->ts.prev) / 1e9; + disp_clients = scan_clients(clients); + if (stop_top) break; @@ -1829,12 +2597,41 @@ int main(int argc, char **argv) lines = print_engines(engines, t, lines, con_w, con_h); + if (disp_clients) { + int class_w; + + lines = print_clients_header(disp_clients, lines, + con_w, con_h, + &class_w); + + for_each_client(disp_clients, c, j) { + assert(c->status != PROBE); + if (c->status != ALIVE) + break; /* Active clients are first in the array. */ + + if (lines >= con_h) + break; + + lines = print_client(c, engines, t, + lines, con_w, + con_h, period_us, + &class_w); + } + + lines = print_clients_footer(disp_clients, t, + lines, con_w, + con_h); + } + pops->close_struct(); } if (stop_top) break; + if (disp_clients != clients) + free_clients(disp_clients); + if (output_mode == INTERACTIVE) process_stdin(period_us); else diff --git a/tools/meson.build b/tools/meson.build index b6b9753463a9..771d0b9e3d5d 100644 --- a/tools/meson.build +++ b/tools/meson.build @@ -91,7 +91,7 @@ install_subdir('registers', install_dir : datadir) executable('intel_gpu_top', 'intel_gpu_top.c', install : true, install_rpath : bindir_rpathdir, - dependencies : [lib_igt_perf,lib_igt_device_scan,math]) + dependencies : [lib_igt_perf,lib_igt_device_scan,lib_igt_drm_fdinfo,math]) executable('amd_hdmi_compliance', 'amd_hdmi_compliance.c', dependencies : [tool_deps], -- 2.32.0