On 16/12/2020 15:51, Chris Wilson wrote:
Quoting Tvrtko Ursulin (2020-12-16 15:28:09)From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Similarly to how top(1) handles SMP, we can default to showing engines of a same class as a single bar graph entry. To achieve this a little bit of hackery is employed. PMU sampling is left as is and only at the presentation layer we create a fake set of engines, one for each class, summing and normalizing the load respectively. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- man/intel_gpu_top.rst | 1 + tools/intel_gpu_top.c | 192 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 180 insertions(+), 13 deletions(-) diff --git a/man/intel_gpu_top.rst b/man/intel_gpu_top.rst index 2e0c3a05acc1..35ab10da9bb4 100644 --- a/man/intel_gpu_top.rst +++ b/man/intel_gpu_top.rst @@ -54,6 +54,7 @@ RUNTIME CONTROL Supported keys:'q' Exit from the tool.+ '1' Toggle between aggregated engine class and physical engine mode.DEVICE SELECTION================ diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c index 68911940f1d0..8c4280aa19b9 100644 --- a/tools/intel_gpu_top.c +++ b/tools/intel_gpu_top.c @@ -76,8 +76,16 @@ struct engine { struct pmu_counter sema; };+struct engine_class {+ unsigned int class; + const char *name; + unsigned int num_engines; +}; + struct engines { unsigned int num_engines; + unsigned int num_classes; + struct engine_class *class; unsigned int num_counters; DIR *root; int fd; @@ -1118,6 +1126,8 @@ print_imc(struct engines *engines, double t, int lines, int con_w, int con_h) return lines; }+static bool class_view;+ static int print_engines_header(struct engines *engines, double t, int lines, int con_w, int con_h) @@ -1133,8 +1143,13 @@ print_engines_header(struct engines *engines, double t, pops->open_struct("engines");if (output_mode == INTERACTIVE) {- const char *a = " ENGINE BUSY "; const char *b = " MI_SEMA MI_WAIT"; + const char *a; + + if (class_view) + a = " ENGINES BUSY "; + else + a = " ENGINE BUSY ";printf("\033[7m%s%*s%s\033[0m\n",a, (int)(con_w - 1 - strlen(a) - strlen(b)), @@ -1214,6 +1229,164 @@ print_engines_footer(struct engines *engines, double t, return lines; }+static int class_cmp(const void *_a, const void *_b)+{ + const struct engine_class *a = _a; + const struct engine_class *b = _b; + + return a->class - b->class; +} + +static void init_engine_classes(struct engines *engines) +{ + struct engine_class *classes; + unsigned int i, num; + int max = -1; + + for (i = 0; i < engines->num_engines; i++) { + struct engine *engine = engine_ptr(engines, i); + + if ((int)engine->class > max) + max = engine->class; + } + assert(max >= 0); + + num = max + 1; + + classes = calloc(num, sizeof(*classes)); + assert(classes); + + for (i = 0; i < engines->num_engines; i++) { + struct engine *engine = engine_ptr(engines, i); + + classes[engine->class].num_engines++; + } + + for (i = 0; i < num; i++) { + classes[i].class = i; + classes[i].name = class_display_name(i); + }Do you want to remove empty classes at this point?
I need this array 1:1 with class ids so no. I didn't find yet that "empty" entries would cause a problem anywhere in the code. Hm actually there wouldn't be any empty classes, since class generation is driven of discovered engines.
I need to sprinkle some more asserts and comments around.
+ + qsort(classes, num, sizeof(*classes), class_cmp); + + engines->num_classes = num; + engines->class = classes; +} + +static void __pmu_sum(struct pmu_pair *dst, struct pmu_pair *src) +{ + dst->prev += src->prev; + dst->cur += src->cur; +} + +static void __pmu_normalize(struct pmu_pair *val, unsigned int n) +{ + val->prev /= n; + val->cur /= n;I was expecting just the delta to be normalized. This works as well.
Yeah, this allows basically no changes to existing code.Maybe a running average algorithm would be better to not overflow the u64 but I haven't bothered calculating if that is a theoretical possibility or not.
+} + +static struct engines *init_classes(struct engines *engines) +{ + struct engines *classes; + unsigned int i, j; + + init_engine_classes(engines); + + classes = calloc(1, sizeof(struct engines) + + engines->num_classes * sizeof(struct engine)); + assert(classes); + + classes->num_engines = engines->num_classes; + classes->num_classes = engines->num_classes; + classes->class = engines->class; + + for (i = 0; i < engines->num_classes; i++) { + struct engine *engine = engine_ptr(classes, i); + + engine->class = i; + engine->instance = -1; + + if (!engines->class[i].num_engines) + continue; + + engine->display_name = strdup(class_display_name(i)); + assert(engine->display_name); + engine->short_name = strdup(class_short_name(i)); + assert(engine->short_name); + + for (j = 0; j < engines->num_engines; j++) { + struct engine *e = engine_ptr(engines, j); + + if (e->class == i) { + engine->num_counters = e->num_counters; + engine->busy = e->busy; + engine->sema = e->sema; + engine->wait = e->wait; + } + } + } + + return classes; +} + +static struct engines * +update_classes(struct engines *classes, struct engines *engines) +{ + unsigned int i, j; + + if (!classes) + classes = init_classes(engines); + + for (i = 0; i < classes->num_engines; i++) { + unsigned int num_engines = classes->class[i].num_engines; + struct engine *engine = engine_ptr(classes, i); + + if (!num_engines) + continue; + + memset(&engine->busy.val, 0, sizeof(engine->busy.val)); + memset(&engine->sema.val, 0, sizeof(engine->sema.val)); + memset(&engine->wait.val, 0, sizeof(engine->wait.val)); + + for (j = 0; j < engines->num_engines; j++) { + struct engine *e = engine_ptr(engines, j); + + if (e->class == i) { + __pmu_sum(&engine->busy.val, &e->busy.val); + __pmu_sum(&engine->sema.val, &e->sema.val); + __pmu_sum(&engine->wait.val, &e->wait.val); + } + } + + __pmu_normalize(&engine->busy.val, num_engines); + __pmu_normalize(&engine->sema.val, num_engines); + __pmu_normalize(&engine->wait.val, num_engines);Ok. So you wrap the normal engines with class_view, where each engine in that class_view is an average of all engines within it.
Yes, and then just pass this wrapped/aggregated struct engines * to the existing code.
+ } + + return classes; +} + +static int +print_engines(struct engines *engines, double t, int lines, int w, int h) +{ + static struct engines *classes; + struct engines *show; + + if (class_view) + classes = show = update_classes(classes, engines);Something is not right here. Oh static, nvm.
Too hacky? Maybe "show = classes = update_classes()"would read better. Regards, Tvrtko _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx