A little tool I've been meaning to write for a while... Convert the .wsim into their dag and find the longest chains and evaluate them on an simulated machine. v2: Implement barriers to handle sync commands Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- benchmarks/Makefile.sources | 1 + benchmarks/sim_wsim.c | 413 ++++++++++++++++++++++++++++++++++++ 2 files changed, 414 insertions(+) create mode 100644 benchmarks/sim_wsim.c diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources index d150035aa..b80a7644c 100644 --- a/benchmarks/Makefile.sources +++ b/benchmarks/Makefile.sources @@ -17,6 +17,7 @@ benchmarks_prog_list = \ gem_wsim \ kms_vblank \ prime_lookup \ + sim_wsim \ vgem_mmap \ $(NULL) diff --git a/benchmarks/sim_wsim.c b/benchmarks/sim_wsim.c new file mode 100644 index 000000000..5ebc1c8c4 --- /dev/null +++ b/benchmarks/sim_wsim.c @@ -0,0 +1,413 @@ +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "igt_aux.h" + +#if 0 +#define DBG(...) printf(__VA_ARGS__) +#else +#define DBG(...) do { } while(0) +#endif + +struct dependency { + struct task *signal; + struct igt_list link; +}; + +struct task { + int step; + int ctx; + int class; + int instance; + + unsigned long min, max; + unsigned long duration; + unsigned long deadline; + + struct igt_list link; + struct igt_list sched; + struct igt_list signals; + + bool visited; +}; + +struct work { + struct task **tasks; + unsigned int count, size; + + struct igt_list ends; + struct task *barrier; +}; + +static void add_dependency(struct task *task, struct task *signal) +{ + struct dependency *dep; + + DBG("%s %d (context %d, engine %d) -> %d (context %d, engine %d)\n", + __func__, + signal->step, signal->ctx, signal->class, + task->step, task->ctx, task->class); + + dep = malloc(sizeof(*dep)); + dep->signal = signal; + igt_list_add(&dep->link, &task->signals); + if (!igt_list_empty(&signal->link)) { + igt_list_del(&signal->link); + igt_list_init(&signal->link); + } +} + +enum class { + RCS, + BCS, + VCS, + VECS, +}; + +static void add_task(struct work *work, char *line) +{ + static const char *engines[] = { + [RCS] = "rcs", + [BCS] = "bcs", + [VCS] = "vcs", + [VECS] = "vecs", + }; + struct task *task; + char *token; + int i; + + DBG("line='%s'\n", line); + + token = strtok(line, "."); + + if (!strcasecmp(token, "s")) { /* sync point */ + int sync = atoi(strtok(NULL, ".")); + + DBG("syncpt %d\n", sync); + + work->barrier = work->tasks[work->count + sync]; + return; + } + + if (!isdigit(*token)) + return; + + /* + * 1.RCS.2800-3100.-1.0 + * - context + * - engine + * - delay + * - dependencies + * - sync + */ + task = malloc(sizeof(*task)); + + igt_list_init(&task->signals); + task->step = work->count; + task->visited = false; + + /* context */ + DBG("context='%s'\n", token); + task->ctx = atoi(token); + + /* engine */ + token = strtok(NULL, "."); + DBG("engine='%s'\n", token); + task->class = -1; + for (i = 0; i < sizeof(engines)/sizeof(*engines); i++) { + const int len = strlen(engines[i]); + if (!strncasecmp(token, engines[i], len)) { + task->class = i; + if (token[len]) + task->instance = atoi(token + len); + else + task->instance = -1; + break; + } + } + + /* delay */ + token = strtok(NULL, "."); + DBG("delay='%s'\n", token); + task->min = strtol(token, &token, 0); + if (*token) + task->max = strtol(token + 1, NULL, 0); + else + task->max = task->min; + task->duration = (task->min + task->max) / 2; + DBG("min=%lu, max=%lu; duration=%lu\n", task->min, task->max, task->duration); + + /* dependencies */ + token = strtok(NULL, "."); + DBG("deps='%s'\n", token); + while ((i = strtol(token, &token, 0))) { + add_dependency(task, work->tasks[work->count + i]); + if (*token) + token++; + } + + /* add a dependency for the context+engine timeline */ + for (i = work->count; --i >= 0; ) { + if (work->tasks[i]->ctx == task->ctx && + work->tasks[i]->class == task->class) { + add_dependency(task, work->tasks[i]); + break; + } + } + + if (work->barrier) + add_dependency(task, work->barrier); + + /* sync -- we become the barrier */ + if (atoi(strtok(NULL, ","))) { + DBG("marking as a sync point\n"); + work->barrier = task; + } + + igt_list_add(&task->link, &work->ends); + work->tasks[work->count++] = task; +} + +static struct work *parse_work(FILE *file) +{ + struct work *work; + char *line = NULL; + size_t len = 0; + + work = malloc(sizeof(*work)); + igt_list_init(&work->ends); + work->barrier = NULL; + + work->size = 64; + work->count = 0; + work->tasks = malloc(sizeof(*work->tasks) * work->size); + + while (getline(&line, &len, file) != -1) { + if (work->count == work->size) { + work->tasks = realloc(work->tasks, + sizeof(*work->tasks) * work->size); + work->size *= 2; + } + add_task(work, line); + } + + free(line); + + DBG("%d tasks\n", work->count); + return work; +} + +static unsigned long sum_durations(struct task *task) +{ + unsigned long max_duration = 0; + struct task *signal = NULL; + struct dependency *dep; + + igt_list_for_each(dep, &task->signals, link) { + if (dep->signal->duration > max_duration) { + signal = dep->signal; + max_duration = signal->duration; + } + } + + return task->duration + (signal ? sum_durations(signal) : 0); +} + +static void ideal_depth(struct work *work) +{ + unsigned long total_duration; + unsigned long max_duration; + struct task *task; + int i; + + /* + * The ideal depth is the longest chain of dependencies as the + * dependency chain requires sequential task execution. Each + * chain is assumed to be run in parallel on an infinite set of + * engines, so the ratelimiting step is its longest path. + */ + max_duration = 0; + igt_list_for_each(task, &work->ends, link) { + unsigned long duration = sum_durations(task); + if (duration > max_duration) + max_duration = duration; + } + + total_duration = 0; + for (i = 0; i < work->count; i++) + total_duration += work->tasks[i]->duration; + + printf("Single client\n"); + printf(" total duration %luus; %.2f wps\n", total_duration, 1e6/total_duration); + printf(" ideal duration %luus; %.2f wps\n", max_duration, 1e6/max_duration); +} + +struct sim_class { + int ninstance; + unsigned long deadline[4]; + unsigned long busy[4]; +}; + +static void simulate_client(struct work *work) +{ + struct sim_class sim[] = { + [RCS] = { 1 }, + [BCS] = { 1 }, + [VCS] = { 2 }, + [VECS] = { 1 }, + }, *class; + IGT_LIST(sched); + struct task *task; + unsigned long max; + int i, j; + + printf("Simulated clients:\n"); + + for (i = 0; i < work->count; i++) + igt_list_init(&work->tasks[i]->sched); + + igt_list_for_each(task, &work->ends, link) + igt_list_add_tail(&task->sched, &sched); + + igt_list_for_each(task, &sched, sched) { + struct dependency *dep; + + igt_list_for_each(dep, &task->signals, link) + igt_list_move_tail(&dep->signal->sched, &sched); + } + + igt_list_for_each_reverse(task, &sched, sched) { + struct dependency *dep; + int instance; + + class = &sim[task->class]; + max = class->deadline[0]; + + instance = task->instance; + if (instance < 0) { + instance = 0; + for (i = 1; i < class->ninstance; i++) { + if (class->deadline[i] < max) { + max = class->deadline[i]; + instance = i; + } + } + } + + /* + * Greedy (first available), not true optimal scheduling. + * + * For optimal, we do have to compute the global optimal + * ordering by checking every permutation... + */ + igt_list_for_each(dep, &task->signals, link) { + if (dep->signal->deadline > max) + max = dep->signal->deadline; + } + + DBG("task %d: engine %d, instance %d; finish %lu\n", + task->step, task->class, instance, max); + + task->deadline = max + task->duration; + class->deadline[instance] = task->deadline; + class->busy[instance] += task->duration; + } + + max = 0; + for (i = 0; i < sizeof(sim)/sizeof(sim[0]); i++) { + class = &sim[i]; + for (j = 0; j < class->ninstance; j++) { + if (class->deadline[j] > max) + max = class->deadline[j]; + } + } + printf(" single duration %luus; %.2f wps\n", max, 1e6/max); + + /* + * Compute the maximum duration required on any engine. + * + * With sufficient clients forcing maximum occupancy under their weight, + * the ratelimiting step becomes a single engine and how many clients + * it takes to fill. + */ + max = 0; + for (i = 0; i < sizeof(sim)/sizeof(sim[0]); i++) { + class = &sim[i]; + for (j = 0; j < class->ninstance; j++) { + if (class->busy[j] > max) + max = class->busy[j]; + } + } + printf(" packed duration %luus; %.2f wps\n", max, 1e6/max); +} + +static void graphviz(struct work *work) +{ +#if 0 + int i, j; + + printf("digraph {\n"); + printf(" rankdir=LR;\n"); + printf(" splines=line;\n"); + printf("\n"); + + for (i = 0; i < work->count; i++) { + struct task *task = work->tasks[i]; + + if (task->visited) + goto skip; + + printf(" subgraph cluster_%d {\n", task->ctx); + printf(" label=\"Context %d\"\n", task->ctx); + for (j = i; j < work->count; j++) { + if (work->tasks[j]->ctx == task->ctx) { + printf(" task_%03d;\n", j); + work->tasks[j]->visited = true; + } + } + printf(" }\n\n"); + +skip: + task->visited = false; + } + + for (i = 0; i < work->count; i++) { + struct task *task = work->tasks[i]; + struct dependency *dep; + + igt_list_for_each(dep, &task->signals, link) { + printf(" task_%03d -> task_%03d;\n", + dep->signal->step, task->step); + } + } + + printf("}\n"); +#endif +} + +int main(int argc, char **argv) +{ + int i; + + for (i = 1; i < argc; i++) { + FILE *file = fopen(argv[i], "r"); + struct work *work; + + if (!file) { + perror(argv[i]); + return 1; + } + + work = parse_work(file); + fclose(file); + + graphviz(work); + + ideal_depth(work); + simulate_client(work); + } + + return 0; +} -- 2.17.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx