Different engines take a different number of cycles for MI_NOOP. As we specify workloads in microseconds (us), we need to take into account the different calibration values so that the workloads behave as expected. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- benchmarks/gem_wsim.c | 72 +++++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 20 deletions(-) diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c index 9564dcb70..50a062f0e 100644 --- a/benchmarks/gem_wsim.c +++ b/benchmarks/gem_wsim.c @@ -238,7 +238,7 @@ struct workload }; static const unsigned int nop_calibration_us = 1000; -static unsigned long nop_calibration; +static unsigned long nop_calibration[NUM_ENGINES]; static unsigned int context_vcs_rr; @@ -808,9 +808,9 @@ static unsigned int get_duration(struct w_step *w) (dur->max + 1 - dur->min); } -static unsigned long get_bb_sz(unsigned int duration) +static unsigned long get_bb_sz(unsigned int engine, unsigned int duration) { - return ALIGN(duration * nop_calibration * sizeof(uint32_t) / + return ALIGN(duration * nop_calibration[engine] * sizeof(uint32_t) / nop_calibration_us, sizeof(uint32_t)); } @@ -818,7 +818,7 @@ static void init_bb(struct w_step *w, unsigned int flags) { const unsigned int arb_period = - get_bb_sz(w->preempt_us) / sizeof(uint32_t); + get_bb_sz(w->engine, w->preempt_us) / sizeof(uint32_t); const unsigned int mmap_len = ALIGN(w->bb_sz, 4096); unsigned int i; uint32_t *ptr; @@ -1043,10 +1043,10 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags) if (w->unbound_duration) /* nops + MI_ARB_CHK + MI_BATCH_BUFFER_START */ - w->bb_sz = max(64, get_bb_sz(w->preempt_us)) + + w->bb_sz = max(64, get_bb_sz(w->engine, w->preempt_us)) + (1 + 3) * sizeof(uint32_t); else - w->bb_sz = get_bb_sz(w->duration.max); + w->bb_sz = get_bb_sz(w->engine, w->duration.max); w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz + (w->unbound_duration ? 
4096 : 0)); init_bb(w, flags); terminate_bb(w, flags); @@ -2300,7 +2300,7 @@ do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine, w->eb.batch_start_offset = w->unbound_duration ? 0 : - ALIGN(w->bb_sz - get_bb_sz(get_duration(w)), + ALIGN(w->bb_sz - get_bb_sz(engine, get_duration(w)), 2 * sizeof(uint32_t)); for (i = 0; i < w->fence_deps.nr; i++) { @@ -2580,17 +2580,23 @@ static void fini_workload(struct workload *wrk) free(wrk); } -static unsigned long calibrate_nop(unsigned int tolerance_pct) +static unsigned long calibrate_nop(unsigned int engine, double tolerance_pct) { const uint32_t bbe = 0xa << 23; unsigned int loops = 17; unsigned int usecs = nop_calibration_us; struct drm_i915_gem_exec_object2 obj = {}; - struct drm_i915_gem_execbuffer2 eb = - { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj}; + struct drm_i915_gem_execbuffer2 eb = { + .buffer_count = 1, + .buffers_ptr = (uintptr_t)&obj, + .flags = eb_engine_map[engine], + }; long size, last_size; struct timespec t_0, t_end; + if (__gem_execbuf(fd, &eb) != -ENOENT) + return 0; + clock_gettime(CLOCK_MONOTONIC, &t_0); size = 256 * 1024; @@ -2803,8 +2809,8 @@ int main(int argc, char **argv) int master_workload = -1; char *append_workload_arg = NULL; struct w_arg *w_args = NULL; - unsigned int tolerance_pct = 1; const struct workload_balancer *balancer = NULL; + double tolerance_pct = 1; char *endptr = NULL; int prio = 0; double t; @@ -2852,10 +2858,28 @@ int main(int argc, char **argv) clients = strtol(optarg, NULL, 0); break; case 't': - tolerance_pct = strtol(optarg, NULL, 0); + tolerance_pct = strtod(optarg, NULL); break; case 'n': - nop_calibration = strtol(optarg, NULL, 0); + if (strchr(optarg, ',')) { + char *ctx = NULL; + char *str = optarg; + char *token; + + while ((token = strtok_r(str, ",", &ctx)) != NULL) { + unsigned long nop; + int engine; + + str = NULL; + if (sscanf(token, "%d:%lu", + &engine, &nop) == 2) + nop_calibration[engine] = nop; + } + } else { + 
nop_calibration[0] = strtol(optarg, NULL, 0); + for (i = 1; i < NUM_ENGINES; i++) + nop_calibration[i] = nop_calibration[0]; + } break; case 'r': repeat = strtol(optarg, NULL, 0); @@ -2930,14 +2954,22 @@ int main(int argc, char **argv) return 1; } - if (!nop_calibration) { + if (!nop_calibration[0]) { + int engine; + if (verbose > 1) - printf("Calibrating nop delay with %u%% tolerance...\n", + printf("Calibrating nop delay with %.1f%% tolerance...\n", tolerance_pct); - nop_calibration = calibrate_nop(tolerance_pct); - if (verbose) - printf("Nop calibration for %uus delay is %lu.\n", - nop_calibration_us, nop_calibration); + + for (engine = 0; engine < NUM_ENGINES; engine++) { + nop_calibration[engine] = calibrate_nop(engine, tolerance_pct); + if (!nop_calibration[engine]) + continue; + + if (verbose) + printf("Nop(engine:%d) calibration for %uus delay is %lu.\n", + engine, nop_calibration_us, nop_calibration[engine]); + } return 0; } @@ -2997,7 +3029,7 @@ int main(int argc, char **argv) if (verbose > 1) { printf("Using %lu nop calibration for %uus delay.\n", - nop_calibration, nop_calibration_us); + nop_calibration[0], nop_calibration_us); printf("%u client%s.\n", clients, clients > 1 ? "s" : ""); if (flags & SWAPVCS) printf("Swapping VCS rings between clients.\n"); -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx