While for stress-testing the system we want to submit as many batches as we can, as that shows us the worst-case impact on system latency, it is not a very realistic case. To introduce a bit more realism, allow the batches to run for a user-defined duration. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- benchmarks/gem_syslatency.c | 71 ++++++++++++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 4 deletions(-) diff --git a/benchmarks/gem_syslatency.c b/benchmarks/gem_syslatency.c index d1056773a..45cabe86c 100644 --- a/benchmarks/gem_syslatency.c +++ b/benchmarks/gem_syslatency.c @@ -51,6 +51,7 @@ static volatile int done; struct gem_busyspin { pthread_t thread; + unsigned long sz; unsigned long count; bool leak; bool interrupts; @@ -96,7 +97,8 @@ static void *gem_busyspin(void *arg) struct gem_busyspin *bs = arg; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 obj[2]; - const unsigned sz = bs->leak ? 16 << 20 : 4 << 10; + const unsigned sz = + bs->sz ? bs->sz + sizeof(bbe) : bs->leak ? 
16 << 20 : 4 << 10; unsigned engines[16]; unsigned nengine; unsigned engine; @@ -112,7 +114,7 @@ static void *gem_busyspin(void *arg) obj[0].handle = gem_create(fd, 4096); obj[0].flags = EXEC_OBJECT_WRITE; obj[1].handle = gem_create(fd, sz); - gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe)); + gem_write(fd, obj[1].handle, bs->sz, &bbe, sizeof(bbe)); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)(obj + !bs->interrupts); @@ -125,6 +127,12 @@ static void *gem_busyspin(void *arg) } while (!done) { + for (int n = 0; n < nengine; n++) { + const int m = rand() % nengine; + unsigned int tmp = engines[n]; + engines[n] = engines[m]; + engines[m] = tmp; + } for (int n = 0; n < nengine; n++) { execbuf.flags &= ~ENGINE_FLAGS; execbuf.flags |= engines[n]; @@ -134,7 +142,7 @@ static void *gem_busyspin(void *arg) if (bs->leak) { gem_madvise(fd, obj[1].handle, I915_MADV_DONTNEED); obj[1].handle = gem_create(fd, sz); - gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe)); + gem_write(fd, obj[1].handle, bs->sz, &bbe, sizeof(bbe)); } } @@ -294,6 +302,50 @@ static void *background_fs(void *path) return NULL; } +static unsigned long calibrate_nop(unsigned int target_us, + unsigned int tolerance_pct) +{ + const uint32_t bbe = MI_BATCH_BUFFER_END; + const unsigned int loops = 100; + struct drm_i915_gem_exec_object2 obj = {}; + struct drm_i915_gem_execbuffer2 eb = + { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj}; + struct timespec t_0, t_end; + long sz, prev; + int fd; + + fd = drm_open_driver(DRIVER_INTEL); + + clock_gettime(CLOCK_MONOTONIC, &t_0); + + sz = 256 * 1024; + do { + struct timespec t_start; + + obj.handle = gem_create(fd, sz + sizeof(bbe)); + gem_write(fd, obj.handle, sz, &bbe, sizeof(bbe)); + gem_execbuf(fd, &eb); + gem_sync(fd, obj.handle); + + clock_gettime(CLOCK_MONOTONIC, &t_start); + for (int loop = 0; loop < loops; loop++) + gem_execbuf(fd, &eb); + gem_sync(fd, obj.handle); + clock_gettime(CLOCK_MONOTONIC, &t_end); + + gem_close(fd, 
obj.handle); + + prev = sz; + sz = loops * sz / elapsed(&t_start, &t_end) * 1e3 * target_us; + sz = ALIGN(sz, sizeof(uint32_t)); + } while (elapsed(&t_0, &t_end) < 5 || + abs(sz - prev) > (sz * tolerance_pct / 100)); + + close(fd); + + return sz; +} + int main(int argc, char **argv) { struct gem_busyspin *busy; @@ -309,9 +361,10 @@ int main(int argc, char **argv) int enable_gem_sysbusy = 1; bool leak = false; bool interrupts = false; + long batch = 0; int n, c; - while ((c = getopt(argc, argv, "t:f:bmni1")) != -1) { + while ((c = getopt(argc, argv, "r:t:f:bmni1")) != -1) { switch (c) { case '1': ncpus = 1; @@ -328,6 +381,10 @@ int main(int argc, char **argv) if (time < 0) time = INT_MAX; break; + case 'r': + /* Duration of each batch (microseconds) */ + batch = atoi(optarg); + break; case 'f': /* Select an output field */ field = atoi(optarg); @@ -350,11 +407,17 @@ int main(int argc, char **argv) force_low_latency(); min = min_measurement_error(); + if (batch > 0) + batch = calibrate_nop(batch, 2); + else + batch = -batch; + busy = calloc(ncpus, sizeof(*busy)); pthread_attr_init(&attr); if (enable_gem_sysbusy) { for (n = 0; n < ncpus; n++) { bind_cpu(&attr, n); + busy[n].sz = batch; busy[n].leak = leak; busy[n].interrupts = interrupts; pthread_create(&busy[n].thread, &attr, -- 2.17.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx