Chris, With your patch, measure_qlen() reports how many gem_execbuf() calls can be executed (the queue length) within the timeout of the slowest engine, correct? The run time becomes 95 sec, which is less than half. -caz On Sat, 2019-02-23 at 01:34 +0000, Chris Wilson wrote: > Not all engines are created equal, and our weighting ends up > favouring > the many faster xCS rings at the expense of RCS. Our qlen estimation > also failed to factor in the context switch overhead, which is a > significant factor for nop batches. So we oversubscribe the number of > batches submitted to RCS and end up waiting for those to complete at > the > end of our subtest timeslice. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Caz Yokoyama <caz.yokoyama@xxxxxxxxx> > --- > tests/i915/gem_ctx_switch.c | 39 +++++++++++++++++++++++++++++---- > ---- > 1 file changed, 31 insertions(+), 8 deletions(-) > > diff --git a/tests/i915/gem_ctx_switch.c > b/tests/i915/gem_ctx_switch.c > index 1208cb8d7..87e13b915 100644 > --- a/tests/i915/gem_ctx_switch.c > +++ b/tests/i915/gem_ctx_switch.c > @@ -26,6 +26,7 @@ > */ > > #include "igt.h" > +#include <limits.h> > #include <unistd.h> > #include <stdlib.h> > #include <stdint.h> > @@ -58,29 +59,50 @@ static int measure_qlen(int fd, > { > const struct drm_i915_gem_exec_object2 * const obj = > (struct drm_i915_gem_exec_object2 *)(uintptr_t)execbuf- > >buffers_ptr; > - int qlen = 64; > + uint32_t ctx[64]; > + int min = INT_MAX, max = 0; > + > + for (int i = 0; i < ARRAY_SIZE(ctx); i++) > + ctx[i] = gem_context_create(fd); > > for (unsigned int n = 0; n < nengine; n++) { > uint64_t saved = execbuf->flags; > struct timespec tv = {}; > + int q; > > execbuf->flags |= engine[n]; > > - igt_nsec_elapsed(&tv); > - for (int loop = 0; loop < qlen; loop++) > + for (int i = 0; i < ARRAY_SIZE(ctx); i++) { > + execbuf->rsvd1 = ctx[i]; > gem_execbuf(fd, execbuf); > + } > gem_sync(fd, obj->handle); > > - execbuf->flags = saved; > + igt_nsec_elapsed(&tv); > + for (int i = 0; i < 
ARRAY_SIZE(ctx); i++) { > + execbuf->rsvd1 = ctx[i]; > + gem_execbuf(fd, execbuf); > + } > + gem_sync(fd, obj->handle); > > /* > * Be conservative and aim not to overshoot timeout, so > scale > * down by 8 for hopefully a max of 12.5% error. > */ > - qlen = qlen * timeout * 1e9 / igt_nsec_elapsed(&tv) / 8 > + 1; > + q = ARRAY_SIZE(ctx) * timeout * 1e9 / > igt_nsec_elapsed(&tv) / 8 + 1; > + if (q < min) > + min = q; > + if (q > max) > + max = q; > + > + execbuf->flags = saved; > } > > - return qlen; > + for (int i = 0; i < ARRAY_SIZE(ctx); i++) > + gem_context_destroy(fd, ctx[i]); > + > + igt_debug("Estimated qlen: {min:%d, max:%d}\n", min, max); > + return min; > } > > static void single(int fd, uint32_t handle, > @@ -259,9 +281,10 @@ static void all(int fd, uint32_t handle, > unsigned flags, int timeout) > clock_gettime(CLOCK_MONOTONIC, &now); > gem_close(fd, obj[0].handle); > > - igt_info("[%d:%d] %s: %'u cycles: > %.3fus%s\n", > + igt_info("[%d:%d] %s: %'u cycles: > %.3fus%s (elapsed: %.3fs)\n", > nctx, child, name[child], > count, elapsed(&start, &now)*1e6 / count, > - flags & INTERRUPTIBLE ? " > (interruptible)" : ""); > + flags & INTERRUPTIBLE ? " > (interruptible)" : "", > + elapsed(&start, &now)); > } > igt_waitchildren(); > } _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx