Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > We need to exercise the HW and submission paths for switching contexts > rapidly to check that features such as execlists' wa_tail are adequate. > Plus it's an interesting baseline latency metric. > > v2: Check the initial request for allocation errors > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- > .../gpu/drm/i915/selftests/i915_gem_context.c | 188 ++++++++++++++++++ > 1 file changed, 188 insertions(+) > > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c > index a89d31075c7a..f1a57e3c3820 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c > @@ -22,6 +22,8 @@ > * > */ > > +#include <linux/prime_numbers.h> > + > #include "../i915_selftest.h" > #include "i915_random.h" > #include "igt_flush_test.h" > @@ -32,6 +34,191 @@ > > #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) > > +struct live_test { > + struct drm_i915_private *i915; > + const char *func; > + const char *name; > + > + unsigned int reset_count; > +}; > + > +static int begin_live_test(struct live_test *t, > + struct drm_i915_private *i915, > + const char *func, > + const char *name) > +{ > + int err; > + > + t->i915 = i915; > + t->func = func; > + t->name = name; > + > + err = i915_gem_wait_for_idle(i915, > + I915_WAIT_LOCKED, > + MAX_SCHEDULE_TIMEOUT); > + if (err) { > + pr_err("%s(%s): failed to idle before, with err=%d!", > + func, name, err); > + return err; > + } > + > + i915->gpu_error.missed_irq_rings = 0; Blunt but should work as we check this for tests that matter and thus leftovers should not matter. 
> + t->reset_count = i915_reset_count(&i915->gpu_error); > + > + return 0; > +} > + > +static int end_live_test(struct live_test *t) > +{ > + struct drm_i915_private *i915 = t->i915; > + > + i915_retire_requests(i915); > + > + if (wait_for(intel_engines_are_idle(i915), 10)) { > + pr_err("%s(%s): GPU not idle\n", t->func, t->name); > + return -EIO; > + } > + > + if (t->reset_count != i915_reset_count(&i915->gpu_error)) { > + pr_err("%s(%s): GPU was reset %d times!\n", > + t->func, t->name, > + i915_reset_count(&i915->gpu_error) - t->reset_count); > + return -EIO; > + } > + > + if (i915->gpu_error.missed_irq_rings) { > + pr_err("%s(%s): Missed interrupts on engines %lx\n", > + t->func, t->name, i915->gpu_error.missed_irq_rings); > + return -EIO; > + } > + > + return 0; > +} > + > +static int live_nop_switch(void *arg) > +{ > + const unsigned int nctx = 1024; > + struct drm_i915_private *i915 = arg; > + struct intel_engine_cs *engine; > + struct i915_gem_context **ctx; > + enum intel_engine_id id; > + struct drm_file *file; > + struct live_test t; > + unsigned long n; > + int err = -ENODEV; > + > + /* > + * Create as many contexts as we can feasibly get away with > + * and check we can switch between them rapidly. > + * > + * Serves as very simple stress test for submission and HW switching > + * between contexts. 
> + */ > + > + if (!DRIVER_CAPS(i915)->has_logical_contexts) > + return 0; > + > + file = mock_file(i915); > + if (IS_ERR(file)) > + return PTR_ERR(file); > + > + mutex_lock(&i915->drm.struct_mutex); > + > + ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); > + if (!ctx) { > + err = -ENOMEM; > + goto out_unlock; > + } > + > + for (n = 0; n < nctx; n++) { > + ctx[n] = i915_gem_create_context(i915, file->driver_priv); > + if (IS_ERR(ctx[n])) { > + err = PTR_ERR(ctx[n]); > + goto out_unlock; > + } > + } > + > + for_each_engine(engine, i915, id) { > + struct i915_request *rq; > + unsigned long end_time, prime; > + ktime_t times[2] = {}; > + > + times[0] = ktime_get_raw(); > + for (n = 0; n < nctx; n++) { > + rq = i915_request_alloc(engine, ctx[n]); > + if (IS_ERR(rq)) { > + err = PTR_ERR(rq); > + goto out_unlock; > + } > + i915_request_add(rq); > + } > + i915_request_wait(rq, > + I915_WAIT_LOCKED, > + MAX_SCHEDULE_TIMEOUT); > + times[1] = ktime_get_raw(); > + > + pr_info("Populated %d contexts on %s in %lluns\n", > + nctx, engine->name, ktime_to_ns(times[1] - times[0])); > + > + err = begin_live_test(&t, i915, __func__, engine->name); > + if (err) > + goto out_unlock; > + > + end_time = jiffies + i915_selftest.timeout_jiffies; > + for_each_prime_number_from(prime, 2, 8192) { > + times[1] = ktime_get_raw(); > + > + for (n = 0; n < prime; n++) { > + rq = i915_request_alloc(engine, ctx[n % nctx]); > + if (IS_ERR(rq)) { > + err = PTR_ERR(rq); > + goto out_unlock; > + } > + > + /* > + * This space is left intentionally blank. > + * > + * We do not actually want to perform any > + * action with this request, we just want > + * to measure the latency in allocation > + * and submission of our breadcrumbs - > + * ensuring that the bare request is sufficient > + * for the system to work (i.e. proper HEAD > + * tracking of the rings, interrupt handling, > + * etc). It also gives us the lowest bounds > + * for latency. 
> + */ > + > + i915_request_add(rq); We get the breadcrumbs in place, with the wa_tails and so on, so this should work without any nops or bb ends. > + } > + i915_request_wait(rq, > + I915_WAIT_LOCKED, > + MAX_SCHEDULE_TIMEOUT); > + > + times[1] = ktime_sub(ktime_get_raw(), times[1]); > + if (prime == 2) > + times[0] = times[1]; > + > + if (__igt_timeout(end_time, NULL)) > + break; > + } > + > + err = end_live_test(&t); > + if (err) > + goto out_unlock; > + > + pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", > + engine->name, > + ktime_to_ns(times[0]), > + prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); > + } > + > +out_unlock: > + mutex_unlock(&i915->drm.struct_mutex); > + mock_file_free(i915, file); > + return err; > +} > + > static struct i915_vma * > gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) > { > @@ -718,6 +905,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) > { > static const struct i915_subtest tests[] = { > SUBTEST(igt_switch_to_kernel_context), > + SUBTEST(live_nop_switch), Due to the nature of the test, the 'nop' in the name is slightly misleading: it suggests there is a nop in the ring. This is more of a nop request, or an empty request. 'live_bare|empty_switch'? I can't think of anything better, so I don't insist; regardless, Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > SUBTEST(igt_ctx_exec), > SUBTEST(igt_ctx_readonly), > }; > -- > 2.19.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx