Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > We assume that both timestamps are driven off the same clock [reported > to userspace as I915_PARAM_CS_TIMESTAMP_FREQUENCY]. Verify that this is > so by reading the timestamp registers around a busywait (on an otherwise > idle engine so there should be no preemptions). > > v2: Icelake (not ehl, nor tgl) seems to be using a fixed 80ns interval > for, and only for, CTX_TIMESTAMP -- or it may be GPU frequency and the > test is always running at maximum frequency?. As far as I can tell, this > isolated change in behaviour is undocumented. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 203 ++++++++++++++++++- > 1 file changed, 202 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c > index 163a10b07f85..d88504a5d69c 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c > +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c > @@ -4,15 +4,215 @@ > * Copyright © 2018 Intel Corporation > */ > > -#include "intel_gpu_commands.h" > +#include <linux/sort.h> > > #include "i915_selftest.h" > +#include "intel_gpu_commands.h" > +#include "intel_gt_clock_utils.h" > #include "selftest_engine.h" > #include "selftest_engine_heartbeat.h" > #include "selftests/igt_atomic.h" > #include "selftests/igt_flush_test.h" > #include "selftests/igt_spinner.h" > > +#define COUNT 5 > + > +static int cmp_u64(const void *A, const void *B) > +{ > + const u64 *a = A, *b = B; > + > + return *a - *b; > +} > + > +static u64 trifilter(u64 *a) > +{ > + sort(a, COUNT, sizeof(*a), cmp_u64, NULL); > + return (a[1] + 2 * a[2] + a[3]) >> 2; > +} > + > +static u32 *emit_wait(u32 *cs, u32 offset, int op, u32 value) > +{ > + *cs++ = MI_SEMAPHORE_WAIT | > + MI_SEMAPHORE_GLOBAL_GTT | > + MI_SEMAPHORE_POLL | > + op; > + *cs++ = value; > + *cs++ = offset; > + *cs++ = 0; > + > + return cs; > +} > + > +static u32 *emit_store(u32 *cs, u32 offset, u32 value) > +{ > + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; > + *cs++ = offset; > + *cs++ = 0; > + *cs++ = value; > + > + return cs; > +} > + > +static u32 *emit_srm(u32 *cs, i915_reg_t reg, u32 offset) > +{ > + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; > + *cs++ = i915_mmio_reg_offset(reg); > + *cs++ = offset; > + *cs++ = 0; > + > + return cs; > +} > + > +static void write_semaphore(u32 *x, u32 value) > +{ > + WRITE_ONCE(*x, value); > + wmb(); > +} > + > +static int __measure_timestamps(struct intel_context *ce, > + u64 *dt, u64 *d_ring, u64 *d_ctx) > +{ > + struct intel_engine_cs *engine = ce->engine; > + u32 *sema = memset32(engine->status_page.addr + 1000, 0, 5); > + u32 offset = i915_ggtt_offset(engine->status_page.vma); > + struct i915_request *rq; > + u32 *cs; > + > + rq = intel_context_create_request(ce); > + if (IS_ERR(rq)) > + return PTR_ERR(rq); > + > + cs = intel_ring_begin(rq, 28); > + if (IS_ERR(cs)) { > + i915_request_add(rq); > + return PTR_ERR(cs); > + } > + > + /* Signal & wait for start */ > + cs = emit_store(cs, offset + 4008, 1); > + cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_NEQ_SDD, 1); > + > + cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4000); > + cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4004); > + > + /* Busy wait */ > + cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_EQ_SDD, 1); > + > + cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4016); > + cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4012); > + > + intel_ring_advance(rq, cs); > + i915_request_get(rq); > + i915_request_add(rq); > + intel_engine_flush_submission(engine); > + > + /* Wait for the request to start executing, that then waits for us */ > + while (READ_ONCE(sema[2]) == 0) > + cpu_relax(); > + > + /* Run the request for a 100us, sampling timestamps before/after */ > + preempt_disable(); > + *dt = ktime_get_raw_fast_ns(); > + write_semaphore(&sema[2], 0); > + udelay(100); > + write_semaphore(&sema[2], 1); > + *dt = ktime_get_raw_fast_ns() - *dt; > + preempt_enable(); > + > + if (i915_request_wait(rq, 0, HZ / 2) < 0) { > + i915_request_put(rq); > + return -ETIME; > + } > + i915_request_put(rq); > + > + pr_debug("%s CTX_TIMESTAMP: [%x, %x], RING_TIMESTAMP: [%x, %x]\n", > + engine->name, sema[1], sema[3], sema[0], sema[4]); > + > + *d_ctx = sema[3] - sema[1]; > + *d_ring = sema[4] - sema[0]; > + return 0; > +} > + > +static int __live_engine_timestamps(struct intel_engine_cs *engine) > +{ > + u64 s_ring[COUNT], s_ctx[COUNT], st[COUNT], d_ring, d_ctx, dt; > + struct intel_context *ce; > + int i, err = 0; > + > + ce = intel_context_create(engine); > + if (IS_ERR(ce)) > + return PTR_ERR(ce); > + > + for (i = 0; i < COUNT; i++) { > + err = __measure_timestamps(ce, &st[i], &s_ring[i], &s_ctx[i]); > + if (err) > + break; > + } > + intel_context_put(ce); > + if (err) > + return err; > + > + dt = trifilter(st); > + d_ring = trifilter(s_ring); > + d_ctx = trifilter(s_ctx); > + > + pr_info("%s elapsed:%lldns, CTX_TIMESTAMP:%dns, RING_TIMESTAMP:%dns\n", > + engine->name, dt, > + intel_gt_clock_interval_to_ns(engine->gt, d_ctx), > + intel_gt_clock_interval_to_ns(engine->gt, d_ring)); > + > + d_ring = intel_gt_clock_interval_to_ns(engine->gt, d_ring); > + if (3 * dt > 4 * d_ring || 4 * dt < 3 * d_ring) { > + pr_err("%s Mismatch between ring timestamp and walltime!\n", > + engine->name); > + return -EINVAL; > + } > + > + d_ring = trifilter(s_ring); > + d_ctx = trifilter(s_ctx); > + > + d_ctx *= RUNTIME_INFO(engine->i915)->cs_timestamp_frequency_hz; > + if (IS_ICELAKE(engine->i915)) > + d_ring *= 12500000; /* Fixed 80ns for icl ctx timestamp? */ This is...weird. But I am not going to argue against hardware. Will be interesting to see if this can find something funny. Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > + else > + d_ring *= RUNTIME_INFO(engine->i915)->cs_timestamp_frequency_hz; > + > + if (3 * d_ctx > 4 * d_ring || 4 * d_ctx < 3 * d_ring) { > + pr_err("%s Mismatch between ring and context timestamps!\n", > + engine->name); > + return -EINVAL; > + } > + > + return 0; > +} > + > +static int live_engine_timestamps(void *arg) > +{ > + struct intel_gt *gt = arg; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + > + /* > + * Check that CS_TIMESTAMP / CTX_TIMESTAMP are in sync, i.e. share > + * the same CS clock. > + */ > + > + if (INTEL_GEN(gt->i915) < 8) > + return 0; > + > + for_each_engine(engine, gt, id) { > + int err; > + > + st_engine_heartbeat_disable(engine); > + err = __live_engine_timestamps(engine); > + st_engine_heartbeat_enable(engine); > + if (err) > + return err; > + } > + > + return 0; > +} > + > static int live_engine_busy_stats(void *arg) > { > struct intel_gt *gt = arg; > @@ -179,6 +379,7 @@ static int live_engine_pm(void *arg) > int live_engine_pm_selftests(struct intel_gt *gt) > { > static const struct i915_subtest tests[] = { > + SUBTEST(live_engine_timestamps), > SUBTEST(live_engine_busy_stats), > SUBTEST(live_engine_pm), > }; > -- > 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx