Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes:

> Quoting Mika Kuoppala (2020-04-21 14:16:31)
>> Indirect ctx batchbuffers are a hw feature whereby a batch can be run,
>> by hardware, during the context restoration stage. The driver can set
>> up a batchbuffer and also an offset into the context image. When the
>> context image is marshalled from memory to registers, and the offset
>> from the start of the context register state matches what the driver
>> pre-determined, the batch will run. So one can manipulate the context
>> restoration process at the granularity of a single lri, within some
>> limitations, as you need to have the rudiments in place before a
>> batch can run.
>>
>> Add a selftest which writes the ring start register to a canary spot.
>> This tests that the hardware will run a batchbuffer for the context
>> in question.
>>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
>> ---
>>  drivers/gpu/drm/i915/gt/selftest_lrc.c | 156 ++++++++++++++++++++++++-
>>  1 file changed, 155 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
>> index 32d2b0850dec..32c4096b627b 100644
>> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
>> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
>> @@ -5363,6 +5363,159 @@ static int live_lrc_isolation(void *arg)
>>  	return err;
>>  }
>>
>> +static int ctx_bb_submit_req(struct intel_context *ce)
>> +{
>> +	struct i915_request *rq;
>> +	int err;
>> +
>> +	rq = intel_context_create_request(ce);
>> +	if (IS_ERR(rq))
>> +		return PTR_ERR(rq);
>> +
>> +	i915_request_get(rq);
>> +	i915_request_add(rq);
>> +
>> +	err = i915_request_wait(rq, 0, HZ / 5);
>> +	if (err < 0)
>> +		pr_err("%s: request not completed!\n", rq->engine->name);
>> +
>> +	i915_request_put(rq);
>> +
>> +	return 0;
>
> 	if (i915_request_wait() < 0)
> 		err = -ETIME;
>
> 	return err;
>
>> +}
>> +
>> +#define CTX_BB_CANARY_OFFSET (3*1024)
>> +#define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET/sizeof(u32))
>> +
>> +static u32 *
>> +emit_ctx_bb_canary(struct intel_context *ce, u32 *cs)
>> +{
>> +	const u32 ring_start_reg = i915_mmio_reg_offset(RING_START(0));
>> +	const u32 srm = MI_STORE_REGISTER_MEM_GEN8 |
>> +		MI_SRM_LRM_GLOBAL_GTT | MI_LRI_LRM_CS_MMIO;
>> +
>> +	*cs++ = srm;
>> +	*cs++ = ring_start_reg;
>> +	*cs++ = i915_ggtt_offset(ce->state) +
>> +		ce->ctx_bb_offset + CTX_BB_CANARY_OFFSET;
>> +	*cs++ = 0;
>> +
>> +	return cs;
>> +}
>> +
>> +static void
>> +ctx_bb_setup(struct intel_context *ce)
>> +{
>> +	u32 *cs = context_indirect_bb(ce);
>> +
>> +	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
>> +
>> +	setup_indirect_ctx_bb(ce, emit_ctx_bb_canary);
>> +}
>> +
>> +static bool check_ring_start(struct intel_context *ce)
>> +{
>> +	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
>> +		LRC_STATE_PN * PAGE_SIZE + ce->ctx_bb_offset;
>
> _OFFSET or did this get updated?
>
>> +
>> +	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
>> +		return true;
>> +
>> +	pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
>> +	       ctx_bb[CTX_BB_CANARY_INDEX],
>> +	       ce->lrc_reg_state[CTX_RING_START]);
>> +
>> +	return false;
>> +}
>> +
>> +static int ctx_bb_check(struct intel_context *ce)
>> +{
>> +	int err;
>> +
>> +	err = ctx_bb_submit_req(ce);
>> +	if (err)
>> +		return err;
>> +
>> +	if (!check_ring_start(ce))
>> +		return -EINVAL;
>> +
>> +	return 0;
>> +}
>> +
>> +static int __per_ctx_bb(struct intel_engine_cs *engine)
>> +{
>> +	struct intel_context *ce1, *ce2;
>
> I'd vote for a, b; a greater %% of unique characters for ease of the
> reader.
>
>> +	int err = 0;
>> +
>> +	ce1 = intel_context_create(engine);
>> +	ce2 = intel_context_create(engine);
>> +
>> +	err = intel_context_pin(ce1);
>> +	if (err)
>> +		return err;
>> +
>> +	err = intel_context_pin(ce2);
>> +	if (err) {
>> +		intel_context_put(ce1);
>> +		return err;
>> +	}
>> +
>> +	/* We use the already reserved extra page in context state */
>> +	if (!ce1->ctx_bb_offset) {
>> +		GEM_BUG_ON(ce2->ctx_bb_offset);
>> +		GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
>> +		goto out;
>> +	}
>> +
>> +	/* In order to test that our per context bb is truly per context,
>> +	 * and executes at the intended spot on context restoring process,
>> +	 * make the batch store the ring start value to memory.
>> +	 * As ring start is restored apriori of starting the indirect ctx bb and
>> +	 * as it will be different for each context, it fits to this purpose.
>> +	 */
>> +	ctx_bb_setup(ce1);
>> +	ctx_bb_setup(ce2);
>> +
>> +	err = ctx_bb_check(ce1);
>> +	if (err)
>> +		goto out;
>> +
>> +	err = ctx_bb_check(ce2);
>> +out:
>> +	intel_context_unpin(ce2);
>> +	intel_context_put(ce2);
>> +
>> +	intel_context_unpin(ce1);
>> +	intel_context_put(ce1);
>> +
>> +	return err;
>> +}
>> +
>> +static int live_lrc_indirect_ctx_bb(void *arg)
>> +{
>> +	struct intel_gt *gt = arg;
>> +	struct intel_engine_cs *engine;
>> +	enum intel_engine_id id;
>> +	int err = 0;
>> +
>> +	for_each_engine(engine, gt, id) {
>> +
>> +		intel_engine_pm_get(engine);
>> +		err = __per_ctx_bb(engine);
>> +		intel_engine_pm_put(engine);
>> +
>> +		if (err)
>> +			break;
>> +
>> +		if (igt_flush_test(gt->i915)) {
>> +			err = -EIO;
>> +			break;
>> +		}
>
> 	for_each_engine() {
> 		intel_engine_pm_get()
> 		err = __per_ctx_bb();
> 		intel_engine_pm_put();
> 		if (igt_flush_test())
> 			err = -EIO;
> 		if (err)
> 			break;
>
> __per_ctx_bb is a bit confusing, no?

It is; remnants of when this test did both indirect and per ctx. But
that is for future work if we ever need it.

>
> Should it be __live_lrc_indirect_ctx_bb or __lrc_indirect_ctx_bb?

Both are better; I will try to see which fits.

-Mika

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
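For reference, below is a minimal sketch of how the two review suggestions
above could be folded in: returning -ETIME when the request wait times out
instead of always returning 0, and running igt_flush_test() before a single
error check inside the engine loop. It reuses only the i915 selftest helpers
already visible in the quoted patch; it is illustrative, not the posted
follow-up revision.

static int ctx_bb_submit_req(struct intel_context *ce)
{
	struct i915_request *rq;
	int err = 0;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	/* Propagate the timeout instead of only logging it. */
	if (i915_request_wait(rq, 0, HZ / 5) < 0)
		err = -ETIME;

	i915_request_put(rq);

	return err;
}

static int live_lrc_indirect_ctx_bb(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		intel_engine_pm_get(engine);
		err = __per_ctx_bb(engine);
		intel_engine_pm_put(engine);

		/* Flush before the error check so a wedged GT is not masked. */
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	return err;
}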