Re: [PATCH v2 2/4] drm/i915: Verify the engine workarounds stick on application

Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> · Tue, 16 Apr 2019 11:36:12 +0100

On 16/04/2019 10:57, Chris Wilson wrote:
Quoting Tvrtko Ursulin (2019-04-16 10:43:43)

On 16/04/2019 09:10, Chris Wilson wrote:
Read the engine workarounds back using the GPU after loading the initial
context state to verify that we are setting them correctly, and bail if
it fails.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
   drivers/gpu/drm/i915/i915_gem.c               |   6 +
   drivers/gpu/drm/i915/intel_workarounds.c      | 120 ++++++++++++++++++
   drivers/gpu/drm/i915/intel_workarounds.h      |   2 +
   .../drm/i915/selftests/intel_workarounds.c    |  53 +-------
   4 files changed, 134 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0a818a60ad31..95ae69753e91 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4717,6 +4717,12 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
               i915_request_add(rq);
               if (err)
                       goto err_active;
+
+             if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) &&
+                 intel_engine_verify_workarounds(engine, "load")) {
+                     err = -EIO;
+                     goto err_active;

The two submission use different contexts timelines so strictly speaking
should be somehow explicitly serialized.

Yes, we are reading from the kernel context. Neither of these contexts
will overwrite the existing register state on first load, and there is
nothing to prevent fifo here. But it doesn't matter which order the read
is done, as the w/a are set by mmio, not engine->init_context().
Although I've been contemplating using init_context in case LRI work
better.

Using init_context for what, engine wa/?


It could be in a separate loop beforehand, the only catch is that we
expect to leave this function with the GT idle, so it's best done before
we park.

Yep since the placement in the ->init_context loop confused me I think a 
separate loop would indeed be better.

__i915_gem_restart_engines looks like the place, to follow 
engine->init_hw which applies the engine workarounds.

diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index 1c54b5030807..db99f2e676bb 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -1259,6 +1259,126 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
       wa_list_apply(engine->uncore, &engine->wa_list);
   }
   
+static struct i915_vma *
+create_scratch(struct i915_address_space *vm, int count)
+{
+     struct drm_i915_gem_object *obj;
+     struct i915_vma *vma;
+     unsigned int size;
+     int err;
+
+     size = round_up(count * 4, PAGE_SIZE);

sizeof(u32) if you want.

+     obj = i915_gem_object_create_internal(vm->i915, size);
+     if (IS_ERR(obj))
+             return ERR_CAST(obj);
+
+     i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
+
+     vma = i915_vma_instance(obj, vm, NULL);
+     if (IS_ERR(vma)) {
+             err = PTR_ERR(vma);
+             goto err_obj;
+     }
+
+     err = i915_vma_pin(vma, 0, 0,
+                        i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
+     if (err)
+             goto err_obj;
+
+     return vma;
+
+err_obj:
+     i915_gem_object_put(obj);
+     return ERR_PTR(err);
+}
+
+static int
+wa_list_srm(struct i915_request *rq,
+         const struct i915_wa_list *wal,
+         struct i915_vma *vma)
+{
+     const struct i915_wa *wa;
+     u32 srm, *cs;
+     int i;

A little bit of inconsistency between type of i here and in
engine_wa_list_verify. Up to you.

+
+     srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+     if (INTEL_GEN(rq->i915) >= 8)
+             srm++;
+
+     cs = intel_ring_begin(rq, 4 * wal->count);
+     if (IS_ERR(cs))
+             return PTR_ERR(cs);
+
+     for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
+             *cs++ = srm;
+             *cs++ = i915_mmio_reg_offset(wa->reg);
+             *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
+             *cs++ = 0;
+     }
+     intel_ring_advance(rq, cs);
+
+     return 0;
+}
+
+static int engine_wa_list_verify(struct intel_engine_cs *engine,
+                              const struct i915_wa_list * const wal,
+                              const char *from)
+{
+     const struct i915_wa *wa;
+     struct i915_request *rq;
+     struct i915_vma *vma;
+     unsigned int i;
+     u32 *results;
+     int err;
+
+     if (!wal->count)
+             return 0;
+
+     vma = create_scratch(&engine->i915->ggtt.vm, wal->count);
+     if (IS_ERR(vma))
+             return PTR_ERR(vma);
+
+     rq = i915_request_alloc(engine, engine->kernel_context->gem_context);
+     if (IS_ERR(rq)) {
+             err = PTR_ERR(rq);
+             goto err_vma;
+     }
+
+     err = wa_list_srm(rq, wal, vma);
+     if (err)
+             goto err_vma;
+
+     i915_request_add(rq);
+     if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {

Wouldn't:

err = i915_request_wait(...
if (err < 0 || !i915_request_completed())

be more correct?

It can't return for any other reason as it's an uninterruptible wait, so
it times out, or the request is completed.

If you want to consider that hangcheck could have kicked in the 0.2ms
and cancelled the request, then we could need to check
request->fence.error.

I was going by the comment for i915_request_wait which says "Returns the 
remaining time (in jiffies) if the request completed, which may be zero 
or -ETIME if the request is unfinished after the timeout expires.". So 
two timeout conditions. Anyways, I know it is highly hypothetical if that.



Okay so MMIO verification happens immediately after application and with
SRM from selftests.

What about the context workarounds? With the SRM infrastructure those
could be verified easily as well I think.

They can, one problem at at time :) I thought the context workarounds
were listed and checked by gem_workarounds. Aye, but only ctx_wa:

static int i915_wa_registers(struct seq_file *m, void *unused)
{
         struct drm_i915_private *i915 = node_to_i915(m->private);
         const struct i915_wa_list *wal = &i915->engine[RCS0]->ctx_wa_list;
         struct i915_wa *wa;
         unsigned int i;

         seq_printf(m, "Workarounds applied: %u\n", wal->count);
         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                 seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
                            i915_mmio_reg_offset(wa->reg), wa->val, wa->mask);

         return 0;
}




Yes one thing at a time is fine. :)

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx