Re: [PATCH] drm/i915: read EU powergating status from command streamer

Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> · Thu, 21 Jun 2018 14:14:52 +0100

On 21/06/18 12:54, Chris Wilson wrote:
Quoting Lionel Landwerlin (2018-06-21 12:27:12)
Powergating of the EU array is configured as part of the context
image. This seems to imply we need a context to have run before we can
read the slice/subslice/EU powergating status.

This change captures the values of the powergating status registers
from the command streamer to ensure a valid we get valid values. This
is currently used only on gen9+ but someone could rightfully argue to
go as far as gen8.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103484
---
  drivers/gpu/drm/i915/i915_debugfs.c | 177 ++++++++++++++++++++++++----
  1 file changed, 155 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c400f42a54ec..0da8ce8acbcb 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4310,14 +4310,120 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
  #undef SS_MAX
  }
  
-static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
-                                    struct sseu_dev_info *sseu)
+static struct drm_i915_gem_object *
+gen9_read_registers(struct drm_i915_private *i915,
+                   u32 *register_offsets,
+                   u32 n_registers)
unsigned int n_register (or even unsigned long for total pedantry). I
don't see why you want to specify a bitwidth.

Sure.


+{
+       struct drm_i915_gem_object *bo;
+       struct i915_request *rq;
+       struct i915_vma *vma;
+       u32 *cs, bo_size = PAGE_SIZE * DIV_ROUND_UP(n_registers * 4, PAGE_SIZE);
+       int ret, r;
+
+       ret = i915_mutex_lock_interruptible(&i915->drm);
+       if (ret)
+               return ERR_PTR(ret);
+
+       bo = i915_gem_object_create(i915, bo_size);
create_internal() (just be sure you don't read back uninitialised data)

bo_size = PAGE_ALIGN(sizeof(u32) * n_registers);

Thanks.


+       if (IS_ERR(bo)) {
+               ret = PTR_ERR(bo);
+               goto unlock;
+       }
+
+       ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
+       if (ret)
+               goto put_bo;
+
+
+       vma = i915_vma_instance(bo,
+                               &i915->kernel_context->ppgtt->vm,
+                               NULL);
+       if (IS_ERR(vma)) {
+               ret = PTR_ERR(vma);
+               goto put_bo;
+       }
+
+       ret = i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, PIN_USER);
CONTEXT_ALIGN?

Dammit... copy-paste...


+       if (ret)
+               goto vma_unpin;
+
+
+       rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context);
+       if (IS_ERR(rq)) {
+               ret = PTR_ERR(rq);
+               goto vma_unpin;
+       }
+
+       cs = intel_ring_begin(rq, n_registers * 4);
+       if (IS_ERR(cs)) {
+               i915_request_add(rq);
+               ret = PTR_ERR(cs);
+               goto vma_unpin;
+       }
+
+       for (r = 0; r < n_registers; r++) {
+               u64 offset = vma->node.start + r * 4;
+
+               *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+               *cs++ = register_offsets[r];
+               *cs++ = lower_32_bits(offset);
+               *cs++ = upper_32_bits(offset);
+       }
I think you should i915_vma_move_to_active() for safety and give it
an active reference.

Indeed.


+       intel_ring_advance(rq, cs);
+
+       i915_request_add(rq);
+
+       mutex_unlock(&i915->drm.struct_mutex);
+
+       i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
For sanity, check for an error. (Hence why using vma_move_to_active
becomes relevant.)

Thanks!


+       return bo;
+
+vma_unpin:
+       i915_vma_unpin(vma);
+put_bo:
+       i915_gem_object_put(bo);
+unlock:
+       mutex_unlock(&i915->drm.struct_mutex);
+       return bo;
Report the ERR_PTR(ret) (otherwise, a neat use-after-free).

Oops my bad.


+}
+
+
+static int gen10_sseu_device_status(struct drm_i915_private *dev_priv,
+                                   struct sseu_dev_info *sseu)
  {
  #define SS_MAX 6
         const struct intel_device_info *info = INTEL_INFO(dev_priv);
-       u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2];
+       struct drm_i915_gem_object *bo;
+       struct {
+               u32 s_reg[SS_MAX];
+               u32 eu_reg[2 * SS_MAX];
+       } reg_offsets, *reg_data;
+       u32 eu_mask[2];
         int s, ss;
  
+       for (s = 0; s < info->sseu.max_slices; s++) {
+               reg_offsets.s_reg[s] =
+                       i915_mmio_reg_offset(GEN10_SLICE_PGCTL_ACK(s));
+               reg_offsets.eu_reg[2 * s] =
+                       i915_mmio_reg_offset(GEN10_SS01_EU_PGCTL_ACK(s));
+               reg_offsets.eu_reg[2 * s] =
+                       i915_mmio_reg_offset(GEN10_SS23_EU_PGCTL_ACK(s));
I started by wishing you used i915_mmio_t, but I can appreciate the
logic of having offset/data tied together in the same layout.

+       }
+
+       bo = gen9_read_registers(dev_priv, reg_offsets.s_reg,
+                                sizeof(reg_offsets) / sizeof(u32));
+       if (IS_ERR(bo))
+               return PTR_ERR(bo);
I'd prefer this to use i915_gem_object_to_cpu_domain() but there's no
advantage to that, and you've set it up to just work. So leave it.

+       reg_data = i915_gem_object_pin_map(bo, I915_MAP_WB);
+       if (IS_ERR(reg_data)) {
+               i915_gem_object_put(bo);
+               return PTR_ERR(reg_data);
+       }
Other than questioning your sanity here at doing this rather than just
deleting the debugfs, there's nothing inherently broken. I would do it
for gen8 as well, no point leaving the odd one out.

We have an igt tests checking these values and because you haven't 
brought up deleting the test before, I assumed we still wanted it fixed.
But now that you brought it up, I'm equally fine deleting this debugfs 
code and removing the igt test.


How about if we just hexdumped the kernel_context image and let the
debugger inspect any and all registers at their discretion?

Hmm... these Ack registers aren't saved in the context image I think.

-Chris


_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx