Re: [PATCH v5 09/10] drm/i915/perf: execute OA configuration from command stream

Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> · Thu, 27 Jun 2019 11:02:25 +0100

Quoting Lionel Landwerlin (2019-06-27 09:00:44)
> We can't run into issues with doing writing the global OA/NOA
> registers configuration from CPU so far, but HW engineers actually
> recommend doing this from the command streamer.
> 
> Since we have a command buffer prepared for the execbuffer side of
> things, we can reuse that approach here too.
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_perf.c | 203 +++++++++++++++----------------
>  1 file changed, 100 insertions(+), 103 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index bf4f5fee6764..7e636463e1f5 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -389,6 +389,19 @@ void i915_oa_config_release(struct kref *ref)
>         kfree(oa_config);
>  }
>  
> +static void i915_oa_config_dispose_buffers(struct drm_i915_private *i915)
> +{
> +       struct i915_oa_config *oa_config, *next;
> +
> +       mutex_lock(&i915->perf.metrics_lock);
> +       list_for_each_entry_safe(oa_config, next, &i915->perf.metrics_buffers, vma_link) {
> +               list_del(&oa_config->vma_link);
> +               i915_gem_object_put(oa_config->obj);
> +               oa_config->obj = NULL;
> +       }
> +       mutex_unlock(&i915->perf.metrics_lock);
> +}
> +
>  static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs)
>  {
>         u32 i;
> @@ -1813,67 +1826,86 @@ static int alloc_noa_wait(struct drm_i915_private *i915)
>         return ret;
>  }
>  
> -static void config_oa_regs(struct drm_i915_private *dev_priv,
> -                          const struct i915_oa_reg *regs,
> -                          u32 n_regs)
> +static int config_oa_regs(struct drm_i915_private *i915,
> +                         struct i915_oa_config *oa_config)
>  {
> -       u32 i;
> +       struct i915_request *rq;
> +       struct i915_vma *vma;
> +       long timeout;
> +       u32 *cs;
> +       int err;
>  
> -       for (i = 0; i < n_regs; i++) {
> -               const struct i915_oa_reg *reg = regs + i;
> +       rq = i915_request_create(i915->engine[RCS0]->kernel_context);
> +       if (IS_ERR(rq))
> +               return PTR_ERR(rq);
>  
> -               I915_WRITE(reg->addr, reg->value);
> +       err = i915_active_request_set(&i915->engine[RCS0]->last_oa_config,
> +                                     rq);

This will need an explicit mutex as it is not serialised by any single
timeline.

i915->perf.metrics_lock viable? Needs to be taken here and in execbuf.

> +       if (err) {
> +               i915_request_add(rq);
> +               return err;
> +       }
> +
> +       vma = i915_vma_instance(oa_config->obj, &i915->ggtt.vm, NULL);
> +       if (unlikely(IS_ERR(vma))) {
> +               i915_request_add(rq);
> +               return PTR_ERR(vma);
> +       }
> +
> +       err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
> +       if (err) {
> +               i915_request_add(rq);
> +               return err;
>         }

This time there is no excuse for not pinning outside of the
timeline->mutex.

> +
> +       err = i915_vma_move_to_active(vma, rq, 0);
> +       if (err) {
> +               i915_vma_unpin(vma);
> +               i915_request_add(rq);
> +               return err;
> +       }

> +
> +       cs = intel_ring_begin(rq, INTEL_GEN(i915) >= 8 ? 4 : 2);
> +       if (IS_ERR(cs)) {
> +               i915_vma_unpin(vma);
> +               i915_request_add(rq);
> +               return PTR_ERR(cs);
> +       }
> +
> +       if (INTEL_GEN(i915) > 8) {
> +               *cs++ = MI_BATCH_BUFFER_START_GEN8;
> +               *cs++ = lower_32_bits(vma->node.start);
> +               *cs++ = upper_32_bits(vma->node.start);
> +               *cs++ = MI_NOOP;
> +       } else {
> +               *cs++ = MI_BATCH_BUFFER_START;
> +               *cs++ = vma->node.start;
> +       }
> +
> +       intel_ring_advance(rq, cs);
> +
> +       i915_vma_unpin(vma);
> +
> +       i915_request_add(rq);
> +
> +       i915_request_get(rq);

Strictly, before the add, as the add is the xfer of the ref.

> +       timeout = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
> +                                   MAX_SCHEDULE_TIMEOUT);
> +       i915_request_put(rq);
> +
> +       return timeout < 0 ? err : 0;

Report err, but timeline carries the errno?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx