Re: [Intel-gfx] [PATCH] drm/i915/perf: fix perf stream opening lock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 28 February 2018 at 11:45, Lionel Landwerlin
<lionel.g.landwerlin@xxxxxxxxx> wrote:
> We're seeing on CI that some contexts don't have the programmed OA
> period timer that directs the OA unit on how often to write reports.
>
> The issue is that we're not holding the drm lock from when we edit the
> context images down to when we set the exclusive_stream variable. This
> leaves a window for the deferred context allocation to call
> i915_oa_init_reg_state() that will not program the expected OA timer
> value, because we haven't set the exclusive_stream yet.
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
> Fixes: 701f8231a2f ("drm/i915/perf: prune OA configs")
> Cc: <stable@xxxxxxxxxxxxxxx> # v4.14+
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102254
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103715
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103755
> ---
>  drivers/gpu/drm/i915/i915_perf.c | 41 +++++++++++++++++++---------------------
>  1 file changed, 19 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 2741b1bc7095..7016abfc8ba9 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1757,21 +1757,16 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
>   */
>  static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>                                        const struct i915_oa_config *oa_config,
> -                                      bool interruptible)
> +                                      bool need_lock)
>  {
>         struct i915_gem_context *ctx;
>         int ret;
>         unsigned int wait_flags = I915_WAIT_LOCKED;
>
> -       if (interruptible) {
> -               ret = i915_mutex_lock_interruptible(&dev_priv->drm);
> -               if (ret)
> -                       return ret;
> -
> -               wait_flags |= I915_WAIT_INTERRUPTIBLE;
> -       } else {
> +       if (need_lock)
>                 mutex_lock(&dev_priv->drm.struct_mutex);
> -       }
> +
> +       lockdep_assert_held(&dev_priv->drm.struct_mutex);
>
>         /* Switch away from any user context. */
>         ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
> @@ -1819,7 +1814,8 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>         }
>
>   out:
> -       mutex_unlock(&dev_priv->drm.struct_mutex);
> +       if (need_lock)
> +               mutex_unlock(&dev_priv->drm.struct_mutex);
>
>         return ret;
>  }
> @@ -1863,7 +1859,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
>          * to make sure all slices/subslices are ON before writing to NOA
>          * registers.
>          */
> -       ret = gen8_configure_all_contexts(dev_priv, oa_config, true);
> +       ret = gen8_configure_all_contexts(dev_priv, oa_config, false);
>         if (ret)
>                 return ret;
>
> @@ -1878,7 +1874,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
>  static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
>  {
>         /* Reset all contexts' slices/subslices configurations. */
> -       gen8_configure_all_contexts(dev_priv, NULL, false);
> +       gen8_configure_all_contexts(dev_priv, NULL, true);

Maybe move the ops.disable_metric_set() to also be within the lock, so
need_lock=false, then we can get rid of need_lock?

>
>         I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
>                                       ~GT_NOA_ENABLE));
> @@ -1888,7 +1884,7 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
>  static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
>  {
>         /* Reset all contexts' slices/subslices configurations. */
> -       gen8_configure_all_contexts(dev_priv, NULL, false);
> +       gen8_configure_all_contexts(dev_priv, NULL, true);
>
>         /* Make sure we disable noa to save power. */
>         I915_WRITE(RPM_CONFIG1,
> @@ -2138,13 +2134,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>         if (ret)
>                 goto err_oa_buf_alloc;
>
> -       ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
> -                                                     stream->oa_config);
> -       if (ret)
> -               goto err_enable;
> -
> -       stream->ops = &i915_oa_stream_ops;
> -
>         /* Lock device for exclusive_stream access late because
>          * enable_metric_set() might lock as well on gen8+.
>          */

Ok, we can get rid of this comment now.

> @@ -2152,16 +2141,24 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>         if (ret)
>                 goto err_lock;
>
> +       ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
> +                                                     stream->oa_config);
> +       if (ret)
> +               goto err_enable;
> +
> +       stream->ops = &i915_oa_stream_ops;
> +
>         dev_priv->perf.oa.exclusive_stream = stream;
>
>         mutex_unlock(&dev_priv->drm.struct_mutex);
>
>         return 0;
>
> -err_lock:
> +err_enable:
> +       mutex_unlock(&dev_priv->drm.struct_mutex);
>         dev_priv->perf.oa.ops.disable_metric_set(dev_priv);

We can drop the disable_metric_set here; no need to disable it if we
never enabled it.

Nice catch,
Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx>



[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]