On 28/02/18 18:10, Matthew Auld wrote:
On 28 February 2018 at 11:45, Lionel Landwerlin
<lionel.g.landwerlin@xxxxxxxxx> wrote:
We're seeing on CI that some contexts don't have the programmed OA
period timer that directs the OA unit on how often to write reports.
The issue is that we're not holding the drm lock from when we edit the
context images down to when we set the exclusive_stream variable. This
leaves a window for the deferred context allocation to call
i915_oa_init_reg_state() that will not program the expected OA timer
value, because we haven't set the exclusive_stream yet.
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
Fixes: 701f8231a2f ("drm/i915/perf: prune OA configs")
Cc: <stable@xxxxxxxxxxxxxxx> # v4.14+
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102254
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103715
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103755
---
drivers/gpu/drm/i915/i915_perf.c | 41 +++++++++++++++++++---------------------
1 file changed, 19 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 2741b1bc7095..7016abfc8ba9 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1757,21 +1757,16 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
*/
static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
const struct i915_oa_config *oa_config,
- bool interruptible)
+ bool need_lock)
{
struct i915_gem_context *ctx;
int ret;
unsigned int wait_flags = I915_WAIT_LOCKED;
- if (interruptible) {
- ret = i915_mutex_lock_interruptible(&dev_priv->drm);
- if (ret)
- return ret;
-
- wait_flags |= I915_WAIT_INTERRUPTIBLE;
- } else {
+ if (need_lock)
mutex_lock(&dev_priv->drm.struct_mutex);
- }
+
+ lockdep_assert_held(&dev_priv->drm.struct_mutex);
/* Switch away from any user context. */
ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
@@ -1819,7 +1814,8 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
}
out:
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ if (need_lock)
+ mutex_unlock(&dev_priv->drm.struct_mutex);
return ret;
}
@@ -1863,7 +1859,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
* to make sure all slices/subslices are ON before writing to NOA
* registers.
*/
- ret = gen8_configure_all_contexts(dev_priv, oa_config, true);
+ ret = gen8_configure_all_contexts(dev_priv, oa_config, false);
if (ret)
return ret;
@@ -1878,7 +1874,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
{
/* Reset all contexts' slices/subslices configurations. */
- gen8_configure_all_contexts(dev_priv, NULL, false);
+ gen8_configure_all_contexts(dev_priv, NULL, true);
Maybe move the ops.disable_metric_set() to also be within the lock, so
need_lock=false, then we can get rid of need_lock?
Yeah, sounds like a nice cleanup.
I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
~GT_NOA_ENABLE));
@@ -1888,7 +1884,7 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
{
/* Reset all contexts' slices/subslices configurations. */
- gen8_configure_all_contexts(dev_priv, NULL, false);
+ gen8_configure_all_contexts(dev_priv, NULL, true);
/* Make sure we disable noa to save power. */
I915_WRITE(RPM_CONFIG1,
@@ -2138,13 +2134,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
if (ret)
goto err_oa_buf_alloc;
- ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
- stream->oa_config);
- if (ret)
- goto err_enable;
-
- stream->ops = &i915_oa_stream_ops;
-
/* Lock device for exclusive_stream access late because
* enable_metric_set() might lock as well on gen8+.
*/
Ok, we can get rid of this comment now.
Thanks, removing.
@@ -2152,16 +2141,24 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
if (ret)
goto err_lock;
+ ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
+ stream->oa_config);
+ if (ret)
+ goto err_enable;
+
+ stream->ops = &i915_oa_stream_ops;
+
dev_priv->perf.oa.exclusive_stream = stream;
mutex_unlock(&dev_priv->drm.struct_mutex);
return 0;
-err_lock:
+err_enable:
+ mutex_unlock(&dev_priv->drm.struct_mutex);
dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
We can drop the disable_metric_set here; no need to disable it if we
never enabled it.
Well in this case we did, we edited all the context images.
Better cleanup out changes.
Nice catch,
Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx