On blb and pnv, we are seeing sporadic
i915 0000:00:02.0: Resetting chip after gpu hang
[drm:intel_gpu_reset [i915]] rcs0: timed out on STOP_RING
[drm:i915_reset [i915]] *ERROR* Failed hw init on reset -5
which notably lack the actual root cause of the error. Ostensibly it
should be the init_ring_common() that failed, but it's error paths are
covered by DRM_ERROR.
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
drivers/gpu/drm/i915/i915_drv.c | 6 ++++--
drivers/gpu/drm/i915/i915_gem.c | 11 ++++++++---
drivers/gpu/drm/i915/intel_ringbuffer.c | 18 +++++++++---------
3 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e9f1daf258fe..bb605e0b996c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1957,7 +1957,8 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
*/
ret = i915_ggtt_enable_hw(i915);
if (ret) {
- DRM_ERROR("Failed to re-enable GGTT following reset %d\n", ret);
+ DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n",
+ ret);
goto error;
}
@@ -1974,7 +1975,8 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags)
*/
ret = i915_gem_init_hw(i915);
if (ret) {
- DRM_ERROR("Failed hw init on reset %d\n", ret);
+ DRM_ERROR("Failed to initialise HW following reset (%d)\n",
+ ret);
goto error;
}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1632f18e6a64..c1b80cd52f9e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5065,8 +5065,11 @@ static int __i915_gem_restart_engines(void *data)
for_each_engine(engine, i915, id) {
err = engine->init_hw(engine);
- if (err)
+ if (err) {
+ DRM_ERROR("Failed to restart %s (%d)\n",
+ engine->name, err);
return err;
+ }
}
return 0;
@@ -5118,14 +5121,16 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
ret = i915_ppgtt_init_hw(dev_priv);
if (ret) {
- DRM_ERROR("PPGTT enable HW failed %d\n", ret);
+ DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
goto out;
}
/* We can't enable contexts until all firmware is loaded */
ret = intel_uc_init_hw(dev_priv);
- if (ret)
+ if (ret) {
+ DRM_ERROR("Enabling uc failed (%d)\n", ret);
goto out;
+ }
intel_mocs_init_l3cc_table(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e2085820b586..0eb1b7ea3ed8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -453,13 +453,13 @@ static int init_ring_common(struct intel_engine_cs *engine)
if (!stop_ring(engine)) {
/* G45 ring initialization often fails to reset head to zero */
- DRM_DEBUG_KMS("%s head not reset to zero "
- "ctl %08x head %08x tail %08x start %08x\n",
- engine->name,
- I915_READ_CTL(engine),
- I915_READ_HEAD(engine),
- I915_READ_TAIL(engine),
- I915_READ_START(engine));
+ DRM_DEBUG_DRIVER("%s head not reset to zero "
+ "ctl %08x head %08x tail %08x start %08x\n",
+ engine->name,
+ I915_READ_CTL(engine),
+ I915_READ_HEAD(engine),
+ I915_READ_TAIL(engine),
+ I915_READ_START(engine));
if (!stop_ring(engine)) {
DRM_ERROR("failed to set %s head to zero "
@@ -492,8 +492,8 @@ static int init_ring_common(struct intel_engine_cs *engine)
/* WaClearRingBufHeadRegAtInit:ctg,elk */
if (I915_READ_HEAD(engine))
- DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
- engine->name, I915_READ_HEAD(engine));
+ DRM_DEBUG_DRIVER("%s initialization failed [head=%08x], fudging\n",
+ engine->name, I915_READ_HEAD(engine));
intel_ring_update_space(ring);
I915_WRITE_HEAD(engine, ring->head);