On 16/12/16 12:37, Chris Wilson wrote:
On Fri, Dec 16, 2016 at 12:20:07PM -0800, Michel Thierry wrote:
From: Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx>
Driver maintains count of how many times a given engine is reset, useful to
capture this in error state also. It gives an idea of how engine is coping
up with the workloads it is executing before this error state.
Also i915_engine_info().
I'll add it there too.
Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
Signed-off-by: Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx>
Signed-off-by: Michel Thierry <michel.thierry@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_drv.c | 1 +
drivers/gpu/drm/i915/i915_drv.h | 9 +++++++++
drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++
3 files changed, 13 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a034793bc246..1c706a082d60 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1877,6 +1877,7 @@ int i915_reset_engine(struct intel_engine_cs *engine)
intel_engine_reset_cancel(engine);
intel_execlists_restart_submission(engine);
+ dev_priv->gpu_error.engine_reset_count[engine->id]++;
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a97cc8f50ade..25183762ed94 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -937,6 +937,7 @@ struct drm_i915_error_state {
enum intel_engine_hangcheck_action hangcheck_action;
struct i915_address_space *vm;
int num_requests;
+ u32 reset_count;
/* position of active request inside the ring */
u32 rq_head, rq_post, rq_tail;
@@ -1629,6 +1630,8 @@ struct i915_gpu_error {
#define I915_RESET_IN_PROGRESS 0
#define I915_WEDGED (BITS_PER_LONG - 1)
+ unsigned long engine_reset_count[I915_NUM_ENGINES];
+
/**
* Waitqueue to signal when a hang is detected. Used to for waiters
* to release the struct_mutex for the reset to procede.
@@ -3397,6 +3400,12 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
return READ_ONCE(error->reset_count);
}
+static inline u32 i915_engine_reset_count(struct i915_gpu_error *error,
That's a weird hodgepodge. We first think of intel_engine and are
confused, then we spot the reset. i915_reset_engine_count?
Agreed i915_reset_engine_count is better (plus it's called from
i915_reset_info).
+ struct intel_engine_cs *engine)
+{
+ return READ_ONCE(error->engine_reset_count[engine->id]);
+}
+
Then the same rename here, reset_engine_count[].
Thanks
void i915_gem_reset(struct drm_i915_private *dev_priv);
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
void i915_gem_reset_engine(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index e16037d1b0ba..f168ad873521 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -453,6 +453,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
err_printf(m, " hangcheck action timestamp: %lu, %u ms ago\n",
ee->hangcheck_timestamp,
jiffies_to_msecs(jiffies - ee->hangcheck_timestamp));
+ err_printf(m, " engine reset count: %u\n", ee->reset_count);
error_print_request(m, " ELSP[0]: ", &ee->execlist[0]);
error_print_request(m, " ELSP[1]: ", &ee->execlist[1]);
@@ -1170,6 +1171,8 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
ee->hangcheck_action = engine->hangcheck.action;
ee->hangcheck_stalled = engine->hangcheck.stalled;
+ ee->reset_count = i915_engine_reset_count(&dev_priv->gpu_error,
+ engine);
if (USES_PPGTT(dev_priv)) {
int i;
--
2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx