Quoting Michel Thierry (2018-03-19 16:48:01) > On 16/03/18 14:50, Chris Wilson wrote: > > Not all callers want the GPU error to handled in the same way, so expose > > a control parameter. In the first instance, some callers do not want the > > heavyweight error capture so add a bit to request the state to be > > captured and saved. > > > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > > Cc: Jeff McGee <jeff.mcgee@xxxxxxxxx> > > Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> > > --- > > drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- > > drivers/gpu/drm/i915/i915_drv.h | 4 +++- > > drivers/gpu/drm/i915/i915_irq.c | 22 ++++++++++++++-------- > > drivers/gpu/drm/i915/intel_hangcheck.c | 6 +++--- > > drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 2 +- > > 5 files changed, 23 insertions(+), 15 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > > index 5378863e3238..03b74a92caed 100644 > > --- a/drivers/gpu/drm/i915/i915_debugfs.c > > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > > @@ -3952,8 +3952,8 @@ i915_wedged_set(void *data, u64 val) > > engine->hangcheck.stalled = true; > > } > > > > - i915_handle_error(i915, val, "Manually set wedged engine mask = %llx", > > - val); > > + i915_handle_error(i915, val, I915_ERROR_CAPTURE, > > + "Manually set wedged engine mask = %llx", val); > > > > wait_on_bit(&i915->gpu_error.flags, > > I915_RESET_HANDOFF, > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > > index e27ba8fb64e6..53009ba50640 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.h > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > @@ -2751,10 +2751,12 @@ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) > > &dev_priv->gpu_error.hangcheck_work, delay); > > } > > > > -__printf(3, 4) > > +__printf(4, 5) > > void i915_handle_error(struct drm_i915_private *dev_priv, > > u32 engine_mask, > > + unsigned long flags, > > const char *fmt, ...); > > +#define I915_ERROR_CAPTURE BIT(0) > > > Should this be in the new i915_gpu_error.h? And move it over from i915_irq.c to somewhere more useful? Probably intel_hangcheck.c since i915_gpu_error.c is conditionally compiled. > > extern void intel_irq_init(struct drm_i915_private *dev_priv); > > extern void intel_irq_fini(struct drm_i915_private *dev_priv); > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > > index 44eef355e12c..b3a4dc7cb26c 100644 > > --- a/drivers/gpu/drm/i915/i915_irq.c > > +++ b/drivers/gpu/drm/i915/i915_irq.c > > @@ -2955,6 +2955,7 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv) > > * i915_handle_error - handle a gpu error > > * @dev_priv: i915 device private > > * @engine_mask: mask representing engines that are hung > > + * @flags: control flags > > * @fmt: Error message format string > > * > > * Do some basic checking of register state at error time and > > @@ -2965,16 +2966,11 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv) > > */ > > void i915_handle_error(struct drm_i915_private *dev_priv, > > u32 engine_mask, > > + unsigned long flags, > > const char *fmt, ...) > > { > > struct intel_engine_cs *engine; > > unsigned int tmp; > > - va_list args; > > - char error_msg[80]; > > - > > - va_start(args, fmt); > > - vscnprintf(error_msg, sizeof(error_msg), fmt, args); > > - va_end(args); > > > > /* > > * In most cases it's guaranteed that we get here with an RPM > > @@ -2986,8 +2982,18 @@ void i915_handle_error(struct drm_i915_private *dev_priv, > > intel_runtime_pm_get(dev_priv); > > > > engine_mask &= INTEL_INFO(dev_priv)->ring_mask; > > - i915_capture_error_state(dev_priv, engine_mask, error_msg); > > - i915_clear_error_registers(dev_priv); > > + > > + if (flags & I915_ERROR_CAPTURE) { > > + char error_msg[80]; > > + va_list args; > > + > > + va_start(args, fmt); > > + vscnprintf(error_msg, sizeof(error_msg), fmt, args); > > + va_end(args); > > + > > + i915_capture_error_state(dev_priv, engine_mask, error_msg); > > + i915_clear_error_registers(dev_priv); > > + } > else > DRM_INFO or DRM_NOTE the error_msg ? > > Otherwise the 'kicking wait/semaphore' text from below will never be > printed. I liked it disappearing ;) So we should feed it to i915_reset(const char *reason). That could then probably replace I915_RESET_QUIET. -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx