Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Some machines, *cough* snb *cough*, fail catastrophically if asked to > reset the GPU under certain conditions. The initial guess is that this > is when the rings are still busy at the time of the reset request > (because that's a pattern we've seen elsewhere, hence why we do try > gen3_stop_engines() before reset) so abandon the reset and leave the > device wedged, if gen3_stop_engines() fails. > > v2: Only give up if not idle after emptying the ring. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103240 > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/intel_uncore.c | 43 ++++++++++++++++++++++++++++--------- > 1 file changed, 33 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c > index 96ee6b2754be..f80dbff3595f 100644 > --- a/drivers/gpu/drm/i915/intel_uncore.c > +++ b/drivers/gpu/drm/i915/intel_uncore.c > @@ -1372,7 +1372,7 @@ int i915_reg_read_ioctl(struct drm_device *dev, > return ret; > } > > -static void gen3_stop_engine(struct intel_engine_cs *engine) > +static bool gen3_stop_engine(struct intel_engine_cs *engine) > { > struct drm_i915_private *dev_priv = engine->i915; > const u32 base = engine->mmio_base; > @@ -1392,26 +1392,46 @@ static void gen3_stop_engine(struct intel_engine_cs *engine) > I915_WRITE_FW(RING_HEAD(base), 0); > I915_WRITE_FW(RING_TAIL(base), 0); > > + /* Check acts as a post */ > + if (intel_wait_for_register_fw(dev_priv, > + mode, > + MODE_IDLE, > + MODE_IDLE, > + 1000)) { > + DRM_DEBUG_DRIVER("%s: timed out after clearing ring\n", > + engine->name); > + return false; > + } > + I recall that this bailout was the reason I didn't want to use the stop_ring in intel_ringbuffer.c Now if you choose to reintroduce the bailout, I think you can make a generic stop_engine and get rid of the copy. -Mika > /* The ring must be empty before it is disabled */ > I915_WRITE_FW(RING_CTL(base), 0); > + POSTING_READ_FW(RING_CTL(base)); > > - /* Check acts as a post */ > - if (I915_READ_FW(RING_HEAD(base)) != 0) > - DRM_DEBUG_DRIVER("%s: ring head not parked\n", > - engine->name); > + return true; > } > > -static void i915_stop_engines(struct drm_i915_private *dev_priv, > - unsigned engine_mask) > +static int i915_stop_engines(struct drm_i915_private *dev_priv, > + unsigned engine_mask) > { > struct intel_engine_cs *engine; > enum intel_engine_id id; > + bool idle; > > if (INTEL_GEN(dev_priv) < 3) > - return; > + return true; > > + idle = true; > for_each_engine_masked(engine, dev_priv, engine_mask, id) > - gen3_stop_engine(engine); > + idle &= gen3_stop_engine(engine); > + if (idle) > + return true; > + > + dev_err(dev_priv->drm.dev, "Failed to stop all engines\n"); > + for_each_engine_masked(engine, dev_priv, engine_mask, id) { > + struct drm_printer p = drm_debug_printer(__func__); > + intel_engine_dump(engine, &p); > + } > + return false; > } > > static bool i915_reset_complete(struct pci_dev *pdev) > @@ -1772,7 +1792,10 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) > * > * FIXME: Wa for more modern gens needs to be validated > */ > - i915_stop_engines(dev_priv, engine_mask); > + if (!i915_stop_engines(dev_priv, engine_mask)) { > + ret = -EIO; > + break; > + } > > ret = -ENODEV; > if (reset) > -- > 2.15.0.rc2 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx