Quoting Mika Kuoppala (2018-08-10 15:00:36) > If engine reports that it is not ready for reset, we > give up. Evidence shows that forcing a per engine reset > on an engine which is not reporting to be ready for reset, > can bring it back into a working order. There is risk that > we corrupt the context image currently executing on that > engine. But that is a risk worth taking as if we unblock > the engine, we prevent a whole device wedging in a case > of full gpu reset. > > Reset individual engine even if it reports that it is not > prepared for reset, but only if we aim for full gpu reset > and not on first reset attempt. > > v2: force reset only on later attempts, readability (Chris) > > Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/intel_uncore.c | 49 +++++++++++++++++++++++------ > 1 file changed, 39 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c > index 027d14574bfa..d24026839b17 100644 > --- a/drivers/gpu/drm/i915/intel_uncore.c > +++ b/drivers/gpu/drm/i915/intel_uncore.c > @@ -2099,9 +2099,6 @@ static int gen8_reset_engine_start(struct intel_engine_cs *engine) > RESET_CTL_READY_TO_RESET, > 700, 0, > NULL); > - if (ret) > - DRM_ERROR("%s: reset request timeout\n", engine->name); > - > return ret; > } > > @@ -2113,6 +2110,42 @@ static void gen8_reset_engine_cancel(struct intel_engine_cs *engine) > _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET)); > } > > +static int reset_engines(struct drm_i915_private *i915, > + unsigned int engine_mask, > + unsigned int retry) > +{ > + if (INTEL_GEN(i915) >= 11) > + return gen11_reset_engines(i915, engine_mask); > + else > + return gen6_reset_engines(i915, engine_mask, retry); > +} > + > +static int gen8_prepare_engine_for_reset(struct intel_engine_cs *engine, > + unsigned int retry) > +{ > + const bool force_reset_on_non_ready = retry >= 1; > + int ret; > + > + ret = gen8_reset_engine_start(engine); > + > + if (ret && force_reset_on_non_ready) { > + /* > + * Try to unblock a single non-ready engine by risking > + * context corruption. > + */ > + ret = reset_engines(engine->i915, > + intel_engine_flag(engine), > + retry); > + if (!ret) > + ret = gen8_reset_engine_start(engine); > + > + DRM_ERROR("%s: reset request timeout, forcing reset (%d)\n", > + engine->name, ret); This looks dubious now ;) After the force you then do a reset in the caller. Twice the reset for twice the unpreparedness. > + } > + > + return ret; > +} > + > static int gen8_reset_engines(struct drm_i915_private *dev_priv, > unsigned int engine_mask, > unsigned int retry) > @@ -2122,16 +2155,12 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv, > int ret; > > for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { > - if (gen8_reset_engine_start(engine)) { > - ret = -EIO; > + ret = gen8_prepare_engine_for_reset(engine, retry); > + if (ret) if (ret && !force_reset_unread) > goto not_ready; > - } > } > > - if (INTEL_GEN(dev_priv) >= 11) > - ret = gen11_reset_engines(dev_priv, engine_mask); > - else > - ret = gen6_reset_engines(dev_priv, engine_mask, retry); > + ret = reset_engines(dev_priv, engine_mask, retry); > > not_ready: > for_each_engine_masked(engine, dev_priv, engine_mask, tmp) > -- > 2.17.1 > _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx