Quoting Carlos Santa (2019-02-14 02:57:09) > diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c > index 58b6ff8453dc..bc10acb24d9a 100644 > --- a/drivers/gpu/drm/i915/intel_hangcheck.c > +++ b/drivers/gpu/drm/i915/intel_hangcheck.c > @@ -218,7 +218,8 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, > > static void hangcheck_declare_hang(struct drm_i915_private *i915, > unsigned int hung, > - unsigned int stuck) > + unsigned int stuck, > + unsigned int watchdog) Absolutely not. > +/* From GEN9 onwards, all engines use the same RING_CNTR format */ > +static inline u32 get_watchdog_disable(struct intel_engine_cs *engine) > +{ > + if (engine->id == RCS || INTEL_GEN(engine->i915) >= 9) > + return GEN8_WATCHDOG_DISABLE; > + else > + return GEN8_XCS_WATCHDOG_DISABLE; > +} > + > +#define GEN8_WATCHDOG_1000US(dev_priv) watchdog_to_clock_counts(dev_priv, 1000) > +static void gen8_watchdog_irq_handler(unsigned long data) > +{ > + struct intel_engine_cs *engine = (struct intel_engine_cs *)data; > + struct drm_i915_private *dev_priv = engine->i915; > + enum forcewake_domains fw_domains; > + u32 current_seqno; > + > + switch (engine->class) { > + default: > + MISSING_CASE(engine->id); > + /* fall through */ > + case RENDER_CLASS: > + fw_domains = FORCEWAKE_RENDER; > + break; > + case VIDEO_DECODE_CLASS: > + case VIDEO_ENHANCEMENT_CLASS: > + fw_domains = FORCEWAKE_MEDIA; > + break; > + } > + > + intel_uncore_forcewake_get(dev_priv, fw_domains); Doesn't scale well down into the 1ms response range. > + /* Stop the counter to prevent further timeout interrupts */ > + I915_WRITE_FW(RING_CNTR(engine->mmio_base), get_watchdog_disable(engine)); A bit late? About 200ms or more may have passed since the interrupt, so what's the point? > + current_seqno = intel_engine_get_hangcheck_seqno(engine); Doesn't exist. Only may exist as a temporary hack. If we need the seqno counter, include it with the watchdog LRI. > + /* did the request complete after the timer expired? */ > + if (engine->hangcheck.next_seqno == current_seqno) > + goto fw_put; > + > + if (engine->hangcheck.watchdog == current_seqno) { > + /* Make sure the active request will be marked as guilty */ > + engine->hangcheck.acthd = intel_engine_get_active_head(engine); > + engine->hangcheck.last_seqno = current_seqno; Incorrect even for the current code. And as I keep repeating the design is for the engine reset to be processed immediately. > + /* And try to run the hangcheck_work as soon as possible */ > + set_bit(I915_RESET_WATCHDOG, &dev_priv->gpu_error.flags); > + queue_delayed_work(system_long_wq, > + &dev_priv->gpu_error.hangcheck_work, > + round_jiffies_up_relative(HZ)); > + } else { > + engine->hangcheck.watchdog = current_seqno; > + /* Re-start the counter, if really hung, it will expire again */ > + I915_WRITE_FW(RING_THRESH(engine->mmio_base), > + GEN8_WATCHDOG_1000US(dev_priv)); > + I915_WRITE_FW(RING_CNTR(engine->mmio_base), GEN8_WATCHDOG_ENABLE); > + } > + > +fw_put: > + intel_uncore_forcewake_put(dev_priv, fw_domains); > +} _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx