Quoting Mika Kuoppala (2017-10-26 15:01:44) > There is a possibility on gen9 hardware to miss the forcewake ack > message. The recommended workaround is to use another free > bit and toggle it until original bit is successfully acknowledged. > > The workaround is a bit misleadingly named as WaRsForcewakeAddDelayForAck. > The reason for naming is most likely that workaround was named before > the most recent recommendation evolved to use the reserve bit. > > Some future gen9 revs might or might not fix the underlying issue but > the fallback to reserve bit dance can be considered as harmless: > without the ack timeout we never reach the reserve bit forcewake. > Thus as of now we adopt a blanket approach for all gen9 and leave > the bypassing the reserve bit approach for future patches if > corresponding hw revisions do appear. > > Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms > for forcewake request/clear ack") did increase the forcewake timeout. > If the issue was a delayed ack, future work could include finding > a suitable timeout value both for primary ack and reserve toggle > to reduce the worst case latency. > > References: HSDES #1604254524 > References: https://bugs.freedesktop.org/show_bug.cgi?id=102051 > Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> > Cc: Sagar Arun Kamble <sagar.a.kamble@xxxxxxxxx> > Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_reg.h | 5 +- > drivers/gpu/drm/i915/intel_uncore.c | 112 +++++++++++++++++++++++++++++++++--- > 2 files changed, 108 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index f138eae82bf0..c04c634af5ae 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -7774,8 +7774,9 @@ enum { > #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0x0D88) > #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0x0D84) > #define FORCEWAKE_ACK_BLITTER_GEN9 _MMIO(0x130044) > -#define FORCEWAKE_KERNEL 0x1 > -#define FORCEWAKE_USER 0x2 > +#define FORCEWAKE_KERNEL BIT(0) > +#define FORCEWAKE_USER BIT(1) > +#define FORCEWAKE_RESERVE BIT(12) Why 12? How about 15? FORCEWAKE_KERNEL2 or FORCEWAKE_KERNEL_FALLBACK Reserved tends to imply future hw bits. So I think s/reserve/fallback/ works throughout the patch. > #define FORCEWAKE_MT_ACK _MMIO(0x130040) > #define ECOBUS _MMIO(0xa180) > #define FORCEWAKE_MT_ENABLE (1<<5) > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c > index 20e3c65c0999..fc6d090244c5 100644 > --- a/drivers/gpu/drm/i915/intel_uncore.c > +++ b/drivers/gpu/drm/i915/intel_uncore.c > @@ -69,17 +69,80 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d) > HRTIMER_MODE_REL); > } > > +static inline bool > +wait_ack_clear(const struct drm_i915_private *i915, > + const struct intel_uncore_forcewake_domain *d, > + const u32 ack) > +{ > + return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == 0, > + FORCEWAKE_ACK_TIMEOUT_MS); > +} > + > +static inline bool > +wait_ack_set(const struct drm_i915_private *i915, > + const struct intel_uncore_forcewake_domain *d, > + const u32 ack) > +{ > + return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack), > + FORCEWAKE_ACK_TIMEOUT_MS); > +} Let's make these one function, to cut down on the wait_for() expansions. Keeping the wait_ack_clear/wait_ack_set wrappers. static inline int __wait_for_ack(i915, d, ack, value) { return wait_for_atomic(((__raw_i915_read32(i915, d->reg_ack) & ack)) == value, FORCEWAKE_ACK_TIMEOUT_MS); } static inline int wait_ack_clear() { return __wait_for_ack(i915, d, ack, 0)); static inline int wait_ack_set() { return __wait_for_ack(i915, d, ack, ack)); > static inline void > fw_domain_wait_ack_clear(const struct drm_i915_private *i915, > const struct intel_uncore_forcewake_domain *d) > { > - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & > - FORCEWAKE_KERNEL) == 0, > - FORCEWAKE_ACK_TIMEOUT_MS)) > + if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL)) > DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n", > intel_uncore_forcewake_domain_to_str(d->id)); > } > > +enum ack_type { > + ACK_CLEAR = 0, > + ACK_SET > +}; > + > +static bool > +fw_domain_reserve_fallback(const struct drm_i915_private *i915, > + const struct intel_uncore_forcewake_domain *d, > + const enum ack_type type) > +{ > + bool timeout; > + int retry = 10; > + > + /* Fallback to toggle another fw bit to wake up the gpu */ The comment needs some work; the first bit itself was meant to wake the gpu, so why is this bit any more likely to work? > + do { > + wait_ack_clear(i915, d, FORCEWAKE_RESERVE); > + __raw_i915_write32(i915, d->reg_set, > + _MASKED_BIT_ENABLE(FORCEWAKE_RESERVE)); > + wait_ack_set(i915, d, FORCEWAKE_RESERVE); > + > + if (type == ACK_CLEAR) > + timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL); > + else > + timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL); > + > + __raw_i915_write32(i915, d->reg_set, > + _MASKED_BIT_DISABLE(FORCEWAKE_RESERVE)); Blindly toggling another bit to encourage the first to succeed. Why stop at 1 extra bit???? If the wait_ack_set(RSVD) fails, you might as well abort? Other than my face hitting the desk, the logic makes sense to me: Assert second fw bit; check original bit; clear second fw bit (so we don't keep fw longer than required). This basically also has the side-effect of making the timeout 12x longer. > + } while (timeout && --retry); > + > + return timeout; > +} > + > +static inline void > +fw_domain_wait_ack_clear_reserve(const struct drm_i915_private *i915, > + const struct intel_uncore_forcewake_domain *d) > +{ > + bool timeout; bool err; or none at all. timeout tends to be a value we pass around telling us how long until the timeout; not the status, which would be timedout. > + > + timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL); > + if (likely(!timeout)) > + return; > + > + timeout = fw_domain_reserve_fallback(i915, d, ACK_CLEAR); > + if (timeout) > + fw_domain_wait_ack_clear(i915, d); Ok, one last try and raise an error if it fails. > +} > + > static inline void > fw_domain_get(struct drm_i915_private *i915, > const struct intel_uncore_forcewake_domain *d) > @@ -91,14 +154,27 @@ static inline void > fw_domain_wait_ack(const struct drm_i915_private *i915, > const struct intel_uncore_forcewake_domain *d) > { > - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & > - FORCEWAKE_KERNEL), > - FORCEWAKE_ACK_TIMEOUT_MS)) > + if (wait_ack_set(i915, d, FORCEWAKE_KERNEL)) > DRM_ERROR("%s: timed out waiting for forcewake ack request.\n", > intel_uncore_forcewake_domain_to_str(d->id)); > } > > static inline void > +fw_domain_wait_ack_set_reserve(const struct drm_i915_private *i915, > + const struct intel_uncore_forcewake_domain *d) > +{ > + bool timeout; > + > + timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL); > + if (likely(!timeout)) > + return; > + > + timeout = fw_domain_reserve_fallback(i915, d, ACK_SET); > + if (timeout) > + fw_domain_wait_ack(i915, d); > +} > + > +static inline void > fw_domain_put(const struct drm_i915_private *i915, > const struct intel_uncore_forcewake_domain *d) > { > @@ -125,6 +201,26 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains) > } > > static void > +fw_domains_get_with_reserve(struct drm_i915_private *i915, > + enum forcewake_domains fw_domains) > +{ > + struct intel_uncore_forcewake_domain *d; > + unsigned int tmp; > + > + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); > + > + for_each_fw_domain_masked(d, fw_domains, i915, tmp) { > + fw_domain_wait_ack_clear_reserve(i915, d); > + fw_domain_get(i915, d); > + } > + > + for_each_fw_domain_masked(d, fw_domains, i915, tmp) > + fw_domain_wait_ack_set_reserve(i915, d); > + > + i915->uncore.fw_domains_active |= fw_domains; Ok. > +} > + > +static void > fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains) > { > struct intel_uncore_forcewake_domain *d; > @@ -1142,7 +1238,9 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) > } > > if (INTEL_GEN(dev_priv) >= 9) { > - dev_priv->uncore.funcs.force_wake_get = fw_domains_get; > + /* WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl */ > + dev_priv->uncore.funcs.force_wake_get = > + fw_domains_get_with_reserve; > dev_priv->uncore.funcs.force_wake_put = fw_domains_put; > fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, > FORCEWAKE_RENDER_GEN9, > -- > 2.11.0 > _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx