Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Quoting Mika Kuoppala (2017-10-26 15:01:44) >> There is a possibility on gen9 hardware to miss the forcewake ack >> message. The recommended workaround is to use another free >> bit and toggle it until original bit is successfully acknowledged. >> >> The workaround is a bit misleadingly named as WaRsForcewakeAddDelayForAck. >> The reason for naming is most likely that workaround was named before >> the most recent recommendation evolved to use the reserve bit. >> >> Some future gen9 revs might or might not fix the underlying issue but >> the fallback to reserve bit dance can be considered as harmless: >> without the ack timeout we never reach the reserve bit forcewake. >> Thus as of now we adopt a blanket approach for all gen9 and leave >> the bypassing the reserve bit approach for future patches if >> corresponding hw revisions do appear. >> >> Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms >> for forcewake request/clear ack") did increase the forcewake timeout. >> If the issue was a delayed ack, future work could include finding >> a suitable timeout value both for primary ack and reserve toggle >> to reduce the worst case latency. >> >> References: HSDES #1604254524 >> References: https://bugs.freedesktop.org/show_bug.cgi?id=102051 >> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> >> Cc: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> >> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> >> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> >> Cc: Sagar Arun Kamble <sagar.a.kamble@xxxxxxxxx> >> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> >> --- >> drivers/gpu/drm/i915/i915_reg.h | 5 +- >> drivers/gpu/drm/i915/intel_uncore.c | 112 +++++++++++++++++++++++++++++++++--- >> 2 files changed, 108 insertions(+), 9 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h >> index f138eae82bf0..c04c634af5ae 100644 >> --- a/drivers/gpu/drm/i915/i915_reg.h >> +++ b/drivers/gpu/drm/i915/i915_reg.h >> @@ -7774,8 +7774,9 @@ enum { >> #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0x0D88) >> #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0x0D84) >> #define FORCEWAKE_ACK_BLITTER_GEN9 _MMIO(0x130044) >> -#define FORCEWAKE_KERNEL 0x1 >> -#define FORCEWAKE_USER 0x2 >> +#define FORCEWAKE_KERNEL BIT(0) >> +#define FORCEWAKE_USER BIT(1) >> +#define FORCEWAKE_RESERVE BIT(12) > > Why 12? How about 15? > I just mimiced the pseudocode from an errata sheet slavishly. AFAIK 15 should work as well as 12 so 15. > FORCEWAKE_KERNEL2 or FORCEWAKE_KERNEL_FALLBACK KERNEL2 would imply similar mechanism. Fallback I like more as this is just under the hood toggling for waking up the real bit. > > Reserved tends to imply future hw bits. So I think s/reserve/fallback/ > works throughout the patch. > Yup, fallback is better. >> #define FORCEWAKE_MT_ACK _MMIO(0x130040) >> #define ECOBUS _MMIO(0xa180) >> #define FORCEWAKE_MT_ENABLE (1<<5) >> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c >> index 20e3c65c0999..fc6d090244c5 100644 >> --- a/drivers/gpu/drm/i915/intel_uncore.c >> +++ b/drivers/gpu/drm/i915/intel_uncore.c >> @@ -69,17 +69,80 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d) >> HRTIMER_MODE_REL); >> } >> >> +static inline bool >> +wait_ack_clear(const struct drm_i915_private *i915, >> + const struct intel_uncore_forcewake_domain *d, >> + const u32 ack) >> +{ >> + return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == 0, >> + FORCEWAKE_ACK_TIMEOUT_MS); >> +} >> + >> +static inline bool >> +wait_ack_set(const struct drm_i915_private *i915, >> + const struct intel_uncore_forcewake_domain *d, >> + const u32 ack) >> +{ >> + return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack), >> + FORCEWAKE_ACK_TIMEOUT_MS); >> +} > > Let's make these one function, to cut down on the wait_for() expansions. > > Keeping the wait_ack_clear/wait_ack_set wrappers. > > static inline int > __wait_for_ack(i915, d, ack, value) > { > return wait_for_atomic(((__raw_i915_read32(i915, d->reg_ack) & ack)) == value, > FORCEWAKE_ACK_TIMEOUT_MS); > } > > static inline int wait_ack_clear() { return __wait_for_ack(i915, d, ack, 0)); > static inline int wait_ack_set() { return __wait_for_ack(i915, d, ack, ack)); > >> static inline void >> fw_domain_wait_ack_clear(const struct drm_i915_private *i915, >> const struct intel_uncore_forcewake_domain *d) >> { >> - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & >> - FORCEWAKE_KERNEL) == 0, >> - FORCEWAKE_ACK_TIMEOUT_MS)) >> + if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL)) >> DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n", >> intel_uncore_forcewake_domain_to_str(d->id)); >> } >> >> +enum ack_type { >> + ACK_CLEAR = 0, >> + ACK_SET >> +}; >> + >> +static bool >> +fw_domain_reserve_fallback(const struct drm_i915_private *i915, >> + const struct intel_uncore_forcewake_domain *d, >> + const enum ack_type type) >> +{ >> + bool timeout; >> + int retry = 10; >> + >> + /* Fallback to toggle another fw bit to wake up the gpu */ > > The comment needs some work; the first bit itself was meant to wake the > gpu, so why is this bit any more likely to work? > Yeah, this needs rework. It is more like toggling an unrelated bit will make the 'lost or pending' ack message for the KERNEL bit to materialize. I thought that should we also retoggle the KERNEL one but again, this patch mimics the pseudocode I found 1:1. There are hints that both delaying wait and a regtogging of the original bit was used at some point of time as a workaround. This is _supposed_ to be the latest recommendation to combat the issue. Efforts and success to reproduce would give us venue to measure if it is the best known. >> + do { >> + wait_ack_clear(i915, d, FORCEWAKE_RESERVE); >> + __raw_i915_write32(i915, d->reg_set, >> + _MASKED_BIT_ENABLE(FORCEWAKE_RESERVE)); >> + wait_ack_set(i915, d, FORCEWAKE_RESERVE); >> + >> + if (type == ACK_CLEAR) >> + timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL); >> + else >> + timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL); >> + >> + __raw_i915_write32(i915, d->reg_set, >> + _MASKED_BIT_DISABLE(FORCEWAKE_RESERVE)); > > Blindly toggling another bit to encourage the first to succeed. > > Why stop at 1 extra bit???? Again 1:1 mimic of pseudocode. What we could do instead of this toggle hammering, is to go through all unreserved bits until one ack appears on any and be happy. > > If the wait_ack_set(RSVD) fails, you might as well abort? > For me it looks like this toggle hammering will kick the state machine to spit out the first ack message that was somehow lost. I did again follow the recomennded procedure precisely. > Other than my face hitting the desk, the logic makes sense to me: > > Assert second fw bit; check original bit; clear second fw bit (so we > don't keep fw longer than required). > > This basically also has the side-effect of making the timeout 12x > longer. > In the worst case yes. But the first toggle should already untangle the mess. Probability that we end up retrying all the way through is very unlikely. Now that I have said it, it will happen :) Thank you for the comments. -Mika >> + } while (timeout && --retry); >> + >> + return timeout; >> +} >> + >> +static inline void >> +fw_domain_wait_ack_clear_reserve(const struct drm_i915_private *i915, >> + const struct intel_uncore_forcewake_domain *d) >> +{ >> + bool timeout; > bool err; or none at all. > > timeout tends to be a value we pass around telling us how long until the > timeout; not the status, which would be timedout. > >> + >> + timeout = wait_ack_clear(i915, d, FORCEWAKE_KERNEL); >> + if (likely(!timeout)) >> + return; >> + >> + timeout = fw_domain_reserve_fallback(i915, d, ACK_CLEAR); >> + if (timeout) >> + fw_domain_wait_ack_clear(i915, d); > > Ok, one last try and raise an error if it fails. > >> +} >> + >> static inline void >> fw_domain_get(struct drm_i915_private *i915, >> const struct intel_uncore_forcewake_domain *d) >> @@ -91,14 +154,27 @@ static inline void >> fw_domain_wait_ack(const struct drm_i915_private *i915, >> const struct intel_uncore_forcewake_domain *d) >> { >> - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & >> - FORCEWAKE_KERNEL), >> - FORCEWAKE_ACK_TIMEOUT_MS)) >> + if (wait_ack_set(i915, d, FORCEWAKE_KERNEL)) >> DRM_ERROR("%s: timed out waiting for forcewake ack request.\n", >> intel_uncore_forcewake_domain_to_str(d->id)); >> } >> >> static inline void >> +fw_domain_wait_ack_set_reserve(const struct drm_i915_private *i915, >> + const struct intel_uncore_forcewake_domain *d) >> +{ >> + bool timeout; >> + >> + timeout = wait_ack_set(i915, d, FORCEWAKE_KERNEL); >> + if (likely(!timeout)) >> + return; >> + >> + timeout = fw_domain_reserve_fallback(i915, d, ACK_SET); >> + if (timeout) >> + fw_domain_wait_ack(i915, d); >> +} >> + >> +static inline void >> fw_domain_put(const struct drm_i915_private *i915, >> const struct intel_uncore_forcewake_domain *d) >> { >> @@ -125,6 +201,26 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains) >> } >> >> static void >> +fw_domains_get_with_reserve(struct drm_i915_private *i915, >> + enum forcewake_domains fw_domains) >> +{ >> + struct intel_uncore_forcewake_domain *d; >> + unsigned int tmp; >> + >> + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); >> + >> + for_each_fw_domain_masked(d, fw_domains, i915, tmp) { >> + fw_domain_wait_ack_clear_reserve(i915, d); >> + fw_domain_get(i915, d); >> + } >> + >> + for_each_fw_domain_masked(d, fw_domains, i915, tmp) >> + fw_domain_wait_ack_set_reserve(i915, d); >> + >> + i915->uncore.fw_domains_active |= fw_domains; > > Ok. > >> +} >> + >> +static void >> fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains) >> { >> struct intel_uncore_forcewake_domain *d; >> @@ -1142,7 +1238,9 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) >> } >> >> if (INTEL_GEN(dev_priv) >= 9) { >> - dev_priv->uncore.funcs.force_wake_get = fw_domains_get; >> + /* WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl */ >> + dev_priv->uncore.funcs.force_wake_get = >> + fw_domains_get_with_reserve; >> dev_priv->uncore.funcs.force_wake_put = fw_domains_put; >> fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, >> FORCEWAKE_RENDER_GEN9, >> -- >> 2.11.0 >> _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx