On Wed, Apr 05, 2023 at 09:45:21PM -0700, Ashutosh Dixit wrote: > On dGfx, the PL1 power limit being enabled and set to a low value results > in a low GPU operating freq. It also negates the freq raise operation which > is done before GuC firmware load. As a result GuC firmware load can time > out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power > limit was enabled and set to a low value). Therefore disable the PL1 power > limit when allowed by HW when loading GuC firmware. > > v2: > - Take mutex (to disallow writes to power1_max) across GuC reset/fw load > - Add hwm_power_max_restore to error return code path > > v3 (Jani N): > - Add/remove explanatory comments > - Function renames > - Type corrections > - Locking annotation > > v4: > - Don't hold the lock across GuC reset (Rodrigo) > - New locking scheme (suggested by Rodrigo) > - Eliminate rpm_get in power_max_disable/restore, not needed (Tvrtko) > > Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@xxxxxxxxx> > --- > drivers/gpu/drm/i915/gt/uc/intel_uc.c | 9 ++++++ > drivers/gpu/drm/i915/i915_hwmon.c | 40 +++++++++++++++++++++++++++ > drivers/gpu/drm/i915/i915_hwmon.h | 7 +++++ > 3 files changed, 56 insertions(+) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c > index 4ccb4be4c9cba..aa8e35a5636a0 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c > @@ -18,6 +18,7 @@ > #include "intel_uc.h" > > #include "i915_drv.h" > +#include "i915_hwmon.h" > > static const struct intel_uc_ops uc_ops_off; > static const struct intel_uc_ops uc_ops_on; > @@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc) > struct intel_guc *guc = &uc->guc; > struct intel_huc *huc = &uc->huc; > int ret, attempts; > + bool pl1en; we need to initialize this to make warn free builds happy... what's our default btw? false? true? we need to read it back? > > GEM_BUG_ON(!intel_uc_supports_guc(uc)); > GEM_BUG_ON(!intel_uc_wants_guc(uc)); > @@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc) > else > attempts = 1; > > + /* Disable a potentially low PL1 power limit to allow freq to be raised */ > + i915_hwmon_power_max_disable(gt->i915, &pl1en); > + > intel_rps_raise_unslice(&uc_to_gt(uc)->rps); > > while (attempts--) { > @@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc) > intel_rps_lower_unslice(&uc_to_gt(uc)->rps); > } > > + i915_hwmon_power_max_restore(gt->i915, pl1en); > + > guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc))); > guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc))); > > @@ -563,6 +570,8 @@ static int __uc_init_hw(struct intel_uc *uc) > /* Return GT back to RPn */ > intel_rps_lower_unslice(&uc_to_gt(uc)->rps); > > + i915_hwmon_power_max_restore(gt->i915, pl1en); > + > __uc_sanitize(uc); > > if (!ret) { > diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c > index 7f44e809ca155..9ab8971679fe3 100644 > --- a/drivers/gpu/drm/i915/i915_hwmon.c > +++ b/drivers/gpu/drm/i915/i915_hwmon.c > @@ -50,6 +50,7 @@ struct hwm_drvdata { > struct hwm_energy_info ei; /* Energy info for energy1_input */ > char name[12]; > int gt_n; > + bool reset_in_progress; > }; > > struct i915_hwmon { > @@ -400,6 +401,10 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) > u32 nval; > > mutex_lock(&hwmon->hwmon_lock); > + if (hwmon->ddat.reset_in_progress) { > + ret = -EAGAIN; > + goto unlock; > + } > wakeref = intel_runtime_pm_get(ddat->uncore->rpm); > > /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ > @@ -421,6 +426,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) > PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); > exit: > intel_runtime_pm_put(ddat->uncore->rpm, wakeref); > +unlock: > mutex_unlock(&hwmon->hwmon_lock); > return ret; > } > @@ -472,6 +478,40 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val) > } > } > > +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old) > +{ > + struct i915_hwmon *hwmon = i915->hwmon; > + u32 r; > + > + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) > + return; > + > + mutex_lock(&hwmon->hwmon_lock); > + > + hwmon->ddat.reset_in_progress = true; > + r = intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit, > + PKG_PWR_LIM_1_EN, 0); > + *old = !!(r & PKG_PWR_LIM_1_EN); > + > + mutex_unlock(&hwmon->hwmon_lock); > +} > + > +void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old) > +{ > + struct i915_hwmon *hwmon = i915->hwmon; > + > + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) > + return; > + > + mutex_lock(&hwmon->hwmon_lock); > + > + intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit, > + PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0); > + hwmon->ddat.reset_in_progress = false; > + > + mutex_unlock(&hwmon->hwmon_lock); > +} you could have combined both functions in a i915_hwmon_power_max_set(struct drm_i915_private *i915, bool val, bool *old) then pass NULL to old on the restoration times and have if (old) *old = !!(r & PKG_PWR_LIM_1_EN); But really up to you here, the current code is clear to follow imho so, with the pl1en initialization fixed: Reviewed-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> > + > static umode_t > hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr) > { > diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h > index 7ca9cf2c34c96..0fcb7de844061 100644 > --- a/drivers/gpu/drm/i915/i915_hwmon.h > +++ b/drivers/gpu/drm/i915/i915_hwmon.h > @@ -7,14 +7,21 @@ > #ifndef __I915_HWMON_H__ > #define __I915_HWMON_H__ > > +#include <linux/types.h> > + > struct drm_i915_private; > +struct intel_gt; > > #if IS_REACHABLE(CONFIG_HWMON) > void i915_hwmon_register(struct drm_i915_private *i915); > void i915_hwmon_unregister(struct drm_i915_private *i915); > +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old); > +void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old); > #else > static inline void i915_hwmon_register(struct drm_i915_private *i915) { }; > static inline void i915_hwmon_unregister(struct drm_i915_private *i915) { }; > +static inline void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old) { }; > +static inline void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old) { }; > #endif > > #endif /* __I915_HWMON_H__ */ > -- > 2.38.0 >