On dGfx, the PL1 power limit being enabled and set to a low value results in a low GPU operating freq. It also negates the freq raise operation which is done before GuC firmware load. As a result GuC firmware load can time out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power limit was enabled and set to a low value). Therefore disable the PL1 power limit when allowed by HW when loading GuC firmware. v2: - Take mutex (to disallow writes to power1_max) across GuC reset/fw load - Add hwm_power_max_restore to error return code path v3 (Jani N): - Add/remove explanatory comments - Function renames - Type corrections - Locking annotation Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Signed-off-by: Ashutosh Dixit <ashutosh.dixit@xxxxxxxxx> --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 9 +++++++ drivers/gpu/drm/i915/i915_hwmon.c | 39 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_hwmon.h | 7 +++++ 3 files changed, 55 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 4ccb4be4c9cba..aa8e35a5636a0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -18,6 +18,7 @@ #include "intel_uc.h" #include "i915_drv.h" +#include "i915_hwmon.h" static const struct intel_uc_ops uc_ops_off; static const struct intel_uc_ops uc_ops_on; @@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc) struct intel_guc *guc = &uc->guc; struct intel_huc *huc = &uc->huc; int ret, attempts; + bool pl1en; GEM_BUG_ON(!intel_uc_supports_guc(uc)); GEM_BUG_ON(!intel_uc_wants_guc(uc)); @@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc) else attempts = 1; + /* Disable a potentially low PL1 power limit to allow freq to be raised */ + i915_hwmon_power_max_disable(gt->i915, &pl1en); + intel_rps_raise_unslice(&uc_to_gt(uc)->rps); while (attempts--) { @@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc) intel_rps_lower_unslice(&uc_to_gt(uc)->rps); } + i915_hwmon_power_max_restore(gt->i915, pl1en); + guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc))); guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc))); @@ -563,6 +570,8 @@ static int __uc_init_hw(struct intel_uc *uc) /* Return GT back to RPn */ intel_rps_lower_unslice(&uc_to_gt(uc)->rps); + i915_hwmon_power_max_restore(gt->i915, pl1en); + __uc_sanitize(uc); if (!ret) { diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index ee63a8fd88fc1..769b5bda4d53f 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -444,6 +444,45 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val) } } +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old) + __acquires(i915->hwmon->hwmon_lock) +{ + struct i915_hwmon *hwmon = i915->hwmon; + intel_wakeref_t wakeref; + u32 r; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + /* Take mutex to prevent concurrent hwm_power_max_write */ + mutex_lock(&hwmon->hwmon_lock); + + with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref) + r = intel_uncore_rmw(hwmon->ddat.uncore, + hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN, 0); + + *old = !!(r & PKG_PWR_LIM_1_EN); +} + +void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old) + __releases(i915->hwmon->hwmon_lock) +{ + struct i915_hwmon *hwmon = i915->hwmon; + intel_wakeref_t wakeref; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref) + intel_uncore_rmw(hwmon->ddat.uncore, + hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN, + old ? PKG_PWR_LIM_1_EN : 0); + + mutex_unlock(&hwmon->hwmon_lock); +} + static umode_t hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr) { diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h index 7ca9cf2c34c96..0fcb7de844061 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.h +++ b/drivers/gpu/drm/i915/i915_hwmon.h @@ -7,14 +7,21 @@ #ifndef __I915_HWMON_H__ #define __I915_HWMON_H__ +#include <linux/types.h> + struct drm_i915_private; +struct intel_gt; #if IS_REACHABLE(CONFIG_HWMON) void i915_hwmon_register(struct drm_i915_private *i915); void i915_hwmon_unregister(struct drm_i915_private *i915); +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old); +void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old); #else static inline void i915_hwmon_register(struct drm_i915_private *i915) { }; static inline void i915_hwmon_unregister(struct drm_i915_private *i915) { }; +static inline void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old) { }; +static inline void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old) { }; #endif #endif /* __I915_HWMON_H__ */ -- 2.38.0