On 09/03/2023 03:46, Ashutosh Dixit wrote:
Expose intel_rps_read_actual_frequency_fw to read the actual freq without
taking forcewake for use by PMU. The code is refactored to use a common set
of functions across sysfs and PMU. Using common functions with sysfs in PMU
solves the issues of missing support for MTL and missing support for older
generations (prior to Gen6). It also future proofs the PMU where sometimes
code has been updated for sysfs and PMU has been missed.
Fixes: 22009b6dad66 ("drm/i915/mtl: Modify CAGF functions for MTL")
So not DG1 and above?
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8280
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@xxxxxxxxx>
---
drivers/gpu/drm/i915/gt/intel_rps.c | 46 +++++++++++++++++++----------
drivers/gpu/drm/i915/gt/intel_rps.h | 2 +-
drivers/gpu/drm/i915/i915_pmu.c | 10 +++----
3 files changed, 36 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 4d0dc9de23f9..3957c5ee5cba 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2046,16 +2046,6 @@ void intel_rps_sanitize(struct intel_rps *rps)
rps_disable_interrupts(rps);
}
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps)
-{
- struct drm_i915_private *i915 = rps_to_i915(rps);
- i915_reg_t rpstat;
-
- rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
-
- return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat);
-}
-
u32 intel_rps_read_rpstat(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
@@ -2089,10 +2079,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
return cagf;
}
-static u32 read_cagf(struct intel_rps *rps)
+static u32 __read_cagf(struct intel_rps *rps, bool take_fw)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
+ i915_reg_t r = INVALID_MMIO_REG;
u32 freq;
/*
@@ -2100,22 +2091,30 @@ static u32 read_cagf(struct intel_rps *rps)
* registers will return 0 freq when GT is in RC6
*/
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
- freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+ r = MTL_MIRROR_TARGET_WP1;
} else if (GRAPHICS_VER(i915) >= 12) {
- freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
+ r = GEN12_RPSTAT1;
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
+ goto exit;
} else if (GRAPHICS_VER(i915) >= 6) {
- freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
+ r = GEN6_RPSTAT1;
} else {
- freq = intel_uncore_read(uncore, MEMSTAT_ILK);
+ r = MEMSTAT_ILK;
}
+ freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r);
+exit:
return intel_rps_get_cagf(rps, freq);
}
+static u32 read_cagf(struct intel_rps *rps)
+{
+ return __read_cagf(rps, true);
+}
+
u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
{
struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
@@ -2128,6 +2127,23 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
return freq;
}
+static u32 read_cagf_fw(struct intel_rps *rps)
+{
+ return __read_cagf(rps, false);
+}
+
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps)
+{
+ struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
+ intel_wakeref_t wakeref;
+ u32 freq = 0;
+
+ with_intel_runtime_pm_if_in_use(rpm, wakeref)
When called from i915_pmu.c::frequency sample() above seems redundant
since there we already are under intel_gt_pm_get_if_awake. Perhaps it is
not a huge deal but it is nevertheless wasteful.
Also, maybe I am a bit rusty, but more fundamentally, wouldn't this be
adding a _very_ atypical pattern of a _fw function which grabs rpm? I'd
expect they all assume it's already held since the forcewake is already
held.
Am I missing the reason why it is needed?
Regards,
Tvrtko
+ freq = intel_gpu_freq(rps, read_cagf_fw(rps));
+
+ return freq;
+}
+
u32 intel_rps_read_punit_req(struct intel_rps *rps)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
index c622962c6bef..2d5b3ef58606 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -39,6 +39,7 @@ int intel_gpu_freq(struct intel_rps *rps, int val);
int intel_freq_opcode(struct intel_rps *rps, int val);
u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1);
u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps);
u32 intel_rps_get_requested_frequency(struct intel_rps *rps);
u32 intel_rps_get_min_frequency(struct intel_rps *rps);
u32 intel_rps_get_min_raw_freq(struct intel_rps *rps);
@@ -52,7 +53,6 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
u32 intel_rps_read_punit_req(struct intel_rps *rps);
u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
u32 intel_rps_read_rpstat(struct intel_rps *rps);
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps);
void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps);
void intel_rps_raise_unslice(struct intel_rps *rps);
void intel_rps_lower_unslice(struct intel_rps *rps);
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index a76c5ce9513d..7ece883a7d95 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -392,14 +392,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
* case we assume the system is running at the intended
* frequency. Fortunately, the read should rarely fail!
*/
- val = intel_rps_read_rpstat_fw(rps);
- if (val)
- val = intel_rps_get_cagf(rps, val);
- else
- val = rps->cur_freq;
+ val = intel_rps_read_actual_frequency_fw(rps);
+ if (!val)
+ val = intel_gpu_freq(rps, rps->cur_freq);
add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
- intel_gpu_freq(rps, val), period_ns / 1000);
+ val, period_ns / 1000);
}
if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {