ARM cores that implement the Activity Monitor Unit (AMU) use Functional
Fixed Hardware (FFH) to map the AMU counters to the Delivered_Counter
and Reference_Counter registers. Each sysreg is read separately with an
smp_call_function_single() call, so four IPIs are used in total, one
per register read (two registers, each sampled twice). As a result, the
AMU's core counter and constant counter can be sampled at inconsistent
time intervals if an IPI is handled late, which sometimes leads to an
unstable frequency value from the "cpuinfo_cur_freq" node.

Fix this by queueing work on the target CPU to read all the counters
synchronously in sequence. This removes the inter-IPI latency and
ensures that both counters are sampled close together in time. Without
this change, the CPU frequency regenerated from the AMU counters was
observed to sometimes deviate by ~25% because the counters were read at
non-deterministic times.

For now, the change is kept specific to Tegra241. It can be applied to
other SoCs having an AMU if the same issue is observed.

Signed-off-by: Sumit Gupta <sumitg@xxxxxxxxxx>
---
 drivers/cpufreq/cppc_cpufreq.c | 53 +++++++++++++++++++++++++++-------
 1 file changed, 43 insertions(+), 10 deletions(-)

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 5e6a132a525e..52b93ac6225e 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -46,6 +46,8 @@ static bool boost_supported;
 /* default 2usec delay between sampling */
 static unsigned int sampling_delay_us = 2;
 
+static bool get_rate_use_wq;
+
 static void cppc_check_hisi_workaround(void);
 static void cppc_nvidia_workaround(void);
 
@@ -99,6 +101,12 @@ struct cppc_freq_invariance {
 static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv);
 static struct kthread_worker *kworker_fie;
 
+struct feedback_ctrs {
+	u32 cpu;
+	struct cppc_perf_fb_ctrs fb_ctrs_t0;
+	struct cppc_perf_fb_ctrs fb_ctrs_t1;
+};
+
 static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu);
 static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
				 struct cppc_perf_fb_ctrs *fb_ctrs_t0,
@@ -851,28 +859,44 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
 	return (reference_perf * delta_delivered) / delta_reference;
 }
 
+static int cppc_get_perf_ctrs_sync(void *fb_ctrs)
+{
+	struct feedback_ctrs *ctrs = fb_ctrs;
+	int ret;
+
+	ret = cppc_get_perf_ctrs(ctrs->cpu, &(ctrs->fb_ctrs_t0));
+	if (ret)
+		return ret;
+
+	udelay(sampling_delay_us);
+
+	ret = cppc_get_perf_ctrs(ctrs->cpu, &(ctrs->fb_ctrs_t1));
+	if (ret)
+		return ret;
+
+	return ret;
+}
+
 static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
 {
-	struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 	struct cppc_cpudata *cpu_data = policy->driver_data;
+	struct feedback_ctrs fb_ctrs = {0};
 	u64 delivered_perf;
 	int ret;
 
 	cpufreq_cpu_put(policy);
+	fb_ctrs.cpu = cpu;
 
-	ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
-	if (ret)
-		return ret;
-
-	udelay(sampling_delay_us);
-
-	ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
+	if (get_rate_use_wq)
+		ret = smp_call_on_cpu(cpu, cppc_get_perf_ctrs_sync, &fb_ctrs, false);
+	else
+		ret = cppc_get_perf_ctrs_sync(&fb_ctrs);
 	if (ret)
 		return ret;
 
-	delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
-					       &fb_ctrs_t1);
+	delivered_perf = cppc_perf_from_fbctrs(cpu_data, &(fb_ctrs.fb_ctrs_t0),
+					       &(fb_ctrs.fb_ctrs_t1));
 
 	return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
 }
@@ -953,7 +977,16 @@ static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu)
 static void
 cppc_nvidia_workaround(void)
 {
+	int cpu;
+
 	sampling_delay_us = 25;
+
+#ifdef CONFIG_ARM64_AMU_EXTN
+	cpu = get_cpu_with_amu_feat();
+
+	if (cpu < nr_cpu_ids)
+		get_rate_use_wq = true;
+#endif
 }
 
 static void cppc_check_hisi_workaround(void)
-- 
2.17.1
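
For reference, the deviation follows directly from the counter math:
cppc_perf_from_fbctrs() derives performance as
reference_perf * delta_delivered / delta_reference, which is only
accurate when both deltas cover the same time window. Below is a
minimal standalone sketch (plain userspace C, not kernel code; the
clock rates and IPI delay are invented for illustration) showing how a
late IPI that stretches only the delivered counter's window produces a
~25% error:

/*
 * Standalone illustration, not kernel code: models how a late IPI skews
 * the frequency regenerated from the AMU feedback counters. The clock
 * rates and delays below are made up for the example.
 */
#include <stdio.h>
#include <stdint.h>

/* Same formula as cppc_perf_from_fbctrs() in cppc_cpufreq.c. */
static uint64_t perf_from_ctrs(uint64_t ref_perf, uint64_t delta_delivered,
			       uint64_t delta_reference)
{
	return (ref_perf * delta_delivered) / delta_reference;
}

int main(void)
{
	const double core_hz = 3e9;	/* assumed core clock: 3 GHz */
	const double const_hz = 1e9;	/* assumed constant counter: 1 GHz */
	const uint64_t ref_perf = 100;	/* nominal reference performance */
	const double window_us = 2.0;	/* sampling_delay_us */
	const double skew_us = 0.5;	/* lateness of one delivered-counter IPI */

	/* Ideal case: both counters sampled over the same 2us window. */
	uint64_t d_deliv = (uint64_t)(core_hz * window_us / 1e6);
	uint64_t d_ref = (uint64_t)(const_hz * window_us / 1e6);
	uint64_t ideal = perf_from_ctrs(ref_perf, d_deliv, d_ref);

	/*
	 * Skewed case: the delivered counter is effectively sampled over
	 * 2.5us because its second per-register IPI was handled late,
	 * while the reference counter still covers 2us.
	 */
	uint64_t d_deliv_skew = (uint64_t)(core_hz * (window_us + skew_us) / 1e6);
	uint64_t skewed = perf_from_ctrs(ref_perf, d_deliv_skew, d_ref);

	printf("same window: perf = %llu\n", (unsigned long long)ideal);
	printf("late IPI:    perf = %llu (off by %llu%%)\n",
	       (unsigned long long)skewed,
	       (unsigned long long)((skewed - ideal) * 100 / ideal));
	return 0;
}

With these made-up numbers the computed performance lands 25% high,
matching the order of deviation seen on Tegra241. Reading both counters
from a single context on the target CPU, as the patch does, keeps the
two sampling windows aligned.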