Hi Ionela, Overall I think this approach is much better, and apart from a few nits below this is looking pretty good. On 2/11/20 6:45 PM, Ionela Voinescu wrote: > @@ -120,4 +121,188 @@ int __init parse_acpi_topology(void) > } > #endif > > +#ifdef CONFIG_ARM64_AMU_EXTN > > +#undef pr_fmt > +#define pr_fmt(fmt) "AMU: " fmt > + > +static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale); > +static DEFINE_PER_CPU(u64, arch_const_cycles_prev); > +static DEFINE_PER_CPU(u64, arch_core_cycles_prev); > +static cpumask_var_t amu_fie_cpus; > + > +/* Obtain max frequency (in KHz) as reported by hardware */ > +__weak unsigned int cpu_get_max_freq(unsigned int cpu) > +{ > + return 0; > +} > + > +#ifdef CONFIG_CPU_FREQ > +/* Replace max frequency getter with cpufreq based function */ > +#define cpu_get_max_freq cpufreq_get_hw_max_freq > +#endif > + > +/* Initialize counter reference per-cpu variables for the current CPU */ > +void init_cpu_freq_invariance_counters(void) > +{ > + this_cpu_write(arch_core_cycles_prev, > + read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0)); > + this_cpu_write(arch_const_cycles_prev, > + read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0)); > +} > + > +static int validate_cpu_freq_invariance_counters(int cpu) > +{ > + u64 max_freq_hz, ratio; > + > + if (!cpu_has_amu_feat(cpu)) { > + pr_debug("CPU%d: counters are not supported.\n", cpu); > + return -EINVAL; > + } > + > + if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) || > + !per_cpu(arch_core_cycles_prev, cpu))) { > + pr_debug("CPU%d: cycle counters are not enabled.\n", cpu); > + return -EINVAL; > + } > + > + /* Convert maximum frequency from KHz to Hz and validate */ > + max_freq_hz = cpu_get_max_freq(cpu) * 1000; > + if (unlikely(!max_freq_hz)) { > + pr_debug("CPU%d: invalid maximum frequency.\n", cpu); > + return -EINVAL; > + } > + > + /* > + * Pre-compute the fixed ratio between the frequency of the constant > + * counter and the maximum frequency of the CPU. > + * > + * const_freq > + * arch_max_freq_scale = ---------------- * SCHED_CAPACITY_SCALE² > + * cpuinfo_max_freq > + * > + * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE² > + * in order to ensure a good resolution for arch_max_freq_scale for > + * very low arch timer frequencies (up to the KHz range which should be ^^^^^ <pedantic hat on>: s/up to/down to/ > + * unlikely). > + */ > + ratio = (u64)arch_timer_get_rate() << (2 * SCHED_CAPACITY_SHIFT); > + ratio = div64_u64(ratio, max_freq_hz); > + if (!ratio) { > + pr_err("System timer frequency too low.\n"); Should that be a WARN_ONCE() instead? If the arch timer freq is too low, we'll end up spamming this message, since we go through this for all CPUs. > + return -EINVAL; > + } > + > + per_cpu(arch_max_freq_scale, cpu) = (unsigned long)ratio; > + It occurred to me that this isn't strictly speaking a per-CPU information as it only depends on the max possible frequency. Not really worth bothering about though, I think. > + return 0; > +} > + > +static inline int > +enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus) > +{ > + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); > + > + if (!policy) { > + pr_debug("CPU%d: No cpufreq policy found.\n", cpu); > + return false; > + } > + > + if (cpumask_subset(policy->related_cpus, valid_cpus)) { > + cpumask_or(amu_fie_cpus, policy->related_cpus, > + amu_fie_cpus); > + pr_info("CPUs[%*pbl]: counters will be used for FIE.", > + cpumask_pr_args(amu_fie_cpus)); Could we have a single print of all CPUs in one go? AIUI this will generate a line per cpufreq policy. Maybe just something at the tail of init_amu_fie(): if (!cpumask_empty(amu_fie_cpus)) pr_info(<blah>); > + } > + > + cpufreq_cpu_put(policy); > + > + return true; > +} > + > +static int __init init_amu_fie(void) > +{ > + cpumask_var_t valid_cpus; > + bool have_policy = false; > + int cpu; > + > + if (!zalloc_cpumask_var(&valid_cpus, GFP_KERNEL) || > + !zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) > + return -ENOMEM; > + > + for_each_possible_cpu(cpu) { > + if (validate_cpu_freq_invariance_counters(cpu)) > + continue; > + cpumask_set_cpu(cpu, valid_cpus); > + have_policy = enable_policy_freq_counters(cpu, valid_cpus) || > + have_policy; What about: have_policy |= enable_policy_freq_counters(cpu, valid_cpus); > + } > + > + if (!have_policy) { > + /* > + * If we are not restricted by cpufreq policies, we only enable > + * the use of the AMU feature for FIE if all CPUs support AMU. > + * Otherwise, enable_policy_freq_counters has already enabled > + * policy cpus. > + */ > + if (cpumask_equal(valid_cpus, cpu_possible_mask)) { Mmm so I'm thinking what we want here is the cpu_present_mask rather than the possible one. This is very corner-casy, but I think that if we fail to boot a secondary, we'll have it possible but not present. While at it you could make the loop only target present CPUs, but I think the one bit that matters is this check right here (!present should fail at validate_cpu_freq_invariance_counters()). > + cpumask_or(amu_fie_cpus, amu_fie_cpus, valid_cpus); > + pr_info("CPUs[%*pbl]: counters will be used for FIE.", > + cpumask_pr_args(amu_fie_cpus)); > + } > + } > + > + free_cpumask_var(valid_cpus); > + > + return 0; > +} > +late_initcall_sync(init_amu_fie); > + > +bool topology_cpu_freq_counters(struct cpumask *cpus) > +{ > + return cpumask_available(amu_fie_cpus) && > + cpumask_subset(cpus, amu_fie_cpus); > +} > + > +void topology_scale_freq_tick(void) > +{ > + u64 prev_core_cnt, prev_const_cnt; > + u64 core_cnt, const_cnt, scale; > + int cpu = smp_processor_id(); > + > + if (!cpumask_available(amu_fie_cpus) || > + !cpumask_test_cpu(cpu, amu_fie_cpus)) > + return; It might be a good idea to have a static key to gate our entry into this function - that way we can lessen our impact on older platforms (without AMUs) running a recent kernel with CONFIG_ARM64_AMU_EXTN=y. x86 does just that, if you look at their arch_scale_freq_tick() implementation. FWIW I don't think we should bother with playing with the key counter to count AMU-enabled CPUs, just enable it at startup if we have > 1 such CPU and let the cpumask drive the rest. In your check here, the static key check could replace the cpumask_available() check. The static key could also be used for topology_cpu_freq_counters(). > + > + const_cnt = read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0); > + core_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0); > + prev_const_cnt = this_cpu_read(arch_const_cycles_prev); > + prev_core_cnt = this_cpu_read(arch_core_cycles_prev); > + > + if (unlikely(core_cnt <= prev_core_cnt || > + const_cnt <= prev_const_cnt)) > + goto store_and_exit; > + > + /* > + * /\core arch_max_freq_scale > + * scale = ------- * -------------------- > + * /\const SCHED_CAPACITY_SCALE > + * > + * We shift by SCHED_CAPACITY_SHIFT (divide by SCHED_CAPACITY_SCALE) > + * in order to compensate for the SCHED_CAPACITY_SCALE² factor in > + * arch_max_freq_scale (used to ensure its resolution) while keeping > + * the scale value in the 0-SCHED_CAPACITY_SCALE capacity range. > + */ A simple "See validate_cpu_freq_invariance_counters() for details on the scale factor" would suffice wrt the shifting details. > + scale = core_cnt - prev_core_cnt; > + scale *= this_cpu_read(arch_max_freq_scale); > + scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT, > + const_cnt - prev_const_cnt); > + > + scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE); > + this_cpu_write(freq_scale, (unsigned long)scale); > + > +store_and_exit: > + this_cpu_write(arch_core_cycles_prev, core_cnt); > + this_cpu_write(arch_const_cycles_prev, const_cnt); > +} > +#endif /* CONFIG_ARM64_AMU_EXTN */ > diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c > index 1eb81f113786..1ab2b7503d63 100644 > --- a/drivers/base/arch_topology.c > +++ b/drivers/base/arch_topology.c > @@ -29,6 +29,14 @@ void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, > unsigned long scale; > int i; > > + /* > + * If the use of counters for FIE is enabled, just return as we don't > + * want to update the scale factor with information from CPUFREQ. > + * Instead the scale factor will be updated from arch_scale_freq_tick. > + */ > + if (arch_cpu_freq_counters(cpus)) > + return; > + > scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq; > > for_each_cpu(i, cpus) > diff --git a/include/linux/topology.h b/include/linux/topology.h > index eb2fe6edd73c..397aad6ae163 100644 > --- a/include/linux/topology.h > +++ b/include/linux/topology.h > @@ -227,5 +227,12 @@ static inline const struct cpumask *cpu_cpu_mask(int cpu) > return cpumask_of_node(cpu_to_node(cpu)); > } > > +#ifndef arch_cpu_freq_counters > +static __always_inline > +bool arch_cpu_freq_counters(struct cpumask *cpus) > +{ > + return false; > +} > +#endif > > #endif /* _LINUX_TOPOLOGY_H */ >