Provide the scheduler with feedback about the temporarily reduced maximum
available capacity. Unlike arch_update_thermal_pressure(), this does not
need to be filtered, since the capping is expected to last for dozens of
milliseconds or more.

Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
---
A consumer-side usage sketch (illustrative only) is appended after the diff.

 drivers/cpufreq/cpufreq.c | 48 +++++++++++++++++++++++++++++++++++++++
 include/linux/cpufreq.h   | 10 ++++++++
 2 files changed, 58 insertions(+)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 44db4f59c4cc..7d5f71be8d29 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2563,6 +2563,50 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
 }
 EXPORT_SYMBOL(cpufreq_get_policy);
 
+DEFINE_PER_CPU(unsigned long, cpufreq_pressure);
+EXPORT_PER_CPU_SYMBOL_GPL(cpufreq_pressure);
+
+/**
+ * cpufreq_update_pressure() - Update cpufreq pressure for CPUs
+ * @cpus        : The related CPUs for which max capacity has been reduced
+ * @capped_freq : The maximum allowed frequency that CPUs can run at
+ *
+ * Update the value of cpufreq pressure for all @cpus in the mask. The
+ * cpumask should include all (online+offline) affected CPUs, to avoid
+ * operating on stale data when hot-plug is used for some CPUs. The
+ * @capped_freq reflects the currently allowed max CPUs frequency due to
+ * freq_qos capping. It might also be a boost frequency value, which is bigger
+ * than the internal 'capacity_freq_ref' max frequency. In such a case the
+ * pressure value should simply be removed, since this is an indication that
+ * there is no capping. The @capped_freq must be provided in kHz.
+ */
+static void cpufreq_update_pressure(const struct cpumask *cpus,
+				    unsigned long capped_freq)
+{
+	unsigned long max_capacity, capacity, pressure;
+	u32 max_freq;
+	int cpu;
+
+	cpu = cpumask_first(cpus);
+	max_capacity = arch_scale_cpu_capacity(cpu);
+	max_freq = arch_scale_freq_ref(cpu);
+
+	/*
+	 * Handle boost frequencies properly: they should simply clear
+	 * the cpufreq pressure value.
+	 */
+	if (max_freq <= capped_freq)
+		capacity = max_capacity;
+	else
+		capacity = mult_frac(max_capacity, capped_freq, max_freq);
+
+	pressure = max_capacity - capacity;
+
+
+	for_each_cpu(cpu, cpus)
+		WRITE_ONCE(per_cpu(cpufreq_pressure, cpu), pressure);
+}
+
 /**
  * cpufreq_set_policy - Modify cpufreq policy parameters.
  * @policy: Policy object to modify.
@@ -2584,6 +2628,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 {
 	struct cpufreq_policy_data new_data;
 	struct cpufreq_governor *old_gov;
+	struct cpumask *cpus;
 	int ret;
 
 	memcpy(&new_data.cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo));
@@ -2618,6 +2663,9 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 	policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H);
 	trace_cpu_frequency_limits(policy);
 
+	cpus = policy->related_cpus;
+	cpufreq_update_pressure(cpus, policy->max);
+
 	policy->cached_target_freq = UINT_MAX;
 
 	pr_debug("new min and max freqs are %u - %u kHz\n",
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index afda5f24d3dd..b1d97edd3253 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -241,6 +241,12 @@ struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
 void cpufreq_enable_fast_switch(struct cpufreq_policy *policy);
 void cpufreq_disable_fast_switch(struct cpufreq_policy *policy);
 bool has_target_index(void);
+
+DECLARE_PER_CPU(unsigned long, cpufreq_pressure);
+static inline unsigned long cpufreq_get_pressure(int cpu)
+{
+	return per_cpu(cpufreq_pressure, cpu);
+}
 #else
 static inline unsigned int cpufreq_get(unsigned int cpu)
 {
@@ -263,6 +269,10 @@ static inline bool cpufreq_supports_freq_invariance(void)
 	return false;
 }
 static inline void disable_cpufreq(void) { }
+static inline unsigned long cpufreq_get_pressure(int cpu)
+{
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_CPU_FREQ_STAT
-- 
2.34.1
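
For illustration only, and not part of the patch: a minimal consumer-side
sketch of how scheduler code could fold the new per-CPU cpufreq pressure back
into an effective capacity. cpufreq_get_pressure() is the accessor added by
this patch and arch_scale_cpu_capacity() already exists; the helper name
effective_cpu_capacity() is hypothetical.

#include <linux/cpufreq.h>
#include <linux/sched/topology.h>

/* Hypothetical helper: capacity left once the freq_qos cap is accounted. */
static unsigned long effective_cpu_capacity(int cpu)
{
	unsigned long max_cap = arch_scale_cpu_capacity(cpu);

	/* pressure = max_cap - mult_frac(max_cap, capped_freq, max_freq) */
	return max_cap - cpufreq_get_pressure(cpu);
}

For example, a CPU with max capacity 1024 whose 2 GHz capacity_freq_ref is
capped to 1.5 GHz (1500000 kHz) gets a pressure of
1024 - mult_frac(1024, 1500000, 2000000) = 256, so the helper above would
report an effective capacity of 768.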