Re: Patch "sched/topology: Add a new arch_scale_freq_ref() method" has been added to the 6.6-stable tree

Vincent Guittot <vincent.guittot@xxxxxxxxxx> · Fri, 2 Feb 2024 09:37:18 +0100

Hi Sasha,

On Thu, 1 Feb 2024 at 18:09, Sasha Levin <sashal@xxxxxxxxxx> wrote:
>
> This is a note to let you know that I've just added the patch titled
>
>     sched/topology: Add a new arch_scale_freq_ref() method
>
> to the 6.6-stable tree which can be found at:
>     http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
>
> The filename of the patch is:
>      sched-topology-add-a-new-arch_scale_freq_ref-method.patch
> and it can be found in the queue-6.6 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable@xxxxxxxxxxxxxxx> know about it.

What is the reason for backporting this patch?

This patch belongs to a larger patchset, and I haven't seen the other
patches from that series queued as well:
https://lore.kernel.org/lkml/20231211104855.558096-1-vincent.guittot@xxxxxxxxxx/

Furthermore, there is an ongoing fix that has been queued for the next v6.8-rc:
https://lore.kernel.org/lkml/20240117190545.596057-1-vincent.guittot@xxxxxxxxxx/

All this to say that I'm not sure this patch is a good candidate for
being backported to v6.6 or v6.7.

>
>
>
> commit b84e7f7bcc91d2815e2ce356dc4211c904eff4a9
> Author: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
> Date:   Mon Dec 11 11:48:49 2023 +0100
>
>     sched/topology: Add a new arch_scale_freq_ref() method
>
>     [ Upstream commit 9942cb22ea458c34fa17b73d143ea32d4df1caca ]
>
>     Create a new method to get a unique and fixed max frequency. Currently
>     cpuinfo.max_freq or the highest (or last) state of performance domain are
>     used as the max frequency when computing the frequency for a level of
>     utilization, but:
>
>       - cpuinfo_max_freq can change at runtime. boost is one example of
>         such change.
>
>       - cpuinfo.max_freq and last item of the PD can be different leading to
>         different results between cpufreq and energy model.
>
>     We need to save the reference frequency that has been used when computing
>     the CPUs capacity and use this fixed and coherent value to convert between
>     frequency and CPU's capacity.
>
>     In fact, we already save the frequency that has been used when computing
>     the capacity of each CPU. We extend the precision to save kHz instead of
>     MHz currently and we modify the type to be aligned with other variables
>     used when converting frequency to capacity and the other way.
>
>     [ mingo: Minor edits. ]
>
>     Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
>     Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
>     Tested-by: Lukasz Luba <lukasz.luba@xxxxxxx>
>     Reviewed-by: Lukasz Luba <lukasz.luba@xxxxxxx>
>     Acked-by: Sudeep Holla <sudeep.holla@xxxxxxx>
>     Link: https://lore.kernel.org/r/20231211104855.558096-2-vincent.guittot@xxxxxxxxxx
>     Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
>
> diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
> index c7d2510e5a78..853c4f81ba4a 100644
> --- a/arch/arm/include/asm/topology.h
> +++ b/arch/arm/include/asm/topology.h
> @@ -13,6 +13,7 @@
>  #define arch_set_freq_scale topology_set_freq_scale
>  #define arch_scale_freq_capacity topology_get_freq_scale
>  #define arch_scale_freq_invariant topology_scale_freq_invariant
> +#define arch_scale_freq_ref topology_get_freq_ref
>  #endif
>
>  /* Replace task scheduler's default cpu-invariant accounting */
> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
> index 9fab663dd2de..a323b109b9c4 100644
> --- a/arch/arm64/include/asm/topology.h
> +++ b/arch/arm64/include/asm/topology.h
> @@ -23,6 +23,7 @@ void update_freq_counters_refs(void);
>  #define arch_set_freq_scale topology_set_freq_scale
>  #define arch_scale_freq_capacity topology_get_freq_scale
>  #define arch_scale_freq_invariant topology_scale_freq_invariant
> +#define arch_scale_freq_ref topology_get_freq_ref
>
>  #ifdef CONFIG_ACPI_CPPC_LIB
>  #define arch_init_invariance_cppc topology_init_cpu_capacity_cppc
> diff --git a/arch/riscv/include/asm/topology.h b/arch/riscv/include/asm/topology.h
> index e316ab3b77f3..61183688bdd5 100644
> --- a/arch/riscv/include/asm/topology.h
> +++ b/arch/riscv/include/asm/topology.h
> @@ -9,6 +9,7 @@
>  #define arch_set_freq_scale            topology_set_freq_scale
>  #define arch_scale_freq_capacity       topology_get_freq_scale
>  #define arch_scale_freq_invariant      topology_scale_freq_invariant
> +#define arch_scale_freq_ref            topology_get_freq_ref
>
>  /* Replace task scheduler's default cpu-invariant accounting */
>  #define arch_scale_cpu_capacity        topology_get_cpu_scale
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index b741b5ba82bd..0c9ae5b157b1 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -19,6 +19,7 @@
>  #include <linux/init.h>
>  #include <linux/rcupdate.h>
>  #include <linux/sched.h>
> +#include <linux/units.h>
>
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/thermal_pressure.h>
> @@ -26,7 +27,8 @@
>  static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
>  static struct cpumask scale_freq_counters_mask;
>  static bool scale_freq_invariant;
> -static DEFINE_PER_CPU(u32, freq_factor) = 1;
> +DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 1;
> +EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref);
>
>  static bool supports_scale_freq_counters(const struct cpumask *cpus)
>  {
> @@ -170,9 +172,9 @@ DEFINE_PER_CPU(unsigned long, thermal_pressure);
>   * operating on stale data when hot-plug is used for some CPUs. The
>   * @capped_freq reflects the currently allowed max CPUs frequency due to
>   * thermal capping. It might be also a boost frequency value, which is bigger
> - * than the internal 'freq_factor' max frequency. In such case the pressure
> - * value should simply be removed, since this is an indication that there is
> - * no thermal throttling. The @capped_freq must be provided in kHz.
> + * than the internal 'capacity_freq_ref' max frequency. In such case the
> + * pressure value should simply be removed, since this is an indication that
> + * there is no thermal throttling. The @capped_freq must be provided in kHz.
>   */
>  void topology_update_thermal_pressure(const struct cpumask *cpus,
>                                       unsigned long capped_freq)
> @@ -183,10 +185,7 @@ void topology_update_thermal_pressure(const struct cpumask *cpus,
>
>         cpu = cpumask_first(cpus);
>         max_capacity = arch_scale_cpu_capacity(cpu);
> -       max_freq = per_cpu(freq_factor, cpu);
> -
> -       /* Convert to MHz scale which is used in 'freq_factor' */
> -       capped_freq /= 1000;
> +       max_freq = arch_scale_freq_ref(cpu);
>
>         /*
>          * Handle properly the boost frequencies, which should simply clean
> @@ -279,13 +278,13 @@ void topology_normalize_cpu_scale(void)
>
>         capacity_scale = 1;
>         for_each_possible_cpu(cpu) {
> -               capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
> +               capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu);
>                 capacity_scale = max(capacity, capacity_scale);
>         }
>
>         pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
>         for_each_possible_cpu(cpu) {
> -               capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
> +               capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu);
>                 capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
>                         capacity_scale);
>                 topology_set_cpu_scale(cpu, capacity);
> @@ -321,15 +320,15 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
>                         cpu_node, raw_capacity[cpu]);
>
>                 /*
> -                * Update freq_factor for calculating early boot cpu capacities.
> +                * Update capacity_freq_ref for calculating early boot CPU capacities.
>                  * For non-clk CPU DVFS mechanism, there's no way to get the
>                  * frequency value now, assuming they are running at the same
> -                * frequency (by keeping the initial freq_factor value).
> +                * frequency (by keeping the initial capacity_freq_ref value).
>                  */
>                 cpu_clk = of_clk_get(cpu_node, 0);
>                 if (!PTR_ERR_OR_ZERO(cpu_clk)) {
> -                       per_cpu(freq_factor, cpu) =
> -                               clk_get_rate(cpu_clk) / 1000;
> +                       per_cpu(capacity_freq_ref, cpu) =
> +                               clk_get_rate(cpu_clk) / HZ_PER_KHZ;
>                         clk_put(cpu_clk);
>                 }
>         } else {
> @@ -411,7 +410,7 @@ init_cpu_capacity_callback(struct notifier_block *nb,
>         cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
>
>         for_each_cpu(cpu, policy->related_cpus)
> -               per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;
> +               per_cpu(capacity_freq_ref, cpu) = policy->cpuinfo.max_freq;
>
>         if (cpumask_empty(cpus_to_visit)) {
>                 topology_normalize_cpu_scale();
> diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
> index a07b510e7dc5..32c24ff4f2a8 100644
> --- a/include/linux/arch_topology.h
> +++ b/include/linux/arch_topology.h
> @@ -27,6 +27,13 @@ static inline unsigned long topology_get_cpu_scale(int cpu)
>
>  void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity);
>
> +DECLARE_PER_CPU(unsigned long, capacity_freq_ref);
> +
> +static inline unsigned long topology_get_freq_ref(int cpu)
> +{
> +       return per_cpu(capacity_freq_ref, cpu);
> +}
> +
>  DECLARE_PER_CPU(unsigned long, arch_freq_scale);
>
>  static inline unsigned long topology_get_freq_scale(int cpu)
> diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
> index 67b573d5bf28..9671b7234684 100644
> --- a/include/linux/sched/topology.h
> +++ b/include/linux/sched/topology.h
> @@ -275,6 +275,14 @@ void arch_update_thermal_pressure(const struct cpumask *cpus,
>  { }
>  #endif
>
> +#ifndef arch_scale_freq_ref
> +static __always_inline
> +unsigned int arch_scale_freq_ref(int cpu)
> +{
> +       return 0;
> +}
> +#endif
> +
>  static inline int task_node(const struct task_struct *p)
>  {
>         return cpu_to_node(task_cpu(p));