Re: [Patch v2 2/2] ACPI: processor: reduce CPUFREQ thermal reduction pctg for Tegra241

"Rafael J. Wysocki" <rafael@xxxxxxxxxx> · Tue, 3 Oct 2023 21:37:25 +0200

On Wed, Sep 13, 2023 at 6:47 PM Sumit Gupta <sumitg@xxxxxxxxxx> wrote:
>
> From: Srikar Srimath Tirumala <srikars@xxxxxxxxxx>
>
> Current implementation of processor_thermal performs software throttling
> in fixed steps of "20%" which can be too coarse for some platforms.
> We observed some performance gain after reducing the throttle percentage.
> Change the CPUFREQ thermal reduction percentage and maximum thermal steps
> to be configurable. Also, update the default values of both for Nvidia
> Tegra241 (Grace) SoC. The thermal reduction percentage is reduced to "5%"
> and accordingly the maximum number of thermal steps are increased as they
> are derived from the reduction percentage.
>
> Signed-off-by: Srikar Srimath Tirumala <srikars@xxxxxxxxxx>
> Signed-off-by: Sumit Gupta <sumitg@xxxxxxxxxx>
> ---
>  drivers/acpi/processor_thermal.c | 41 +++++++++++++++++++++++++++++---
>  1 file changed, 38 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
> index b7c6287eccca..30f2801abce6 100644
> --- a/drivers/acpi/processor_thermal.c
> +++ b/drivers/acpi/processor_thermal.c
> @@ -26,7 +26,16 @@
>   */
>
>  #define CPUFREQ_THERMAL_MIN_STEP 0
> -#define CPUFREQ_THERMAL_MAX_STEP 3
> +
> +static int cpufreq_thermal_max_step = 3;

__read_mostly I suppose?

> +
> +/*
> + * Minimum throttle percentage for processor_thermal cooling device.

+ *

> + * The processor_thermal driver uses it to calculate the percentage amount by
> + * which cpu frequency must be reduced for each cooling state. This is also used
> + * to calculate the maximum number of throttling steps or cooling states.
> + */
> +static int cpufreq_thermal_pctg = 20;

__read_mostly here too?

>
>  static DEFINE_PER_CPU(unsigned int, cpufreq_thermal_reduction_pctg);
>
> @@ -71,7 +80,7 @@ static int cpufreq_get_max_state(unsigned int cpu)
>         if (!cpu_has_cpufreq(cpu))
>                 return 0;
>
> -       return CPUFREQ_THERMAL_MAX_STEP;
> +       return cpufreq_thermal_max_step;
>  }
>
>  static int cpufreq_get_cur_state(unsigned int cpu)
> @@ -113,7 +122,8 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state)
>                 if (!policy)
>                         return -EINVAL;
>
> -               max_freq = (policy->cpuinfo.max_freq * (100 - reduction_pctg(i) * 20)) / 100;
> +               max_freq = (policy->cpuinfo.max_freq *
> +                           (100 - reduction_pctg(i) * cpufreq_thermal_pctg)) / 100;
>
>                 cpufreq_cpu_put(policy);
>
> @@ -126,10 +136,35 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state)
>         return 0;
>  }
>

#ifdef CONFIG_HAVE_ARM_SMCCC_DISCOVERY

> +#define SMCCC_SOC_ID_T241      0x036b0241
> +
> +void acpi_thermal_cpufreq_config_nvidia(void)

static void ?

> +{
> +#ifdef CONFIG_HAVE_ARM_SMCCC_DISCOVERY
> +       s32 soc_id = arm_smccc_get_soc_id_version();
> +
> +       /* Check JEP106 code for NVIDIA Tegra241 chip (036b:0241) */
> +       if ((soc_id < 0) || (soc_id != SMCCC_SOC_ID_T241))

Inner parens are redundant.

> +               return;
> +
> +       /* Reduce the CPUFREQ Thermal reduction percentage to 5% */
> +       cpufreq_thermal_pctg = 5;
> +
> +       /*
> +        * Derive the MAX_STEP from minimum throttle percentage so that the reduction
> +        * percentage doesn't end up becoming negative. Also, cap the MAX_STEP so that
> +        * the CPU performance doesn't become 0.
> +        */
> +       cpufreq_thermal_max_step = ((100 / cpufreq_thermal_pctg) - 1);

Outer parens are redundant.

> +#endif
> +}

#else
static inline void void acpi_thermal_cpufreq_config_nvidia(void) {}
#endif

> +
>  void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy)
>  {
>         unsigned int cpu;
>
> +       acpi_thermal_cpufreq_config_nvidia();
> +
>         for_each_cpu(cpu, policy->related_cpus) {
>                 struct acpi_processor *pr = per_cpu(processors, cpu);
>                 int ret;
> --

And patch [1/2] needs to be rebased on the current ACPI thermal
material in linux-next.

Thanks!