On Sat, Oct 14, 2023 at 12:55 PM Sumit Gupta <sumitg@xxxxxxxxxx> wrote: > > From: Srikar Srimath Tirumala <srikars@xxxxxxxxxx> > > Current implementation of processor_thermal performs software throttling > in fixed steps of "20%" which can be too coarse for some platforms. > We observed some performance gain after reducing the throttle percentage. > Change the CPUFREQ thermal reduction percentage and maximum thermal steps > to be configurable. Also, update the default values of both for Nvidia > Tegra241 (Grace) SoC. The thermal reduction percentage is reduced to "5%" > and accordingly the maximum number of thermal steps are increased as they > are derived from the reduction percentage. > > Signed-off-by: Srikar Srimath Tirumala <srikars@xxxxxxxxxx> > Co-developed-by: Sumit Gupta <sumitg@xxxxxxxxxx> > Signed-off-by: Sumit Gupta <sumitg@xxxxxxxxxx> > --- > drivers/acpi/arm64/Makefile | 1 + > drivers/acpi/arm64/thermal_cpufreq.c | 20 ++++++++++++++++ > drivers/acpi/processor_thermal.c | 35 +++++++++++++++++++++++++--- > include/linux/acpi.h | 9 +++++++ > 4 files changed, 62 insertions(+), 3 deletions(-) > create mode 100644 drivers/acpi/arm64/thermal_cpufreq.c > > diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile > index 143debc1ba4a..3f181d8156cc 100644 > --- a/drivers/acpi/arm64/Makefile > +++ b/drivers/acpi/arm64/Makefile > @@ -5,3 +5,4 @@ obj-$(CONFIG_ACPI_GTDT) += gtdt.o > obj-$(CONFIG_ACPI_APMT) += apmt.o > obj-$(CONFIG_ARM_AMBA) += amba.o > obj-y += dma.o init.o > +obj-$(CONFIG_ACPI) += thermal_cpufreq.o > diff --git a/drivers/acpi/arm64/thermal_cpufreq.c b/drivers/acpi/arm64/thermal_cpufreq.c > new file mode 100644 > index 000000000000..de834fb013e7 > --- /dev/null > +++ b/drivers/acpi/arm64/thermal_cpufreq.c > @@ -0,0 +1,20 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +#include <linux/acpi.h> > + > +#ifdef CONFIG_HAVE_ARM_SMCCC_DISCOVERY > +#define SMCCC_SOC_ID_T241 0x036b0241 > + > +int acpi_thermal_cpufreq_pctg(void) > +{ > + s32 soc_id = arm_smccc_get_soc_id_version(); > + > + /* > + * Check JEP106 code for NVIDIA Tegra241 chip (036b:0241) and > + * reduce the CPUFREQ Thermal reduction percentage to 5%. > + */ > + if (soc_id == SMCCC_SOC_ID_T241) > + return 5; > + > + return 0; > +} > +#endif This part needs an ACK from the ARM folks. > diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c > index b7c6287eccca..52f316e4e260 100644 > --- a/drivers/acpi/processor_thermal.c > +++ b/drivers/acpi/processor_thermal.c > @@ -26,7 +26,16 @@ > */ > > #define CPUFREQ_THERMAL_MIN_STEP 0 > -#define CPUFREQ_THERMAL_MAX_STEP 3 > + > +static int cpufreq_thermal_max_step __read_mostly = 3; > + > +/* > + * Minimum throttle percentage for processor_thermal cooling device. > + * The processor_thermal driver uses it to calculate the percentage amount by > + * which cpu frequency must be reduced for each cooling state. This is also used > + * to calculate the maximum number of throttling steps or cooling states. > + */ > +static int cpufreq_thermal_pctg __read_mostly = 20; I'd call this cpufreq_thermal_reduction_step, because the value multiplied by it already is in percent. > > static DEFINE_PER_CPU(unsigned int, cpufreq_thermal_reduction_pctg); > > @@ -71,7 +80,7 @@ static int cpufreq_get_max_state(unsigned int cpu) > if (!cpu_has_cpufreq(cpu)) > return 0; > > - return CPUFREQ_THERMAL_MAX_STEP; > + return cpufreq_thermal_max_step; > } > > static int cpufreq_get_cur_state(unsigned int cpu) > @@ -113,7 +122,8 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state) > if (!policy) > return -EINVAL; > > - max_freq = (policy->cpuinfo.max_freq * (100 - reduction_pctg(i) * 20)) / 100; > + max_freq = (policy->cpuinfo.max_freq * > + (100 - reduction_pctg(i) * cpufreq_thermal_pctg)) / 100; > > cpufreq_cpu_put(policy); > > @@ -126,10 +136,29 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state) > return 0; > } > > +static void acpi_thermal_cpufreq_config(void) > +{ > + int cpufreq_pctg = acpi_thermal_cpufreq_pctg(); > + > + if (!cpufreq_pctg) > + return; > + > + cpufreq_thermal_pctg = cpufreq_pctg; > + > + /* > + * Derive the MAX_STEP from minimum throttle percentage so that the reduction > + * percentage doesn't end up becoming negative. Also, cap the MAX_STEP so that > + * the CPU performance doesn't become 0. > + */ > + cpufreq_thermal_max_step = (100 / cpufreq_thermal_pctg) - 1; Why don't you use the local variable in the expression on the right-hand side? Also please note that the formula doesn't allow the default combination of reduction_step and max_step to be produced which is a bit odd. What would be wrong with max_step = 60 / reduction_step? > +} > + > void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy) > { > unsigned int cpu; > > + acpi_thermal_cpufreq_config(); > + > for_each_cpu(cpu, policy->related_cpus) { > struct acpi_processor *pr = per_cpu(processors, cpu); > int ret; > diff --git a/include/linux/acpi.h b/include/linux/acpi.h > index ba3f601b6e3d..407617670221 100644 > --- a/include/linux/acpi.h > +++ b/include/linux/acpi.h > @@ -1541,4 +1541,13 @@ static inline void acpi_device_notify(struct device *dev) { } > static inline void acpi_device_notify_remove(struct device *dev) { } > #endif > > +#ifdef CONFIG_HAVE_ARM_SMCCC_DISCOVERY > +int acpi_thermal_cpufreq_pctg(void); > +#else > +static inline int acpi_thermal_cpufreq_pctg(void) > +{ > + return 0; > +} > +#endif > + This can go into drivers/acpi/internal.h as far as I'm concerned. > #endif /*_LINUX_ACPI_H*/ > --