Re: [PATCH 4/4] sched: Rename arch_update_thermal_pressure into arch_update_hw_pressure

Lukasz Luba <lukasz.luba@xxxxxxx> · Thu, 14 Dec 2023 08:31:19 +0000

On 12/12/23 14:27, Vincent Guittot wrote:
Now that cpufreq provides a pressure value to the scheduler, rename
arch_update_thermal_pressure into hw pressure to reflect that it returns
a pressure applied by HW with a high frequency and which needs filtering.

I would elaborte this meaning 'filtering' here. Something like:
'... high frequency and which needs filtering to smooth the singal and
get an average value. That reflects available capacity of the CPU in
longer period'

This pressure is not always related to thermal mitigation but can also be
generated by max current limitation as an example.

Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
---
  arch/arm/include/asm/topology.h               |  6 ++---
  arch/arm64/include/asm/topology.h             |  6 ++---
  drivers/base/arch_topology.c                  | 26 +++++++++----------
  drivers/cpufreq/qcom-cpufreq-hw.c             |  4 +--
  include/linux/arch_topology.h                 |  8 +++---
  include/linux/sched/topology.h                |  8 +++---
  .../{thermal_pressure.h => hw_pressure.h}     | 14 +++++-----
  include/trace/events/sched.h                  |  2 +-
  init/Kconfig                                  | 12 ++++-----
  kernel/sched/core.c                           |  8 +++---
  kernel/sched/fair.c                           | 12 ++++-----
  kernel/sched/pelt.c                           | 18 ++++++-------
  kernel/sched/pelt.h                           | 16 ++++++------
  kernel/sched/sched.h                          |  4 +--
  14 files changed, 72 insertions(+), 72 deletions(-)
  rename include/trace/events/{thermal_pressure.h => hw_pressure.h} (55%)

diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 853c4f81ba4a..e175e8596b5d 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -22,9 +22,9 @@
  /* Enable topology flag updates */
  #define arch_update_cpu_topology topology_update_cpu_topology
  
-/* Replace task scheduler's default thermal pressure API */
-#define arch_scale_thermal_pressure topology_get_thermal_pressure
-#define arch_update_thermal_pressure	topology_update_thermal_pressure
+/* Replace task scheduler's default hw pressure API */
+#define arch_scale_hw_pressure topology_get_hw_pressure
+#define arch_update_hw_pressure	topology_update_hw_pressure
  
  #else
  
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index a323b109b9c4..a427650bdfba 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -35,9 +35,9 @@ void update_freq_counters_refs(void);
  /* Enable topology flag updates */
  #define arch_update_cpu_topology topology_update_cpu_topology
  
-/* Replace task scheduler's default thermal pressure API */
-#define arch_scale_thermal_pressure topology_get_thermal_pressure
-#define arch_update_thermal_pressure	topology_update_thermal_pressure
+/* Replace task scheduler's default hw pressure API */

s/hw/HW/ ?

+#define arch_scale_hw_pressure topology_get_hw_pressure
+#define arch_update_hw_pressure	topology_update_hw_pressure
  
  #include <asm-generic/topology.h>
  
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 0906114963ff..3d8dc9d5c3ad 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -22,7 +22,7 @@
  #include <linux/units.h>
  
  #define CREATE_TRACE_POINTS
-#include <trace/events/thermal_pressure.h>
+#include <trace/events/hw_pressure.h>
  
  static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
  static struct cpumask scale_freq_counters_mask;
@@ -160,26 +160,26 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
  	per_cpu(cpu_scale, cpu) = capacity;
  }
  
-DEFINE_PER_CPU(unsigned long, thermal_pressure);
+DEFINE_PER_CPU(unsigned long, hw_pressure);
  
  /**
- * topology_update_thermal_pressure() - Update thermal pressure for CPUs
+ * topology_update_hw_pressure() - Update hw pressure for CPUs

same here: HW?

   * @cpus        : The related CPUs for which capacity has been reduced
   * @capped_freq : The maximum allowed frequency that CPUs can run at
   *
- * Update the value of thermal pressure for all @cpus in the mask. The
+ * Update the value of hw pressure for all @cpus in the mask. The

HW?

   * cpumask should include all (online+offline) affected CPUs, to avoid
   * operating on stale data when hot-plug is used for some CPUs. The
   * @capped_freq reflects the currently allowed max CPUs frequency due to
- * thermal capping. It might be also a boost frequency value, which is bigger
+ * hw capping. It might be also a boost frequency value, which is bigger

HW?

   * than the internal 'capacity_freq_ref' max frequency. In such case the
   * pressure value should simply be removed, since this is an indication that
- * there is no thermal throttling. The @capped_freq must be provided in kHz.
+ * there is no hw throttling. The @capped_freq must be provided in kHz.

HW?

   */
-void topology_update_thermal_pressure(const struct cpumask *cpus,
+void topology_update_hw_pressure(const struct cpumask *cpus,
  				      unsigned long capped_freq)
  {
-	unsigned long max_capacity, capacity, th_pressure;
+	unsigned long max_capacity, capacity, hw_pressure;
  	u32 max_freq;
  	int cpu;
  
@@ -189,21 +189,21 @@ void topology_update_thermal_pressure(const struct cpumask *cpus,
  
  	/*
  	 * Handle properly the boost frequencies, which should simply clean
-	 * the thermal pressure value.
+	 * the hw pressure value.

HW?

  	 */
  	if (max_freq <= capped_freq)
  		capacity = max_capacity;
  	else
  		capacity = mult_frac(max_capacity, capped_freq, max_freq);
  
-	th_pressure = max_capacity - capacity;
+	hw_pressure = max_capacity - capacity;
  
-	trace_thermal_pressure_update(cpu, th_pressure);
+	trace_hw_pressure_update(cpu, hw_pressure);
  
  	for_each_cpu(cpu, cpus)
-		WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
+		WRITE_ONCE(per_cpu(hw_pressure, cpu), hw_pressure);
  }
-EXPORT_SYMBOL_GPL(topology_update_thermal_pressure);
+EXPORT_SYMBOL_GPL(topology_update_hw_pressure);
  
  static ssize_t cpu_capacity_show(struct device *dev,
  				 struct device_attribute *attr,
diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index 70b0f21968a0..a619202ba81d 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -347,8 +347,8 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
  
  	throttled_freq = freq_hz / HZ_PER_KHZ;
  
-	/* Update thermal pressure (the boost frequencies are accepted) */
-	arch_update_thermal_pressure(policy->related_cpus, throttled_freq);
+	/* Update hw pressure (the boost frequencies are accepted) */

HW?

+	arch_update_hw_pressure(policy->related_cpus, throttled_freq);
  

[snip]

  
  	if (housekeeping_cpu(cpu, HK_TYPE_TICK))
@@ -5669,8 +5669,8 @@ void scheduler_tick(void)
  	rq_lock(rq, &rf);
  
  	update_rq_clock(rq);
-	thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
-	update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure);
+	hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
+	update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);

We switch to task clock here, could you tell me why?
Don't we have to maintain the boot command parameter for the shift?

  	curr->sched_class->task_tick(rq, curr, 0);
  	if (sched_feat(LATENCY_WARN))
  		resched_latency = cpu_resched_latency(rq);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 11d3be829302..07050955d6e0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4936,7 +4936,7 @@ static inline unsigned long get_actual_cpu_capacity(int cpu)
  {
  	unsigned long capacity = arch_scale_cpu_capacity(cpu);
  
-	capacity -= max(thermal_load_avg(cpu_rq(cpu)), cpufreq_get_pressure(cpu));
+	capacity -= max(hw_load_avg(cpu_rq(cpu)), cpufreq_get_pressure(cpu));
  
  	return capacity;
  }
@@ -4968,7 +4968,7 @@ static inline int util_fits_cpu(unsigned long util,
  	 * Similarly if a task is capped to arch_scale_cpu_capacity(little_cpu), it
  	 * should fit a little cpu even if there's some pressure.
  	 *
-	 * Only exception is for thermal pressure since it has a direct impact
+	 * Only exception is for hw or cpufreq pressure since it has a direct impact

HW?

  	 * on available OPP of the system.
  	 *
  	 * We honour it for uclamp_min only as a drop in performance level
@@ -9224,7 +9224,7 @@ static inline bool others_have_blocked(struct rq *rq)
  	if (READ_ONCE(rq->avg_dl.util_avg))
  		return true;
  
-	if (thermal_load_avg(rq))
+	if (hw_load_avg(rq))
  		return true;
  
  #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
@@ -9256,7 +9256,7 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
  {
  	const struct sched_class *curr_class;
  	u64 now = rq_clock_pelt(rq);
-	unsigned long thermal_pressure;
+	unsigned long hw_pressure;
  	bool decayed;
  
  	/*
@@ -9265,11 +9265,11 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
  	 */
  	curr_class = rq->curr->sched_class;
  
-	thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
+	hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
  
  	decayed = update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
  		  update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
-		  update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure) |
+		  update_hw_load_avg(now, rq, hw_pressure) |

Here also the rq_clock_thermal() is not used anymore. Shouldn't we
remove the rq_clock_thermal() if not used anymore (I cannot se this in
the patch)?

  		  update_irq_load_avg(rq, 0);
  
  	if (others_have_blocked(rq))

[snip]

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e58a54bda77d..7eaf186d071e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1078,8 +1078,8 @@ struct rq {
  #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
  	struct sched_avg	avg_irq;
  #endif
-#ifdef CONFIG_SCHED_THERMAL_PRESSURE
-	struct sched_avg	avg_thermal;
+#ifdef CONFIG_SCHED_HW_PRESSURE
+	struct sched_avg	avg_hw;
  #endif
  	u64			idle_stamp;
  	u64			avg_idle;

I don't see that rq_clock_thermal() removal in this header.
Maybe I miss some patch?

BTW, I like the name 'HW pressure' for this information/signal.