On Thu, 14 Dec 2023 at 09:30, Lukasz Luba <lukasz.luba@xxxxxxx> wrote: > > > On 12/12/23 14:27, Vincent Guittot wrote: > > Now that cpufreq provides a pressure value to the scheduler, rename > > arch_update_thermal_pressure into hw pressure to reflect that it returns > > a pressure applied by HW with a high frequency and which needs filtering. > > I would elaborte this meaning 'filtering' here. Something like: > '... high frequency and which needs filtering to smooth the singal and > get an average value. That reflects available capacity of the CPU in > longer period' Ok I will update the commit message to provide more details > > > This pressure is not always related to thermal mitigation but can also be > > generated by max current limitation as an example. > > > > Signed-off-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx> > > --- > > arch/arm/include/asm/topology.h | 6 ++--- > > arch/arm64/include/asm/topology.h | 6 ++--- > > drivers/base/arch_topology.c | 26 +++++++++---------- > > drivers/cpufreq/qcom-cpufreq-hw.c | 4 +-- > > include/linux/arch_topology.h | 8 +++--- > > include/linux/sched/topology.h | 8 +++--- > > .../{thermal_pressure.h => hw_pressure.h} | 14 +++++----- > > include/trace/events/sched.h | 2 +- > > init/Kconfig | 12 ++++----- > > kernel/sched/core.c | 8 +++--- > > kernel/sched/fair.c | 12 ++++----- > > kernel/sched/pelt.c | 18 ++++++------- > > kernel/sched/pelt.h | 16 ++++++------ > > kernel/sched/sched.h | 4 +-- > > 14 files changed, 72 insertions(+), 72 deletions(-) > > rename include/trace/events/{thermal_pressure.h => hw_pressure.h} (55%) > > > > diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h > > index 853c4f81ba4a..e175e8596b5d 100644 > > --- a/arch/arm/include/asm/topology.h > > +++ b/arch/arm/include/asm/topology.h > > @@ -22,9 +22,9 @@ > > /* Enable topology flag updates */ > > #define arch_update_cpu_topology topology_update_cpu_topology > > > > -/* Replace task scheduler's default thermal pressure API */ > > -#define arch_scale_thermal_pressure topology_get_thermal_pressure > > -#define arch_update_thermal_pressure topology_update_thermal_pressure > > +/* Replace task scheduler's default hw pressure API */ > > +#define arch_scale_hw_pressure topology_get_hw_pressure > > +#define arch_update_hw_pressure topology_update_hw_pressure > > > > #else > > > > diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h > > index a323b109b9c4..a427650bdfba 100644 > > --- a/arch/arm64/include/asm/topology.h > > +++ b/arch/arm64/include/asm/topology.h > > @@ -35,9 +35,9 @@ void update_freq_counters_refs(void); > > /* Enable topology flag updates */ > > #define arch_update_cpu_topology topology_update_cpu_topology > > > > -/* Replace task scheduler's default thermal pressure API */ > > -#define arch_scale_thermal_pressure topology_get_thermal_pressure > > -#define arch_update_thermal_pressure topology_update_thermal_pressure > > +/* Replace task scheduler's default hw pressure API */ > > s/hw/HW/ ? > > > +#define arch_scale_hw_pressure topology_get_hw_pressure > > +#define arch_update_hw_pressure topology_update_hw_pressure > > > > #include <asm-generic/topology.h> > > > > diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c > > index 0906114963ff..3d8dc9d5c3ad 100644 > > --- a/drivers/base/arch_topology.c > > +++ b/drivers/base/arch_topology.c > > @@ -22,7 +22,7 @@ > > #include <linux/units.h> > > > > #define CREATE_TRACE_POINTS > > -#include <trace/events/thermal_pressure.h> > > +#include <trace/events/hw_pressure.h> > > > > static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data); > > static struct cpumask scale_freq_counters_mask; > > @@ -160,26 +160,26 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity) > > per_cpu(cpu_scale, cpu) = capacity; > > } > > > > -DEFINE_PER_CPU(unsigned long, thermal_pressure); > > +DEFINE_PER_CPU(unsigned long, hw_pressure); > > > > /** > > - * topology_update_thermal_pressure() - Update thermal pressure for CPUs > > + * topology_update_hw_pressure() - Update hw pressure for CPUs > > same here: HW? > > > * @cpus : The related CPUs for which capacity has been reduced > > * @capped_freq : The maximum allowed frequency that CPUs can run at > > * > > - * Update the value of thermal pressure for all @cpus in the mask. The > > + * Update the value of hw pressure for all @cpus in the mask. The > > HW? > > > * cpumask should include all (online+offline) affected CPUs, to avoid > > * operating on stale data when hot-plug is used for some CPUs. The > > * @capped_freq reflects the currently allowed max CPUs frequency due to > > - * thermal capping. It might be also a boost frequency value, which is bigger > > + * hw capping. It might be also a boost frequency value, which is bigger > > HW? > > > * than the internal 'capacity_freq_ref' max frequency. In such case the > > * pressure value should simply be removed, since this is an indication that > > - * there is no thermal throttling. The @capped_freq must be provided in kHz. > > + * there is no hw throttling. The @capped_freq must be provided in kHz. > > HW? > > > */ > > -void topology_update_thermal_pressure(const struct cpumask *cpus, > > +void topology_update_hw_pressure(const struct cpumask *cpus, > > unsigned long capped_freq) > > { > > - unsigned long max_capacity, capacity, th_pressure; > > + unsigned long max_capacity, capacity, hw_pressure; > > u32 max_freq; > > int cpu; > > > > @@ -189,21 +189,21 @@ void topology_update_thermal_pressure(const struct cpumask *cpus, > > > > /* > > * Handle properly the boost frequencies, which should simply clean > > - * the thermal pressure value. > > + * the hw pressure value. > > HW? > > > */ > > if (max_freq <= capped_freq) > > capacity = max_capacity; > > else > > capacity = mult_frac(max_capacity, capped_freq, max_freq); > > > > - th_pressure = max_capacity - capacity; > > + hw_pressure = max_capacity - capacity; > > > > - trace_thermal_pressure_update(cpu, th_pressure); > > + trace_hw_pressure_update(cpu, hw_pressure); > > > > for_each_cpu(cpu, cpus) > > - WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure); > > + WRITE_ONCE(per_cpu(hw_pressure, cpu), hw_pressure); > > } > > -EXPORT_SYMBOL_GPL(topology_update_thermal_pressure); > > +EXPORT_SYMBOL_GPL(topology_update_hw_pressure); > > > > static ssize_t cpu_capacity_show(struct device *dev, > > struct device_attribute *attr, > > diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c > > index 70b0f21968a0..a619202ba81d 100644 > > --- a/drivers/cpufreq/qcom-cpufreq-hw.c > > +++ b/drivers/cpufreq/qcom-cpufreq-hw.c > > @@ -347,8 +347,8 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) > > > > throttled_freq = freq_hz / HZ_PER_KHZ; > > > > - /* Update thermal pressure (the boost frequencies are accepted) */ > > - arch_update_thermal_pressure(policy->related_cpus, throttled_freq); > > + /* Update hw pressure (the boost frequencies are accepted) */ > > HW? > > > + arch_update_hw_pressure(policy->related_cpus, throttled_freq); > > > > [snip] > > > > > if (housekeeping_cpu(cpu, HK_TYPE_TICK)) > > @@ -5669,8 +5669,8 @@ void scheduler_tick(void) > > rq_lock(rq, &rf); > > > > update_rq_clock(rq); > > - thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq)); > > - update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure); > > + hw_pressure = arch_scale_hw_pressure(cpu_of(rq)); > > + update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure); > > We switch to task clock here, could you tell me why? > Don't we have to maintain the boot command parameter for the shift? This should have been part of the patch5 that I finally removed. IMO, the additional time shift with rq_clock_thermal is no more needed now that we have 2 separates signals > > > curr->sched_class->task_tick(rq, curr, 0); > > if (sched_feat(LATENCY_WARN)) > > resched_latency = cpu_resched_latency(rq); > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > > index 11d3be829302..07050955d6e0 100644 > > --- a/kernel/sched/fair.c > > +++ b/kernel/sched/fair.c > > @@ -4936,7 +4936,7 @@ static inline unsigned long get_actual_cpu_capacity(int cpu) > > { > > unsigned long capacity = arch_scale_cpu_capacity(cpu); > > > > - capacity -= max(thermal_load_avg(cpu_rq(cpu)), cpufreq_get_pressure(cpu)); > > + capacity -= max(hw_load_avg(cpu_rq(cpu)), cpufreq_get_pressure(cpu)); > > > > return capacity; > > } > > @@ -4968,7 +4968,7 @@ static inline int util_fits_cpu(unsigned long util, > > * Similarly if a task is capped to arch_scale_cpu_capacity(little_cpu), it > > * should fit a little cpu even if there's some pressure. > > * > > - * Only exception is for thermal pressure since it has a direct impact > > + * Only exception is for hw or cpufreq pressure since it has a direct impact > > HW? > > > * on available OPP of the system. > > * > > * We honour it for uclamp_min only as a drop in performance level > > @@ -9224,7 +9224,7 @@ static inline bool others_have_blocked(struct rq *rq) > > if (READ_ONCE(rq->avg_dl.util_avg)) > > return true; > > > > - if (thermal_load_avg(rq)) > > + if (hw_load_avg(rq)) > > return true; > > > > #ifdef CONFIG_HAVE_SCHED_AVG_IRQ > > @@ -9256,7 +9256,7 @@ static bool __update_blocked_others(struct rq *rq, bool *done) > > { > > const struct sched_class *curr_class; > > u64 now = rq_clock_pelt(rq); > > - unsigned long thermal_pressure; > > + unsigned long hw_pressure; > > bool decayed; > > > > /* > > @@ -9265,11 +9265,11 @@ static bool __update_blocked_others(struct rq *rq, bool *done) > > */ > > curr_class = rq->curr->sched_class; > > > > - thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq)); > > + hw_pressure = arch_scale_hw_pressure(cpu_of(rq)); > > > > decayed = update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) | > > update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) | > > - update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure) | > > + update_hw_load_avg(now, rq, hw_pressure) | > > Here also the rq_clock_thermal() is not used anymore. Shouldn't we > remove the rq_clock_thermal() if not used anymore (I cannot se this in > the patch)? > > > update_irq_load_avg(rq, 0); > > > > if (others_have_blocked(rq)) > > [snip] > > > diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h > > index e58a54bda77d..7eaf186d071e 100644 > > --- a/kernel/sched/sched.h > > +++ b/kernel/sched/sched.h > > @@ -1078,8 +1078,8 @@ struct rq { > > #ifdef CONFIG_HAVE_SCHED_AVG_IRQ > > struct sched_avg avg_irq; > > #endif > > -#ifdef CONFIG_SCHED_THERMAL_PRESSURE > > - struct sched_avg avg_thermal; > > +#ifdef CONFIG_SCHED_HW_PRESSURE > > + struct sched_avg avg_hw; > > #endif > > u64 idle_stamp; > > u64 avg_idle; > > I don't see that rq_clock_thermal() removal in this header. > Maybe I miss some patch? > > BTW, I like the name 'HW pressure' for this information/signal.