On Fri, Feb 25, 2022 at 9:46 AM Rafael J. Wysocki <rjw@xxxxxxxxxxxxx> wrote: > > On Thursday, February 24, 2022 9:08:30 AM CET Feng Tang wrote: ... > > > So it looks like a new mechanism is needed for that. > > > > If you think idle class is not the right place to solve it, I can > > also help testing new patches. > > So I have the appended experimental patch to address this issue that's not > been tested at all. Caveat emptor. Hi Rafael, O.K., you gave fair warning. The patch applied fine. It does not compile for me. The function cpuidle_update_retain_tick does not exist. Shouldn't it be somewhere in cpuidle.c? I used the function cpuidle_disable_device as a template for searching and comparing. Because all of my baseline results are with kernel 5.17-rc3, that is what I am still using. Error: ld: drivers/cpufreq/intel_pstate.o: in function `intel_pstate_update_perf_ctl': intel_pstate.c:(.text+0x2520): undefined reference to `cpuidle_update_retain_tick' ... Doug > > I've been looking for something relatively low-overhead and taking all of the > dependencies into account. > > --- > drivers/cpufreq/intel_pstate.c | 40 ++++++++++++++++++++++++++++--------- > drivers/cpuidle/governors/ladder.c | 6 +++-- > drivers/cpuidle/governors/menu.c | 2 + > drivers/cpuidle/governors/teo.c | 3 ++ > include/linux/cpuidle.h | 4 +++ > 5 files changed, 44 insertions(+), 11 deletions(-) > > Index: linux-pm/drivers/cpuidle/governors/menu.c > =================================================================== > --- linux-pm.orig/drivers/cpuidle/governors/menu.c > +++ linux-pm/drivers/cpuidle/governors/menu.c > @@ -284,6 +284,8 @@ static int menu_select(struct cpuidle_dr > if (unlikely(delta < 0)) { > delta = 0; > delta_tick = 0; > + } else if (dev->retain_tick) { > + delta = delta_tick; > } > data->next_timer_ns = delta; > > Index: linux-pm/drivers/cpuidle/governors/teo.c > =================================================================== > --- linux-pm.orig/drivers/cpuidle/governors/teo.c > +++ linux-pm/drivers/cpuidle/governors/teo.c > @@ -308,6 +308,9 @@ static int teo_select(struct cpuidle_dri > cpu_data->time_span_ns = local_clock(); > > duration_ns = tick_nohz_get_sleep_length(&delta_tick); > + if (dev->retain_tick) > + duration_ns = delta_tick; > + > cpu_data->sleep_length_ns = duration_ns; > > /* Check if there is any choice in the first place. */ > Index: linux-pm/include/linux/cpuidle.h > =================================================================== > --- linux-pm.orig/include/linux/cpuidle.h > +++ linux-pm/include/linux/cpuidle.h > @@ -93,6 +93,7 @@ struct cpuidle_device { > unsigned int registered:1; > unsigned int enabled:1; > unsigned int poll_time_limit:1; > + unsigned int retain_tick:1; > unsigned int cpu; > ktime_t next_hrtimer; > > @@ -172,6 +173,8 @@ extern int cpuidle_play_dead(void); > extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev); > static inline struct cpuidle_device *cpuidle_get_device(void) > {return __this_cpu_read(cpuidle_devices); } > + > +extern void cpuidle_update_retain_tick(bool val); > #else > static inline void disable_cpuidle(void) { } > static inline bool cpuidle_not_available(struct cpuidle_driver *drv, > @@ -211,6 +214,7 @@ static inline int cpuidle_play_dead(void > static inline struct cpuidle_driver *cpuidle_get_cpu_driver( > struct cpuidle_device *dev) {return NULL; } > static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; } > +static inline void cpuidle_update_retain_tick(bool val) { } > #endif > > #ifdef CONFIG_CPU_IDLE > Index: linux-pm/drivers/cpufreq/intel_pstate.c > =================================================================== > --- linux-pm.orig/drivers/cpufreq/intel_pstate.c > +++ linux-pm/drivers/cpufreq/intel_pstate.c > @@ -19,6 +19,7 @@ > #include <linux/list.h> > #include <linux/cpu.h> > #include <linux/cpufreq.h> > +#include <linux/cpuidle.h> > #include <linux/sysfs.h> > #include <linux/types.h> > #include <linux/fs.h> > @@ -1970,6 +1971,30 @@ static inline void intel_pstate_cppc_set > } > #endif /* CONFIG_ACPI_CPPC_LIB */ > > +static void intel_pstate_update_perf_ctl(struct cpudata *cpu) > +{ > + int pstate = cpu->pstate.current_pstate; > + > + /* > + * Avoid stopping the scheduler tick from cpuidle on CPUs in turbo > + * P-states to prevent them from getting back to the high frequency > + * right away after getting out of deep idle. > + */ > + cpuidle_update_retain_tick(pstate > cpu->pstate.max_pstate); > + wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate)); > +} > + > +static void intel_pstate_update_perf_ctl_wrapper(void *cpu_data) > +{ > + intel_pstate_update_perf_ctl(cpu_data); > +} > + > +static void intel_pstate_update_perf_ctl_on_cpu(struct cpudata *cpu) > +{ > + smp_call_function_single(cpu->cpu, intel_pstate_update_perf_ctl_wrapper, > + cpu, 1); > +} > + > static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) > { > trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); > @@ -1979,8 +2004,7 @@ static void intel_pstate_set_pstate(stru > * the CPU being updated, so force the register update to run on the > * right CPU. > */ > - wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL, > - pstate_funcs.get_val(cpu, pstate)); > + intel_pstate_update_perf_ctl_on_cpu(cpu); > } > > static void intel_pstate_set_min_pstate(struct cpudata *cpu) > @@ -2256,7 +2280,7 @@ static void intel_pstate_update_pstate(s > return; > > cpu->pstate.current_pstate = pstate; > - wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate)); > + intel_pstate_update_perf_ctl(cpu); > } > > static void intel_pstate_adjust_pstate(struct cpudata *cpu) > @@ -2843,11 +2867,9 @@ static void intel_cpufreq_perf_ctl_updat > u32 target_pstate, bool fast_switch) > { > if (fast_switch) > - wrmsrl(MSR_IA32_PERF_CTL, > - pstate_funcs.get_val(cpu, target_pstate)); > + intel_pstate_update_perf_ctl(cpu); > else > - wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL, > - pstate_funcs.get_val(cpu, target_pstate)); > + intel_pstate_update_perf_ctl_on_cpu(cpu); > } > > static int intel_cpufreq_update_pstate(struct cpufreq_policy *policy, > @@ -2857,6 +2879,8 @@ static int intel_cpufreq_update_pstate(s > int old_pstate = cpu->pstate.current_pstate; > > target_pstate = intel_pstate_prepare_request(cpu, target_pstate); > + cpu->pstate.current_pstate = target_pstate; > + > if (hwp_active) { > int max_pstate = policy->strict_target ? > target_pstate : cpu->max_perf_ratio; > @@ -2867,8 +2891,6 @@ static int intel_cpufreq_update_pstate(s > intel_cpufreq_perf_ctl_update(cpu, target_pstate, fast_switch); > } > > - cpu->pstate.current_pstate = target_pstate; > - > intel_cpufreq_trace(cpu, fast_switch ? INTEL_PSTATE_TRACE_FAST_SWITCH : > INTEL_PSTATE_TRACE_TARGET, old_pstate); > > Index: linux-pm/drivers/cpuidle/governors/ladder.c > =================================================================== > --- linux-pm.orig/drivers/cpuidle/governors/ladder.c > +++ linux-pm/drivers/cpuidle/governors/ladder.c > @@ -61,10 +61,10 @@ static inline void ladder_do_selection(s > * ladder_select_state - selects the next state to enter > * @drv: cpuidle driver > * @dev: the CPU > - * @dummy: not used > + * @stop_tick: Whether or not to stop the scheduler tick > */ > static int ladder_select_state(struct cpuidle_driver *drv, > - struct cpuidle_device *dev, bool *dummy) > + struct cpuidle_device *dev, bool *stop_tick) > { > struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); > struct ladder_device_state *last_state; > @@ -73,6 +73,8 @@ static int ladder_select_state(struct cp > s64 latency_req = cpuidle_governor_latency_req(dev->cpu); > s64 last_residency; > > + *stop_tick = !dev->retain_tick; > + > /* Special case when user has set very strict latency requirement */ > if (unlikely(latency_req == 0)) { > ladder_do_selection(dev, ldev, last_idx, 0); > > >