Re: [PATCH] cpufreq: ondemand: Change the calculation of target frequency

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Sorry for joining the party so late.. I was running away from reviewing it :(

On 31 May 2013 02:37, Stratos Karafotis <stratosk@xxxxxxxxxxxx> wrote:
> Ondemand calculates load in terms of frequency and increases it only
> if the load_freq is greater than up_threshold multiplied by current
> or average frequency. This seems to produce oscillations of frequency
> between min and max because, for example, a relatively small load can
> easily saturate minimum frequency and lead the CPU to max. Then, the
> CPU will decrease back to min due to a small load_freq.
>
> This patch changes the calculation method of load and target frequency
> considering 2 points:
> - Load computation should be independent from current or average
> measured frequency. For example an absolute load 80% at 100MHz is not
> necessarily equivalent to 8% at 1000MHz in the next sampling interval.
> - Target frequency should be increased to any value of frequency table
> proportional to absolute load, instead to only the max.
>
> The patch also removes the unused code as a result of the above changes.
>
> Tested on Intel i7-3770 CPU @ 3.40GHz and on Quad core 1500MHz Krait.
> Phoronix benchmark of Linux Kernel Compilation 3.1 test shows an
> increase ~1.5% in performance. cpufreq_stats (time_in_state) shows
> that middle frequencies are used more, with this patch. Highest
> and lowest frequencies were used less by ~9%
>
> Signed-off-by: Stratos Karafotis <stratosk@xxxxxxxxxxxx>
> ---
>  arch/x86/include/asm/processor.h   | 29 ----------------------
>  drivers/cpufreq/Makefile           |  2 +-
>  drivers/cpufreq/acpi-cpufreq.c     |  5 ----
>  drivers/cpufreq/cpufreq.c          | 21 ----------------
>  drivers/cpufreq/cpufreq_governor.c | 10 +-------
>  drivers/cpufreq/cpufreq_governor.h |  1 -
>  drivers/cpufreq/cpufreq_ondemand.c | 39 ++++++-----------------------
>  drivers/cpufreq/mperf.c            | 51 --------------------------------------
>  drivers/cpufreq/mperf.h            |  9 -------
>  include/linux/cpufreq.h            |  6 -----
>  10 files changed, 9 insertions(+), 164 deletions(-)
>  delete mode 100644 drivers/cpufreq/mperf.c
>  delete mode 100644 drivers/cpufreq/mperf.h

I believe you should have removed other users of getavg() in a separate
patch and also cc'd relevant people so that you can some review comments
from  them.

> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index 22224b3..2874a3b 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -941,35 +941,6 @@ extern int set_tsc_mode(unsigned int val);
>
>  extern u16 amd_get_nb_id(int cpu);
>
> -struct aperfmperf {
> -       u64 aperf, mperf;
> -};
> -
> -static inline void get_aperfmperf(struct aperfmperf *am)
> -{
> -       WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF));
> -
> -       rdmsrl(MSR_IA32_APERF, am->aperf);
> -       rdmsrl(MSR_IA32_MPERF, am->mperf);
> -}
> -
> -#define APERFMPERF_SHIFT 10
> -
> -static inline
> -unsigned long calc_aperfmperf_ratio(struct aperfmperf *old,
> -                                   struct aperfmperf *new)
> -{
> -       u64 aperf = new->aperf - old->aperf;
> -       u64 mperf = new->mperf - old->mperf;
> -       unsigned long ratio = aperf;
> -
> -       mperf >>= APERFMPERF_SHIFT;
> -       if (mperf)
> -               ratio = div64_u64(aperf, mperf);
> -
> -       return ratio;
> -}
> -
>  extern unsigned long arch_align_stack(unsigned long sp);
>  extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
>
> diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
> index 315b923..4519fb1 100644
> --- a/drivers/cpufreq/Makefile
> +++ b/drivers/cpufreq/Makefile
> @@ -23,7 +23,7 @@ obj-$(CONFIG_GENERIC_CPUFREQ_CPU0)    += cpufreq-cpu0.o
>  # powernow-k8 can load then. ACPI is preferred to all other hardware-specific drivers.
>  # speedstep-* is preferred over p4-clockmod.
>
> -obj-$(CONFIG_X86_ACPI_CPUFREQ)         += acpi-cpufreq.o mperf.o
> +obj-$(CONFIG_X86_ACPI_CPUFREQ)         += acpi-cpufreq.o
>  obj-$(CONFIG_X86_POWERNOW_K8)          += powernow-k8.o
>  obj-$(CONFIG_X86_PCC_CPUFREQ)          += pcc-cpufreq.o
>  obj-$(CONFIG_X86_POWERNOW_K6)          += powernow-k6.o
> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
> index 11b8b4b..0025cdd 100644
> --- a/drivers/cpufreq/acpi-cpufreq.c
> +++ b/drivers/cpufreq/acpi-cpufreq.c
> @@ -45,7 +45,6 @@
>  #include <asm/msr.h>
>  #include <asm/processor.h>
>  #include <asm/cpufeature.h>
> -#include "mperf.h"
>
>  MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
>  MODULE_DESCRIPTION("ACPI Processor P-States Driver");
> @@ -842,10 +841,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
>         /* notify BIOS that we exist */
>         acpi_processor_notify_smm(THIS_MODULE);
>
> -       /* Check for APERF/MPERF support in hardware */
> -       if (boot_cpu_has(X86_FEATURE_APERFMPERF))
> -               acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
> -
>         pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
>         for (i = 0; i < perf->state_count; i++)
>                 pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 2d53f47..04ceddc 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -1502,27 +1502,6 @@ no_policy:
>  }
>  EXPORT_SYMBOL_GPL(cpufreq_driver_target);
>
> -int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
> -{
> -       int ret = 0;
> -
> -       if (cpufreq_disabled())
> -               return ret;
> -
> -       if (!cpufreq_driver->getavg)
> -               return 0;
> -
> -       policy = cpufreq_cpu_get(policy->cpu);
> -       if (!policy)
> -               return -EINVAL;
> -
> -       ret = cpufreq_driver->getavg(policy, cpu);
> -
> -       cpufreq_cpu_put(policy);
> -       return ret;
> -}
> -EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
> -
>  /*
>   * when "event" is CPUFREQ_GOV_LIMITS
>   */
> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> index 5af40ad..eeab30a 100644
> --- a/drivers/cpufreq/cpufreq_governor.c
> +++ b/drivers/cpufreq/cpufreq_governor.c
> @@ -97,7 +97,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
>
>         policy = cdbs->cur_policy;
>
> -       /* Get Absolute Load (in terms of freq for ondemand gov) */
> +       /* Get Absolute Load */
>         for_each_cpu(j, policy->cpus) {
>                 struct cpu_dbs_common_info *j_cdbs;
>                 u64 cur_wall_time, cur_idle_time;
> @@ -148,14 +148,6 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
>
>                 load = 100 * (wall_time - idle_time) / wall_time;
>
> -               if (dbs_data->cdata->governor == GOV_ONDEMAND) {
> -                       int freq_avg = __cpufreq_driver_getavg(policy, j);
> -                       if (freq_avg <= 0)
> -                               freq_avg = policy->cur;
> -
> -                       load *= freq_avg;
> -               }
> -
>                 if (load > max_load)
>                         max_load = load;
>         }
> diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
> index e16a961..e899c11 100644
> --- a/drivers/cpufreq/cpufreq_governor.h
> +++ b/drivers/cpufreq/cpufreq_governor.h
> @@ -169,7 +169,6 @@ struct od_dbs_tuners {
>         unsigned int sampling_rate;
>         unsigned int sampling_down_factor;
>         unsigned int up_threshold;
> -       unsigned int adj_up_threshold;
>         unsigned int powersave_bias;
>         unsigned int io_is_busy;
>  };
> diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
> index 4b9bb5d..c1e6d3e 100644
> --- a/drivers/cpufreq/cpufreq_ondemand.c
> +++ b/drivers/cpufreq/cpufreq_ondemand.c
> @@ -29,11 +29,9 @@
>  #include "cpufreq_governor.h"
>
>  /* On-demand governor macros */
> -#define DEF_FREQUENCY_DOWN_DIFFERENTIAL                (10)
>  #define DEF_FREQUENCY_UP_THRESHOLD             (80)
>  #define DEF_SAMPLING_DOWN_FACTOR               (1)
>  #define MAX_SAMPLING_DOWN_FACTOR               (100000)
> -#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL      (3)
>  #define MICRO_FREQUENCY_UP_THRESHOLD           (95)
>  #define MICRO_FREQUENCY_MIN_SAMPLE_RATE                (10000)
>  #define MIN_FREQUENCY_UP_THRESHOLD             (11)
> @@ -159,14 +157,10 @@ static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
>
>  /*
>   * Every sampling_rate, we check, if current idle time is less than 20%
> - * (default), then we try to increase frequency. Every sampling_rate, we look
> - * for the lowest frequency which can sustain the load while keeping idle time
> - * over 30%. If such a frequency exist, we try to decrease to this frequency.
> - *
> - * Any frequency increase takes it to the maximum frequency. Frequency reduction
> - * happens at minimum steps of 5% (default) of current frequency
> + * (default), then we try to increase frequency. Else, we adjust the frequency
> + * proportional to load.
>   */
> -static void od_check_cpu(int cpu, unsigned int load_freq)
> +static void od_check_cpu(int cpu, unsigned int load)
>  {
>         struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
>         struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy;
> @@ -176,29 +170,17 @@ static void od_check_cpu(int cpu, unsigned int load_freq)
>         dbs_info->freq_lo = 0;
>
>         /* Check for frequency increase */
> -       if (load_freq > od_tuners->up_threshold * policy->cur) {
> +       if (load > od_tuners->up_threshold) {
>                 /* If switching to max speed, apply sampling_down_factor */
>                 if (policy->cur < policy->max)
>                         dbs_info->rate_mult =
>                                 od_tuners->sampling_down_factor;
>                 dbs_freq_increase(policy, policy->max);
>                 return;
> -       }
> -
> -       /* Check for frequency decrease */
> -       /* if we cannot reduce the frequency anymore, break out early */
> -       if (policy->cur == policy->min)
> -               return;
> -
> -       /*
> -        * The optimal frequency is the frequency that is the lowest that can
> -        * support the current CPU usage without triggering the up policy. To be
> -        * safe, we focus 10 points under the threshold.
> -        */
> -       if (load_freq < od_tuners->adj_up_threshold
> -                       * policy->cur) {
> +       } else {
> +               /* Calculate the next frequency proportional to load */
>                 unsigned int freq_next;
> -               freq_next = load_freq / od_tuners->adj_up_threshold;
> +               freq_next = load * policy->max / 100;
>
>                 /* No longer fully busy, reset rate_mult */
>                 dbs_info->rate_mult = 1;
> @@ -372,9 +354,6 @@ static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
>                         input < MIN_FREQUENCY_UP_THRESHOLD) {
>                 return -EINVAL;
>         }
> -       /* Calculate the new adj_up_threshold */
> -       od_tuners->adj_up_threshold += input;
> -       od_tuners->adj_up_threshold -= od_tuners->up_threshold;
>
>         od_tuners->up_threshold = input;
>         return count;
> @@ -523,8 +502,6 @@ static int od_init(struct dbs_data *dbs_data)
>         if (idle_time != -1ULL) {
>                 /* Idle micro accounting is supported. Use finer thresholds */
>                 tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
> -               tuners->adj_up_threshold = MICRO_FREQUENCY_UP_THRESHOLD -
> -                       MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
>                 /*
>                  * In nohz/micro accounting case we set the minimum frequency
>                  * not depending on HZ, but fixed (very low). The deferred
> @@ -533,8 +510,6 @@ static int od_init(struct dbs_data *dbs_data)
>                 dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
>         } else {
>                 tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
> -               tuners->adj_up_threshold = DEF_FREQUENCY_UP_THRESHOLD -
> -                       DEF_FREQUENCY_DOWN_DIFFERENTIAL;
>
>                 /* For correct statistics, we need 10 ticks for each measure */
>                 dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
> diff --git a/drivers/cpufreq/mperf.c b/drivers/cpufreq/mperf.c
> deleted file mode 100644
> index 911e193..0000000
> --- a/drivers/cpufreq/mperf.c
> +++ /dev/null
> @@ -1,51 +0,0 @@
> -#include <linux/kernel.h>
> -#include <linux/smp.h>
> -#include <linux/module.h>
> -#include <linux/init.h>
> -#include <linux/cpufreq.h>
> -#include <linux/slab.h>
> -
> -#include "mperf.h"
> -
> -static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
> -
> -/* Called via smp_call_function_single(), on the target CPU */
> -static void read_measured_perf_ctrs(void *_cur)
> -{
> -       struct aperfmperf *am = _cur;
> -
> -       get_aperfmperf(am);
> -}
> -
> -/*
> - * Return the measured active (C0) frequency on this CPU since last call
> - * to this function.
> - * Input: cpu number
> - * Return: Average CPU frequency in terms of max frequency (zero on error)
> - *
> - * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
> - * over a period of time, while CPU is in C0 state.
> - * IA32_MPERF counts at the rate of max advertised frequency
> - * IA32_APERF counts at the rate of actual CPU frequency
> - * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
> - * no meaning should be associated with absolute values of these MSRs.
> - */
> -unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
> -                                       unsigned int cpu)
> -{
> -       struct aperfmperf perf;
> -       unsigned long ratio;
> -       unsigned int retval;
> -
> -       if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
> -               return 0;
> -
> -       ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
> -       per_cpu(acfreq_old_perf, cpu) = perf;
> -
> -       retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
> -
> -       return retval;
> -}
> -EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf);
> -MODULE_LICENSE("GPL");
> diff --git a/drivers/cpufreq/mperf.h b/drivers/cpufreq/mperf.h
> deleted file mode 100644
> index 5dbf295..0000000
> --- a/drivers/cpufreq/mperf.h
> +++ /dev/null
> @@ -1,9 +0,0 @@
> -/*
> - *  (c) 2010 Advanced Micro Devices, Inc.
> - *  Your use of this code is subject to the terms and conditions of the
> - *  GNU general public license version 2. See "COPYING" or
> - *  http://www.gnu.org/licenses/gpl.html
> - */
> -
> -unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
> -                                       unsigned int cpu);
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index 037d36a..60fcb42 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -211,10 +211,6 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy,
>                                    unsigned int target_freq,
>                                    unsigned int relation);
>
> -
> -extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy,
> -                                  unsigned int cpu);
> -
>  int cpufreq_register_governor(struct cpufreq_governor *governor);
>  void cpufreq_unregister_governor(struct cpufreq_governor *governor);
>
> @@ -254,8 +250,6 @@ struct cpufreq_driver {
>         unsigned int    (*get)  (unsigned int cpu);
>
>         /* optional */
> -       unsigned int (*getavg)  (struct cpufreq_policy *policy,
> -                                unsigned int cpu);
>         int     (*bios_limit)   (int cpu, unsigned int *limit);
>
>         int     (*exit)         (struct cpufreq_policy *policy);
> --
> 1.8.1.4
>
--
To unsubscribe from this list: send the line "unsubscribe cpufreq" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Devel]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Forum]     [Linux SCSI]

  Powered by Linux