From: Vaidyanathan Srinivasan <svaidy@xxxxxxxxxxxxxxxxxx> Backend driver to dynamically set voltage and frequency on IBM POWER non-virtualized platforms. Power management SPRs are used to set the required PState. This driver works in conjunction with cpufreq governors like 'ondemand' to provide a demand based frequency and voltage setting on IBM POWER non-virtualized platforms. PState table is obtained from OPAL v3 firmware through device tree. powernv_cpufreq back-end driver would parse the relevant device-tree nodes and initialise the cpufreq subsystem on powernv platform. The code was originally written by svaidy@xxxxxxxxxxxxxxxxxx. Over time it was modified to accomodate bug-fixes as well as updates to the the cpu-freq core. Relevant portions of the change logs corresponding to those modifications are noted below: * The policy->cpus needs to be populated in a hotplug-invariant manner instead of using cpu_sibling_mask() which varies with cpu-hotplug. This is because the cpufreq core code copies this content into policy->related_cpus mask which should not vary on cpu-hotplug. [Authored by srivatsa.bhat@xxxxxxxxxxxxxxxxxx] * Create a helper routine that can return the cpu-frequency for the corresponding pstate_id. Also, cache the values of the pstate_max, pstate_min and pstate_nominal and nr_pstates in a static structure so that they can be reused in the future to perform any validations. [Authored by ego@xxxxxxxxxxxxxxxxxx] * Create a driver attribute named cpuinfo_nominal_freq which creates a sysfs read-only file named cpuinfo_nominal_freq. Export the frequency corresponding to the nominal_pstate through this interface. Nominal frequency is the highest non-turbo frequency for the platform. This is generally used for setting governor policies from user space for optimal energy efficiency. [Authored by ego@xxxxxxxxxxxxxxxxxx] * Implement a powernv_cpufreq_get(unsigned int cpu) method which will return the current operating frequency. Export this via the sysfs interface cpuinfo_cur_freq by setting powernv_cpufreq_driver.get to powernv_cpufreq_get(). [Authored by ego@xxxxxxxxxxxxxxxxxx] [Change log updated by ego@xxxxxxxxxxxxxxxxxx] Reviewed-by: Preeti U Murthy <preeti@xxxxxxxxxxxxxxxxxx> Signed-off-by: Vaidyanathan Srinivasan <svaidy@xxxxxxxxxxxxxxxxxx> Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@xxxxxxxxxxxxxxxxxx> Signed-off-by: Anton Blanchard <anton@xxxxxxxxx> Signed-off-by: Gautham R. Shenoy <ego@xxxxxxxxxxxxxxxxxx> --- arch/powerpc/include/asm/reg.h | 4 + drivers/cpufreq/Kconfig.powerpc | 8 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/powernv-cpufreq.c | 342 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 355 insertions(+) create mode 100644 drivers/cpufreq/powernv-cpufreq.c diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1a36b8e..2189f8f 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -271,6 +271,10 @@ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ #define SPRN_IC 0x350 /* Virtual Instruction Count */ #define SPRN_VTB 0x351 /* Virtual Time Base */ +#define SPRN_PMICR 0x354 /* Power Management Idle Control Reg */ +#define SPRN_PMSR 0x355 /* Power Management Status Reg */ +#define SPRN_PMCR 0x374 /* Power Management Control Register */ + /* HFSCR and FSCR bit numbers are the same */ #define FSCR_TAR_LG 8 /* Enable Target Address Register */ #define FSCR_EBB_LG 7 /* Enable Event Based Branching */ diff --git a/drivers/cpufreq/Kconfig.powerpc b/drivers/cpufreq/Kconfig.powerpc index ca0021a..72564b7 100644 --- a/drivers/cpufreq/Kconfig.powerpc +++ b/drivers/cpufreq/Kconfig.powerpc @@ -54,3 +54,11 @@ config PPC_PASEMI_CPUFREQ help This adds the support for frequency switching on PA Semi PWRficient processors. + +config POWERNV_CPUFREQ + tristate "CPU frequency scaling for IBM POWERNV platform" + depends on PPC_POWERNV + default y + help + This adds support for CPU frequency switching on IBM POWERNV + platform diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 7494565..0dbb963 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_PPC_CORENET_CPUFREQ) += ppc-corenet-cpufreq.o obj-$(CONFIG_CPU_FREQ_PMAC) += pmac32-cpufreq.o obj-$(CONFIG_CPU_FREQ_PMAC64) += pmac64-cpufreq.o obj-$(CONFIG_PPC_PASEMI_CPUFREQ) += pasemi-cpufreq.o +obj-$(CONFIG_POWERNV_CPUFREQ) += powernv-cpufreq.o ################################################################################## # Other platform drivers diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c new file mode 100644 index 0000000..e1e5197 --- /dev/null +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -0,0 +1,342 @@ +/* + * POWERNV cpufreq driver for the IBM POWER processors + * + * (C) Copyright IBM 2014 + * + * Author: Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#define pr_fmt(fmt) "powernv-cpufreq: " fmt + +#include <linux/kernel.h> +#include <linux/sysfs.h> +#include <linux/cpumask.h> +#include <linux/module.h> +#include <linux/cpufreq.h> +#include <linux/smp.h> +#include <linux/of.h> + +#include <asm/cputhreads.h> +#include <asm/reg.h> + +#define POWERNV_MAX_PSTATES 256 + +static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; +static int powernv_pstate_ids[POWERNV_MAX_PSTATES+1]; + +/* + * Note: The set of pstates consists of contiguous integers, the + * smallest of which is indicated by powernv_pstate_info.min, the + * largest of which is indicated by powernv_pstate_info.max. + * + * The nominal pstate is the highest non-turbo pstate in this + * platform. This is indicated by powernv_pstate_info.nominal. + */ +static struct powernv_pstate_info { + int min; + int max; + int nominal; + int nr_pstates; +} powernv_pstate_info; + +/* + * Initialize the freq table based on data obtained + * from the firmware passed via device-tree + */ +static int init_powernv_pstates(void) +{ + struct device_node *power_mgt; + int i, pstate_min, pstate_max, pstate_nominal, nr_pstates = 0; + const __be32 *pstate_ids, *pstate_freqs; + u32 len_ids, len_freqs; + + power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); + if (!power_mgt) { + pr_warn("power-mgt node not found\n"); + return -ENODEV; + } + + if (of_property_read_u32(power_mgt, "ibm,pstate-min", &pstate_min)) { + pr_warn("ibm,pstate-min node not found\n"); + return -ENODEV; + } + + if (of_property_read_u32(power_mgt, "ibm,pstate-max", &pstate_max)) { + pr_warn("ibm,pstate-max node not found\n"); + return -ENODEV; + } + + if (of_property_read_u32(power_mgt, "ibm,pstate-nominal", + &pstate_nominal)) { + pr_warn("ibm,pstate-nominal not found\n"); + return -ENODEV; + } + pr_info("cpufreq pstate min %d nominal %d max %d\n", pstate_min, + pstate_nominal, pstate_max); + + pstate_ids = of_get_property(power_mgt, "ibm,pstate-ids", &len_ids); + if (!pstate_ids) { + pr_warn("ibm,pstate-ids not found\n"); + return -ENODEV; + } + + pstate_freqs = of_get_property(power_mgt, "ibm,pstate-frequencies-mhz", + &len_freqs); + if (!pstate_freqs) { + pr_warn("ibm,pstate-frequencies-mhz not found\n"); + return -ENODEV; + } + + WARN_ON(len_ids != len_freqs); + nr_pstates = min(len_ids, len_freqs) / sizeof(u32); + if (!nr_pstates) { + pr_warn("No PStates found\n"); + return -ENODEV; + } + + pr_debug("NR PStates %d\n", nr_pstates); + for (i = 0; i < nr_pstates; i++) { + u32 id = be32_to_cpu(pstate_ids[i]); + u32 freq = be32_to_cpu(pstate_freqs[i]); + + pr_debug("PState id %d freq %d MHz\n", id, freq); + powernv_freqs[i].frequency = freq * 1000; /* kHz */ + powernv_pstate_ids[i] = id; + } + /* End of list marker entry */ + powernv_freqs[i].frequency = CPUFREQ_TABLE_END; + + powernv_pstate_info.min = pstate_min; + powernv_pstate_info.max = pstate_max; + powernv_pstate_info.nominal = pstate_nominal; + powernv_pstate_info.nr_pstates = nr_pstates; + + return 0; +} + +/* Returns the CPU frequency corresponding to the pstate_id. */ +static unsigned int pstate_id_to_freq(int pstate_id) +{ + int i; + + i = powernv_pstate_info.max - pstate_id; + BUG_ON(i >= powernv_pstate_info.nr_pstates || i < 0); + + return powernv_freqs[i].frequency; +} + +/* + * cpuinfo_nominal_freq_show - Show the nominal CPU frequency as indicated by + * the firmware + */ +static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy, + char *buf) +{ + return sprintf(buf, "%u\n", + pstate_id_to_freq(powernv_pstate_info.nominal)); +} + +struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq = + __ATTR_RO(cpuinfo_nominal_freq); + +static struct freq_attr *powernv_cpu_freq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + &cpufreq_freq_attr_cpuinfo_nominal_freq, + NULL, +}; + +/* Helper routines */ + +/* Access helpers to power mgt SPR */ + +static inline unsigned long get_pmspr(unsigned long sprn) +{ + switch (sprn) { + case SPRN_PMCR: + return mfspr(SPRN_PMCR); + + case SPRN_PMICR: + return mfspr(SPRN_PMICR); + + case SPRN_PMSR: + return mfspr(SPRN_PMSR); + } + BUG(); +} + +static inline void set_pmspr(unsigned long sprn, unsigned long val) +{ + switch (sprn) { + case SPRN_PMCR: + mtspr(SPRN_PMCR, val); + return; + + case SPRN_PMICR: + mtspr(SPRN_PMICR, val); + return; + } + BUG(); +} + +/* + * Use objects of this type to query/update + * pstates on a remote CPU via smp_call_function. + */ +struct powernv_smp_call_data { + unsigned int freq; + int pstate_id; +}; + +/* + * powernv_read_cpu_freq: Reads the current frequency on this CPU. + * + * Called via smp_call_function. + * + * Note: The caller of the smp_call_function should pass an argument of + * the type 'struct powernv_smp_call_data *' along with this function. + * + * The current frequency on this CPU will be returned via + * ((struct powernv_smp_call_data *)arg)->freq; + */ +static void powernv_read_cpu_freq(void *arg) +{ + unsigned long pmspr_val; + s8 local_pstate_id; + struct powernv_smp_call_data *freq_data = arg; + + pmspr_val = get_pmspr(SPRN_PMSR); + + /* + * The local pstate id corresponds bits 48..55 in the PMSR. + * Note: Watch out for the sign! + */ + local_pstate_id = (pmspr_val >> 48) & 0xFF; + freq_data->pstate_id = local_pstate_id; + freq_data->freq = pstate_id_to_freq(freq_data->pstate_id); + + pr_debug("cpu %d pmsr %016lX pstate_id %d frequency %d kHz\n", + raw_smp_processor_id(), pmspr_val, freq_data->pstate_id, + freq_data->freq); +} + +/* + * powernv_cpufreq_get: Returns the CPU frequency as reported by the + * firmware for CPU 'cpu'. This value is reported through the sysfs + * file cpuinfo_cur_freq. + */ +unsigned int powernv_cpufreq_get(unsigned int cpu) +{ + struct powernv_smp_call_data freq_data; + + smp_call_function_any(cpu_sibling_mask(cpu), powernv_read_cpu_freq, + &freq_data, 1); + + return freq_data.freq; +} + +/* + * set_pstate: Sets the pstate on this CPU. + * + * This is called via an smp_call_function. + * + * The caller must ensure that freq_data is of the type + * (struct powernv_smp_call_data *) and the pstate_id which needs to be set + * on this CPU should be present in freq_data->pstate_id. + */ +static void set_pstate(void *freq_data) +{ + unsigned long val; + unsigned long pstate_ul = + ((struct powernv_smp_call_data *) freq_data)->pstate_id; + + val = get_pmspr(SPRN_PMCR); + val = val & 0x0000FFFFFFFFFFFFULL; + + pstate_ul = pstate_ul & 0xFF; + + /* Set both global(bits 56..63) and local(bits 48..55) PStates */ + val = val | (pstate_ul << 56) | (pstate_ul << 48); + + pr_debug("Setting cpu %d pmcr to %016lX\n", + raw_smp_processor_id(), val); + set_pmspr(SPRN_PMCR, val); +} + +/* + * powernv_cpufreq_target_index: Sets the frequency corresponding to + * the cpufreq table entry indexed by new_index on the cpus in the + * mask policy->cpus + */ +static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, + unsigned int new_index) +{ + struct powernv_smp_call_data freq_data; + + freq_data.pstate_id = powernv_pstate_ids[new_index]; + + /* + * Use smp_call_function to send IPI and execute the + * mtspr on target CPU. We could do that without IPI + * if current CPU is within policy->cpus (core) + */ + smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1); + + return 0; +} + +static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + int base, i; + + base = cpu_first_thread_sibling(policy->cpu); + + for (i = 0; i < threads_per_core; i++) + cpumask_set_cpu(base + i, policy->cpus); + + return cpufreq_table_validate_and_show(policy, powernv_freqs); +} + +static struct cpufreq_driver powernv_cpufreq_driver = { + .name = "powernv-cpufreq", + .flags = CPUFREQ_CONST_LOOPS, + .init = powernv_cpufreq_cpu_init, + .verify = cpufreq_generic_frequency_table_verify, + .target_index = powernv_cpufreq_target_index, + .get = powernv_cpufreq_get, + .attr = powernv_cpu_freq_attr, +}; + +static int __init powernv_cpufreq_init(void) +{ + int rc = 0; + + /* Discover pstates from device tree and init */ + rc = init_powernv_pstates(); + if (rc) { + pr_info("powernv-cpufreq disabled. System does not support PState control\n"); + return rc; + } + + return cpufreq_register_driver(&powernv_cpufreq_driver); +} +module_init(powernv_cpufreq_init); + +static void __exit powernv_cpufreq_exit(void) +{ + cpufreq_unregister_driver(&powernv_cpufreq_driver); +} +module_exit(powernv_cpufreq_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>"); -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe cpufreq" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html