On Tue, 2021-08-03 at 12:43 +0530, Viresh Kumar wrote: > On 30-07-21, 00:08, Hector Yuan wrote: > > From: "Hector.Yuan" <hector.yuan@xxxxxxxxxxxx> > > > > Add cpufreq HW support > > Please write a proper commit log, what you are adding and which SoCs > it will apply to. Also add a full stop (.) at the end. > OK, I will write down more details > > Signed-off-by: Hector.Yuan <hector.yuan@xxxxxxxxxxxx> > > --- > > drivers/cpufreq/Kconfig.arm | 12 ++ > > drivers/cpufreq/Makefile | 1 + > > drivers/cpufreq/mediatek-cpufreq-hw.c | 357 +++++++++++++++++++++++++++++++++ > > include/linux/cpufreq.h | 39 ++++ > > The changes to cpufreq.h should be done in a separate patch. > OK, will separate .h to another patch > > diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c > > new file mode 100644 > > index 0000000..ca50a3a > > --- /dev/null > > +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c > > @@ -0,0 +1,357 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* > > + * Copyright (c) 2020 MediaTek Inc. > > + */ > > + > > +#include <linux/bitfield.h> > > +#include <linux/cpufreq.h> > > +#include <linux/energy_model.h> > > +#include <linux/init.h> > > +#include <linux/iopoll.h> > > +#include <linux/kernel.h> > > +#include <linux/module.h> > > +#include <linux/of_address.h> > > +#include <linux/of_platform.h> > > +#include <linux/pm_qos.h> > > +#include <linux/slab.h> > > + > > +#define LUT_MAX_ENTRIES 32U > > +#define LUT_FREQ GENMASK(11, 0) > > +#define LUT_ROW_SIZE 0x4 > > +#define CPUFREQ_HW_STATUS BIT(0) > > +#define SVS_HW_STATUS BIT(1) > > +#define POLL_USEC 1000 > > +#define TIMEOUT_USEC 300000 > > + > > +enum { > > + REG_FREQ_LUT_TABLE, > > + REG_FREQ_ENABLE, > > + REG_FREQ_PERF_STATE, > > + REG_FREQ_HW_STATE, > > + REG_EM_POWER_TBL, > > + REG_FREQ_LATENCY, > > + > > + REG_ARRAY_SIZE, > > +}; > > + > > +struct cpufreq_mtk { > > + struct cpufreq_frequency_table *table; > > + void __iomem *reg_bases[REG_ARRAY_SIZE]; > > + int nr_opp; > > + cpumask_t related_cpus; > > +}; > > + > > +static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = { > > + [REG_FREQ_LUT_TABLE] = 0x0, > > + [REG_FREQ_ENABLE] = 0x84, > > + [REG_FREQ_PERF_STATE] = 0x88, > > + [REG_FREQ_HW_STATE] = 0x8c, > > + [REG_EM_POWER_TBL] = 0x90, > > + [REG_FREQ_LATENCY] = 0x110, > > +}; > > + > > +static struct cpufreq_mtk *mtk_freq_domain_map[NR_CPUS]; > > + > > +static int __maybe_unused > > +mtk_cpufreq_get_cpu_power(unsigned long *mW, > > + unsigned long *KHz, struct device *cpu_dev) > > +{ > > + struct cpufreq_mtk *c; > > + struct cpufreq_policy *policy; > > + int i; > > + > > + policy = cpufreq_cpu_get_raw(cpu_dev->id); > > + if (!policy) > > + return 0; > > + > > + c = mtk_freq_domain_map[policy->cpu]; > > + > > + for (i = 0; i < c->nr_opp; i++) { > > + if (c->table[i].frequency < *KHz) > > + break; > > + } > > + i--; > > + > > + *KHz = c->table[i].frequency; > > + *mW = readl_relaxed(c->reg_bases[REG_EM_POWER_TBL] + > > + i * LUT_ROW_SIZE) / 1000; > > + > > + return 0; > > +} > > + > > +static int mtk_cpufreq_hw_target_index(struct cpufreq_policy *policy, > > + unsigned int index) > > +{ > > + struct cpufreq_mtk *c = policy->driver_data; > > + > > + writel_relaxed(index, c->reg_bases[REG_FREQ_PERF_STATE]); > > + > > + return 0; > > +} > > + > > +static unsigned int mtk_cpufreq_hw_get(unsigned int cpu) > > +{ > > + struct cpufreq_mtk *c; > > + struct cpufreq_policy *policy; > > + unsigned int index; > > + > > + policy = cpufreq_cpu_get_raw(cpu); > > + if (!policy) > > + return 0; > > + > > + c = mtk_freq_domain_map[policy->cpu]; > > + > > + index = readl_relaxed(c->reg_bases[REG_FREQ_PERF_STATE]); > > + index = min(index, LUT_MAX_ENTRIES - 1); > > + > > + return c->table[index].frequency; > > +} > > + > > +static unsigned int mtk_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, > > + unsigned int target_freq) > > +{ > > + struct cpufreq_mtk *c = policy->driver_data; > > + unsigned int index; > > + > > + index = cpufreq_table_find_index_dl(policy, target_freq); > > + > > + writel_relaxed(index, c->reg_bases[REG_FREQ_PERF_STATE]); > > + > > + return policy->freq_table[index].frequency; > > +} > > + > > +static int mtk_cpu_create_freq_table(struct platform_device *pdev, > > + struct cpufreq_mtk *c) > > +{ > > + struct device *dev = &pdev->dev; > > + void __iomem *base_table; > > + u32 data, i, freq, prev_freq = 0; > > + > > + c->table = devm_kcalloc(dev, LUT_MAX_ENTRIES + 1, > > + sizeof(*c->table), GFP_KERNEL); > > + if (!c->table) > > + return -ENOMEM; > > + > > + base_table = c->reg_bases[REG_FREQ_LUT_TABLE]; > > + > > + for (i = 0; i < LUT_MAX_ENTRIES; i++) { > > + data = readl_relaxed(base_table + (i * LUT_ROW_SIZE)); > > + freq = FIELD_GET(LUT_FREQ, data) * 1000; > > + > > + if (freq == prev_freq) > > + break; > > + > > + c->table[i].frequency = freq; > > + > > + dev_dbg(dev, "index=%d freq=%d\n", > > + i, c->table[i].frequency); > > Won't this fit in a single line ? > OK, will modify to single line > > + > > + prev_freq = freq; > > + } > > + > > + c->table[i].frequency = CPUFREQ_TABLE_END; > > + c->nr_opp = i; > > + > > + return 0; > > +} > > + > > +static int mtk_cpu_resources_init(struct platform_device *pdev, > > + unsigned int cpu, int index, > > + const u16 *offsets) > > +{ > > + struct cpufreq_mtk *c; > > + struct device *dev = &pdev->dev; > > + int ret, i; > > + void __iomem *base; > > + > > + if (mtk_freq_domain_map[cpu]) > > This should not happen anymore, isn't it ? > Will remove cpu map. > > + return 0; > > + > > + c = devm_kzalloc(dev, sizeof(*c), GFP_KERNEL); > > + if (!c) > > + return -ENOMEM; > > + > > + base = devm_platform_ioremap_resource(pdev, index); > > + if (IS_ERR(base)) > > + return PTR_ERR(base); > > + > > + for (i = REG_FREQ_LUT_TABLE; i < REG_ARRAY_SIZE; i++) > > + c->reg_bases[i] = base + offsets[i]; > > + > > + ret = of_perf_domain_get_sharing_cpumask(index, "performance-domains", > > Instead of parsing parsing "performance-domains" twice, I would rather > pass a CPU number here instead of index. > Sorry, could you give me more details? For now, will use index to parse per-cpu to related cpus.You mean pass policy->cpu or? Thanks. > > + "#performance-domain-cells", > > + &c->related_cpus); > > You could have directly passed policy->cpus here instead. > will replace it. > > + if (ret) { > > + dev_info(dev, "Domain-%d failed to get related CPUs\n", index); > > + return ret; > > + } > > + > > + ret = mtk_cpu_create_freq_table(pdev, c); > > + if (ret) { > > + dev_info(dev, "Domain-%d failed to create freq table\n", index); > > + return ret; > > + } > > + > > + mtk_freq_domain_map[cpu] = c; > > I will rather use policy->driver_data to store this now. > OK > > + > > + return 0; > > +} > > + > > +static int mtk_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) > > +{ > > + struct platform_device *pdev = cpufreq_get_driver_data(); > > + struct cpufreq_mtk *c; > > + struct device *cpu_dev; > > + struct em_data_callback em_cb = EM_DATA_CB(mtk_cpufreq_get_cpu_power); > > + struct pm_qos_request *qos_request; > > + struct device_node *cpu_np; > > + struct of_phandle_args args; > > + const u16 *offsets; > > + unsigned int latency; > > + int sig, pwr_hw = CPUFREQ_HW_STATUS | SVS_HW_STATUS; > > + int ret; > > It looks much more organized when the variable definitions are in > decreasing order of their length, instead of the random order as it is > right now. > OK, will sort it. > > + > > + offsets = of_device_get_match_data(&pdev->dev); > > + if (!offsets) > > + return -EINVAL; > > + > > + cpu_np = of_cpu_device_node_get(policy->cpu); > > + if (!cpu_np) { > > + dev_info(&pdev->dev, "Failed to get cpu %d device\n", > > + policy->cpu); > > + return -ENODEV; > > + } > > + > > + ret = of_parse_phandle_with_args(cpu_np, "performance-domains", > > + "#performance-domain-cells", 0, > > + &args); > > + if (ret < 0) > > What about dropping cpu_np and same later in the code as well ? > OK, Will add it. > > + return ret; > > + > > + /* Get the bases of cpufreq for domains */ > > + ret = mtk_cpu_resources_init(pdev, policy->cpu, args.args[0], offsets); > > + if (ret) { > > + dev_info(&pdev->dev, "CPUFreq resource init failed\n"); > > + return ret; > > + } > > + > > + cpu_dev = get_cpu_device(policy->cpu); > > + if (!cpu_dev) { > > + pr_err("failed to get cpu%d device\n", policy->cpu); > > + return -ENODEV; > > + } > > + > > + c = mtk_freq_domain_map[policy->cpu]; > > + if (!c) { > > + pr_err("No scaling support for CPU%d\n", policy->cpu); > > + return -ENODEV; > > + } > > + > > + cpumask_copy(policy->cpus, &c->related_cpus); > > + > > + policy->freq_table = c->table; > > + policy->driver_data = c; > > Oh you already do this, you can remove mtk_freq_domain_map array now. > OK, will remove map. > > + > > + latency = readl_relaxed(c->reg_bases[REG_FREQ_LATENCY]); > > + if (!latency) > > + latency = CPUFREQ_ETERNAL; > > + > > + /* us convert to ns */ > > + policy->cpuinfo.transition_latency = latency * 1000; > > You want to multiple CPUFREQ_ETERNAL too ? > Yes, may be different power domain with different transition latency. > > + > > + policy->fast_switch_possible = true; > > + > > + qos_request = kzalloc(sizeof(*qos_request), GFP_KERNEL); > > This is a small structure, why not allocate it on stack instead ? > For qos part, we'd like to take more time to re-consider the SW flow and put this to another patch set.Is this okay to you? > > + if (!qos_request) > > + return -ENOMEM; > > + > > + /* Let CPUs leave idle-off state for SVS CPU initializing */ > > + cpu_latency_qos_add_request(qos_request, 0); > > + > > + /* HW should be in enabled state to proceed now */ > > + writel_relaxed(0x1, c->reg_bases[REG_FREQ_ENABLE]); > > + > > + if (readl_poll_timeout(c->reg_bases[REG_FREQ_HW_STATE], sig, > > + (sig & pwr_hw) == pwr_hw, POLL_USEC, > > + TIMEOUT_USEC)) { > > + if (!(sig & CPUFREQ_HW_STATUS)) { > > + pr_info("cpufreq hardware of CPU%d is not enabled\n", > > + policy->cpu); > > + return -ENODEV; > > + } > > + > > + pr_info("SVS of CPU%d is not enabled\n", policy->cpu); > > + } > > + > > + cpu_latency_qos_remove_request(qos_request); > > + > > + em_dev_register_perf_domain(cpu_dev, c->nr_opp, &em_cb, policy->cpus, true); > > + > > + kfree(qos_request); > > Why free after registering for em ? And also move the entire qos thing > into a separate routine instead of adding it to ->init(). > If you agree, we'll consider to put it in another patch set. > > + > > + return 0; > > +} > > + > > +static int mtk_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) > > +{ > > + struct cpufreq_mtk *c; > > + > > + c = mtk_freq_domain_map[policy->cpu]; > > + > > + /* HW should be in paused state now */ > > + writel_relaxed(0x0, c->reg_bases[REG_FREQ_ENABLE]); > > Please make sure each and every resource is freed here and in probe on > failures. > OK, will free all resources as probe. > > + > > + return 0; > > +} > > + > > +static struct cpufreq_driver cpufreq_mtk_hw_driver = { > > + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK | > > + CPUFREQ_HAVE_GOVERNOR_PER_POLICY | > > + CPUFREQ_IS_COOLING_DEV, > > + .verify = cpufreq_generic_frequency_table_verify, > > + .target_index = mtk_cpufreq_hw_target_index, > > + .get = mtk_cpufreq_hw_get, > > + .init = mtk_cpufreq_hw_cpu_init, > > + .exit = mtk_cpufreq_hw_cpu_exit, > > + .fast_switch = mtk_cpufreq_hw_fast_switch, > > + .name = "mtk-cpufreq-hw", > > + .attr = cpufreq_generic_attr, > > +}; > > + > > +static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) > > +{ > > + int ret; > > + > > + cpufreq_mtk_hw_driver.driver_data = pdev; > > + > > + ret = cpufreq_register_driver(&cpufreq_mtk_hw_driver); > > + if (ret) { > > + dev_err(&pdev->dev, "CPUFreq HW driver failed to register\n"); > > + return ret; > > + } > > + > > + return 0; > > You can do return ret here and drop the earlier return and its {}. > okay. > > +} > > + > > +static int mtk_cpufreq_hw_driver_remove(struct platform_device *pdev) > > +{ > > + return cpufreq_unregister_driver(&cpufreq_mtk_hw_driver); > > +} > > + > > +static const struct of_device_id mtk_cpufreq_hw_match[] = { > > + { .compatible = "mediatek,cpufreq-hw", .data = &cpufreq_mtk_offsets }, > > + {} > > +}; > > + > > +static struct platform_driver mtk_cpufreq_hw_driver = { > > + .probe = mtk_cpufreq_hw_driver_probe, > > + .remove = mtk_cpufreq_hw_driver_remove, > > + .driver = { > > + .name = "mtk-cpufreq-hw", > > + .of_match_table = mtk_cpufreq_hw_match, > > + }, > > +}; > > +module_platform_driver(mtk_cpufreq_hw_driver); > > + > > +MODULE_DESCRIPTION("Mediatek cpufreq-hw driver"); > > +MODULE_LICENSE("GPL v2"); > > You can add Module-author as well here if you want. > OK. > > diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h > > index 9fd7194..4916d70 100644 > > --- a/include/linux/cpufreq.h > > +++ b/include/linux/cpufreq.h > > @@ -13,6 +13,8 @@ > > #include <linux/completion.h> > > #include <linux/kobject.h> > > #include <linux/notifier.h> > > +#include <linux/of.h> > > +#include <linux/of_device.h> > > #include <linux/pm_qos.h> > > #include <linux/spinlock.h> > > #include <linux/sysfs.h> > > @@ -1036,6 +1038,43 @@ void arch_set_freq_scale(const struct cpumask *cpus, > > } > > #endif > > > > +#ifdef CONFIG_CPU_FREQ > > There is another CONFIG_CPU_FREQ a few lines above, please use the > same block for this routine as well. > OK, will move it to the above. > > +static inline int of_perf_domain_get_sharing_cpumask(int index, const char *list_name, > > + const char *cell_name, > > + struct cpumask *cpumask) > > +{ > > + struct device_node *cpu_np; > > + struct of_phandle_args args; > > + int cpu, ret; > > + > > + for_each_possible_cpu(cpu) { > > + cpu_np = of_cpu_device_node_get(cpu); > > + if (!cpu_np) > > + continue; > > + > > + ret = of_parse_phandle_with_args(cpu_np, list_name, > > + cell_name, 0, > > + &args); > > + > > + of_node_put(cpu_np); > > + if (ret < 0) > > + continue; > > + > > + if (index == args.args[0]) > > + cpumask_set_cpu(cpu, cpumask); > > + } > > + > > + return 0; > > +} > > +#else > > +static inline int of_perf_domain_get_sharing_cpumask(int index, const char *list_name, > > + const char *cell_name, > > + struct cpumask *cpumask) > > +{ > > + return 0; > > return -EOPNOTSUPP; > > > +} > > +#endif >