The RISC-V SBI HSM extension provides HSM suspend call which can be used by Linux RISC-V to enter platform specific low-power state. This patch adds a CPU idle driver based on RISC-V SBI calls which will populate idle states from device tree and use SBI calls to entry these idle states. Signed-off-by: Anup Patel <anup.patel@xxxxxxx> --- MAINTAINERS | 7 + drivers/cpuidle/Kconfig | 5 + drivers/cpuidle/Kconfig.riscv | 15 + drivers/cpuidle/Makefile | 4 + drivers/cpuidle/cpuidle-sbi.c | 626 ++++++++++++++++++++++++++++++++++ 5 files changed, 657 insertions(+) create mode 100644 drivers/cpuidle/Kconfig.riscv create mode 100644 drivers/cpuidle/cpuidle-sbi.c diff --git a/MAINTAINERS b/MAINTAINERS index 5108b5058502..a16b14c687b5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4796,6 +4796,13 @@ S: Supported F: drivers/cpuidle/dt_idle_genpd.c F: drivers/cpuidle/dt_idle_genpd.h +CPUIDLE DRIVER - RISC-V SBI +M: Anup Patel <anup.patel@xxxxxxx> +L: linux-pm@xxxxxxxxxxxxxxx +L: linux-riscv@xxxxxxxxxxxxxxxxxxx +S: Supported +F: drivers/cpuidle/cpuidle-sbi.c + CRAMFS FILESYSTEM M: Nicolas Pitre <nico@xxxxxxxxxxx> S: Maintained diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index f1afe7ab6b54..ff71dd662880 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -66,6 +66,11 @@ depends on PPC source "drivers/cpuidle/Kconfig.powerpc" endmenu +menu "RISC-V CPU Idle Drivers" +depends on RISCV +source "drivers/cpuidle/Kconfig.riscv" +endmenu + config HALTPOLL_CPUIDLE tristate "Halt poll cpuidle driver" depends on X86 && KVM_GUEST diff --git a/drivers/cpuidle/Kconfig.riscv b/drivers/cpuidle/Kconfig.riscv new file mode 100644 index 000000000000..78518c26af74 --- /dev/null +++ b/drivers/cpuidle/Kconfig.riscv @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# RISC-V CPU Idle drivers +# + +config RISCV_SBI_CPUIDLE + bool "RISC-V SBI CPU idle Driver" + depends on RISCV_SBI + select DT_IDLE_STATES + select CPU_IDLE_MULTIPLE_DRIVERS + select DT_IDLE_GENPD if PM_GENERIC_DOMAINS_OF + help + Select this option to enable RISC-V SBI firmware based CPU idle + driver for RISC-V systems. This drivers also supports hierarchical + DT based layout of the idle state. diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index 11a26cef279f..a36922c18510 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -35,3 +35,7 @@ obj-$(CONFIG_MIPS_CPS_CPUIDLE) += cpuidle-cps.o # POWERPC drivers obj-$(CONFIG_PSERIES_CPUIDLE) += cpuidle-pseries.o obj-$(CONFIG_POWERNV_CPUIDLE) += cpuidle-powernv.o + +############################################################################### +# RISC-V drivers +obj-$(CONFIG_RISCV_SBI_CPUIDLE) += cpuidle-sbi.o diff --git a/drivers/cpuidle/cpuidle-sbi.c b/drivers/cpuidle/cpuidle-sbi.c new file mode 100644 index 000000000000..286172b0368d --- /dev/null +++ b/drivers/cpuidle/cpuidle-sbi.c @@ -0,0 +1,626 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * RISC-V SBI CPU idle driver. + * + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + */ + +#define pr_fmt(fmt) "cpuidle-sbi: " fmt + +#include <linux/cpuidle.h> +#include <linux/cpumask.h> +#include <linux/cpu_pm.h> +#include <linux/cpu_cooling.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/slab.h> +#include <linux/platform_device.h> +#include <linux/pm_domain.h> +#include <linux/pm_runtime.h> +#include <asm/cpuidle.h> +#include <asm/sbi.h> +#include <asm/suspend.h> + +#include "dt_idle_states.h" +#include "dt_idle_genpd.h" + +struct sbi_cpuidle_data { + u32 *states; + struct device *dev; +}; + +struct sbi_domain_state { + bool available; + u32 state; +}; + +static DEFINE_PER_CPU_READ_MOSTLY(struct sbi_cpuidle_data, sbi_cpuidle_data); +static DEFINE_PER_CPU(struct sbi_domain_state, domain_state); +static bool sbi_cpuidle_use_osi; +static bool sbi_cpuidle_use_cpuhp; +static bool sbi_cpuidle_pd_allow_domain_state; + +static inline void sbi_set_domain_state(u32 state) +{ + struct sbi_domain_state *data = this_cpu_ptr(&domain_state); + + data->available = true; + data->state = state; +} + +static inline u32 sbi_get_domain_state(void) +{ + struct sbi_domain_state *data = this_cpu_ptr(&domain_state); + + return data->state; +} + +static inline void sbi_clear_domain_state(void) +{ + struct sbi_domain_state *data = this_cpu_ptr(&domain_state); + + data->available = false; +} + +static inline bool sbi_is_domain_state_available(void) +{ + struct sbi_domain_state *data = this_cpu_ptr(&domain_state); + + return data->available; +} + +static int sbi_suspend_finisher(unsigned long suspend_type, + unsigned long resume_addr, + unsigned long opaque) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_SUSPEND, + suspend_type, resume_addr, opaque, 0, 0, 0); + + return (ret.error) ? sbi_err_map_linux_errno(ret.error) : 0; +} + +static int sbi_suspend(u32 state) +{ + if (state & SBI_HSM_SUSP_NON_RET_BIT) + return cpu_suspend(state, sbi_suspend_finisher); + else + return sbi_suspend_finisher(state, 0, 0); +} + +static int sbi_cpuidle_enter_state(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx) +{ + u32 *states = __this_cpu_read(sbi_cpuidle_data.states); + + return CPU_PM_CPU_IDLE_ENTER_PARAM(sbi_suspend, idx, states[idx]); +} + +static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx, + bool s2idle) +{ + struct sbi_cpuidle_data *data = this_cpu_ptr(&sbi_cpuidle_data); + u32 *states = data->states; + struct device *pd_dev = data->dev; + u32 state; + int ret; + + ret = cpu_pm_enter(); + if (ret) + return -1; + + /* Do runtime PM to manage a hierarchical CPU toplogy. */ + rcu_irq_enter_irqson(); + if (s2idle) + dev_pm_genpd_suspend(pd_dev); + else + pm_runtime_put_sync_suspend(pd_dev); + rcu_irq_exit_irqson(); + + if (sbi_is_domain_state_available()) + state = sbi_get_domain_state(); + else + state = states[idx]; + + ret = sbi_suspend(state) ? -1 : idx; + + rcu_irq_enter_irqson(); + if (s2idle) + dev_pm_genpd_resume(pd_dev); + else + pm_runtime_get_sync(pd_dev); + rcu_irq_exit_irqson(); + + cpu_pm_exit(); + + /* Clear the domain state to start fresh when back from idle. */ + sbi_clear_domain_state(); + return ret; +} + +static int sbi_enter_domain_idle_state(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx) +{ + return __sbi_enter_domain_idle_state(dev, drv, idx, false); +} + +static int sbi_enter_s2idle_domain_idle_state(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int idx) +{ + return __sbi_enter_domain_idle_state(dev, drv, idx, true); +} + +static int sbi_cpuidle_cpuhp_up(unsigned int cpu) +{ + struct device *pd_dev = __this_cpu_read(sbi_cpuidle_data.dev); + + if (pd_dev) + pm_runtime_get_sync(pd_dev); + + return 0; +} + +static int sbi_cpuidle_cpuhp_down(unsigned int cpu) +{ + struct device *pd_dev = __this_cpu_read(sbi_cpuidle_data.dev); + + if (pd_dev) { + pm_runtime_put_sync(pd_dev); + /* Clear domain state to start fresh at next online. */ + sbi_clear_domain_state(); + } + + return 0; +} + +static void sbi_idle_init_cpuhp(void) +{ + int err; + + if (!sbi_cpuidle_use_cpuhp) + return; + + err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING, + "cpuidle/sbi:online", + sbi_cpuidle_cpuhp_up, + sbi_cpuidle_cpuhp_down); + if (err) + pr_warn("Failed %d while setup cpuhp state\n", err); +} + +static const struct of_device_id sbi_cpuidle_state_match[] = { + { .compatible = "riscv,idle-state", + .data = sbi_cpuidle_enter_state }, + { }, +}; + +static bool sbi_suspend_state_is_valid(u32 state) +{ + if (state > SBI_HSM_SUSPEND_RET_DEFAULT && + state < SBI_HSM_SUSPEND_RET_PLATFORM) + return false; + if (state > SBI_HSM_SUSPEND_NON_RET_DEFAULT && + state < SBI_HSM_SUSPEND_NON_RET_PLATFORM) + return false; + return true; +} + +static int sbi_dt_parse_state_node(struct device_node *np, u32 *state) +{ + int err = of_property_read_u32(np, "riscv,sbi-suspend-param", state); + + if (err) { + pr_warn("%pOF missing riscv,sbi-suspend-param property\n", np); + return err; + } + + if (!sbi_suspend_state_is_valid(*state)) { + pr_warn("Invalid SBI suspend state %#x\n", *state); + return -EINVAL; + } + + return 0; +} + +static int sbi_dt_cpu_init_topology(struct cpuidle_driver *drv, + struct sbi_cpuidle_data *data, + unsigned int state_count, int cpu) +{ + /* Currently limit the hierarchical topology to be used in OSI mode. */ + if (!sbi_cpuidle_use_osi) + return 0; + + data->dev = dt_idle_attach_cpu(cpu, "sbi"); + if (IS_ERR_OR_NULL(data->dev)) + return PTR_ERR_OR_ZERO(data->dev); + + /* + * Using the deepest state for the CPU to trigger a potential selection + * of a shared state for the domain, assumes the domain states are all + * deeper states. + */ + drv->states[state_count - 1].enter = sbi_enter_domain_idle_state; + drv->states[state_count - 1].enter_s2idle = + sbi_enter_s2idle_domain_idle_state; + sbi_cpuidle_use_cpuhp = true; + + return 0; +} + +static int sbi_cpuidle_dt_init_states(struct device *dev, + struct cpuidle_driver *drv, + unsigned int cpu, + unsigned int state_count) +{ + struct sbi_cpuidle_data *data = per_cpu_ptr(&sbi_cpuidle_data, cpu); + struct device_node *state_node; + struct device_node *cpu_node; + u32 *states; + int i, ret; + + cpu_node = of_cpu_device_node_get(cpu); + if (!cpu_node) + return -ENODEV; + + states = devm_kcalloc(dev, state_count, sizeof(*states), GFP_KERNEL); + if (!states) { + ret = -ENOMEM; + goto fail; + } + + /* Parse SBI specific details from state DT nodes */ + for (i = 1; i < state_count; i++) { + state_node = of_get_cpu_state_node(cpu_node, i - 1); + if (!state_node) + break; + + ret = sbi_dt_parse_state_node(state_node, &states[i]); + of_node_put(state_node); + + if (ret) + return ret; + + pr_debug("sbi-state %#x index %d\n", states[i], i); + } + if (i != state_count) { + ret = -ENODEV; + goto fail; + } + + /* Initialize optional data, used for the hierarchical topology. */ + ret = sbi_dt_cpu_init_topology(drv, data, state_count, cpu); + if (ret < 0) + return ret; + + /* Store states in the per-cpu struct. */ + data->states = states; + +fail: + of_node_put(cpu_node); + + return ret; +} + +static void sbi_cpuidle_deinit_cpu(int cpu) +{ + struct sbi_cpuidle_data *data = per_cpu_ptr(&sbi_cpuidle_data, cpu); + + dt_idle_detach_cpu(data->dev); + sbi_cpuidle_use_cpuhp = false; +} + +static int sbi_cpuidle_init_cpu(struct device *dev, int cpu) +{ + struct cpuidle_driver *drv; + unsigned int state_count = 0; + int ret = 0; + + drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL); + if (!drv) + return -ENOMEM; + + drv->name = "sbi_cpuidle"; + drv->owner = THIS_MODULE; + drv->cpumask = (struct cpumask *)cpumask_of(cpu); + + /* RISC-V architectural WFI to be represented as state index 0. */ + drv->states[0].enter = sbi_cpuidle_enter_state; + drv->states[0].exit_latency = 1; + drv->states[0].target_residency = 1; + drv->states[0].power_usage = UINT_MAX; + strcpy(drv->states[0].name, "WFI"); + strcpy(drv->states[0].desc, "RISC-V WFI"); + + /* + * If no DT idle states are detected (ret == 0) let the driver + * initialization fail accordingly since there is no reason to + * initialize the idle driver if only wfi is supported, the + * default archictectural back-end already executes wfi + * on idle entry. + */ + ret = dt_init_idle_driver(drv, sbi_cpuidle_state_match, 1); + if (ret <= 0) { + pr_debug("HART%ld: failed to parse DT idle states\n", + cpuid_to_hartid_map(cpu)); + return ret ? : -ENODEV; + } + state_count = ret + 1; /* Include WFI state as well */ + + /* Initialize idle states from DT. */ + ret = sbi_cpuidle_dt_init_states(dev, drv, cpu, state_count); + if (ret) { + pr_err("HART%ld: failed to init idle states\n", + cpuid_to_hartid_map(cpu)); + return ret; + } + + ret = cpuidle_register(drv, NULL); + if (ret) + goto deinit; + + cpuidle_cooling_register(drv); + + return 0; +deinit: + sbi_cpuidle_deinit_cpu(cpu); + return ret; +} + +static void sbi_cpuidle_domain_sync_state(struct device *dev) +{ + /* + * All devices have now been attached/probed to the PM domain + * topology, hence it's fine to allow domain states to be picked. + */ + sbi_cpuidle_pd_allow_domain_state = true; +} + +#ifdef CONFIG_DT_IDLE_GENPD + +static int sbi_cpuidle_pd_power_off(struct generic_pm_domain *pd) +{ + struct genpd_power_state *state = &pd->states[pd->state_idx]; + u32 *pd_state; + + if (!state->data) + return 0; + + if (!sbi_cpuidle_pd_allow_domain_state) + return -EBUSY; + + /* OSI mode is enabled, set the corresponding domain state. */ + pd_state = state->data; + sbi_set_domain_state(*pd_state); + + return 0; +} + +struct sbi_pd_provider { + struct list_head link; + struct device_node *node; +}; + +static LIST_HEAD(sbi_pd_providers); + +static int sbi_pd_init(struct device_node *np) +{ + struct generic_pm_domain *pd; + struct sbi_pd_provider *pd_provider; + struct dev_power_governor *pd_gov; + int ret = -ENOMEM, state_count = 0; + + pd = dt_idle_pd_alloc(np, sbi_dt_parse_state_node); + if (!pd) + goto out; + + pd_provider = kzalloc(sizeof(*pd_provider), GFP_KERNEL); + if (!pd_provider) + goto free_pd; + + pd->flags |= GENPD_FLAG_IRQ_SAFE | GENPD_FLAG_CPU_DOMAIN; + + /* Allow power off when OSI is available. */ + if (sbi_cpuidle_use_osi) + pd->power_off = sbi_cpuidle_pd_power_off; + else + pd->flags |= GENPD_FLAG_ALWAYS_ON; + + /* Use governor for CPU PM domains if it has some states to manage. */ + pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL; + + ret = pm_genpd_init(pd, pd_gov, false); + if (ret) + goto free_pd_prov; + + ret = of_genpd_add_provider_simple(np, pd); + if (ret) + goto remove_pd; + + pd_provider->node = of_node_get(np); + list_add(&pd_provider->link, &sbi_pd_providers); + + pr_debug("init PM domain %s\n", pd->name); + return 0; + +remove_pd: + pm_genpd_remove(pd); +free_pd_prov: + kfree(pd_provider); +free_pd: + dt_idle_pd_free(pd); +out: + pr_err("failed to init PM domain ret=%d %pOF\n", ret, np); + return ret; +} + +static void sbi_pd_remove(void) +{ + struct sbi_pd_provider *pd_provider, *it; + struct generic_pm_domain *genpd; + + list_for_each_entry_safe(pd_provider, it, &sbi_pd_providers, link) { + of_genpd_del_provider(pd_provider->node); + + genpd = of_genpd_remove_last(pd_provider->node); + if (!IS_ERR(genpd)) + kfree(genpd); + + of_node_put(pd_provider->node); + list_del(&pd_provider->link); + kfree(pd_provider); + } +} + +static int sbi_genpd_probe(struct device_node *np) +{ + struct device_node *node; + int ret = 0, pd_count = 0; + + if (!np) + return -ENODEV; + + /* + * Parse child nodes for the "#power-domain-cells" property and + * initialize a genpd/genpd-of-provider pair when it's found. + */ + for_each_child_of_node(np, node) { + if (!of_find_property(node, "#power-domain-cells", NULL)) + continue; + + ret = sbi_pd_init(node); + if (ret) + goto put_node; + + pd_count++; + } + + /* Bail out if not using the hierarchical CPU topology. */ + if (!pd_count) + goto no_pd; + + /* Link genpd masters/subdomains to model the CPU topology. */ + ret = dt_idle_pd_init_topology(np); + if (ret) + goto remove_pd; + + return 0; + +put_node: + of_node_put(node); +remove_pd: + sbi_pd_remove(); + pr_err("failed to create CPU PM domains ret=%d\n", ret); +no_pd: + return ret; +} + +#else + +static inline int sbi_genpd_probe(struct device_node *np) +{ + return 0; +} + +#endif + +static int sbi_cpuidle_probe(struct platform_device *pdev) +{ + int cpu, ret; + struct cpuidle_driver *drv; + struct cpuidle_device *dev; + struct device_node *np, *pds_node; + + /* Detect OSI support based on CPU DT nodes */ + sbi_cpuidle_use_osi = true; + for_each_possible_cpu(cpu) { + np = of_cpu_device_node_get(cpu); + if (np && + of_find_property(np, "power-domains", NULL) && + of_find_property(np, "power-domain-names", NULL)) { + continue; + } else { + sbi_cpuidle_use_osi = false; + break; + } + } + + /* Populate generic power domains from DT nodes */ + pds_node = of_find_node_by_path("/cpus/power-domains"); + if (pds_node) { + ret = sbi_genpd_probe(pds_node); + of_node_put(pds_node); + if (ret) + return ret; + } + + /* Initialize CPU idle driver for each CPU */ + for_each_possible_cpu(cpu) { + ret = sbi_cpuidle_init_cpu(&pdev->dev, cpu); + if (ret) { + pr_debug("HART%ld: idle driver init failed\n", + cpuid_to_hartid_map(cpu)); + goto out_fail; + } + } + + /* Setup CPU hotplut notifiers */ + sbi_idle_init_cpuhp(); + + pr_info("idle driver registered for all CPUs\n"); + + return 0; + +out_fail: + while (--cpu >= 0) { + dev = per_cpu(cpuidle_devices, cpu); + drv = cpuidle_get_cpu_driver(dev); + cpuidle_unregister(drv); + sbi_cpuidle_deinit_cpu(cpu); + } + + return ret; +} + +static struct platform_driver sbi_cpuidle_driver = { + .probe = sbi_cpuidle_probe, + .driver = { + .name = "sbi-cpuidle", + .sync_state = sbi_cpuidle_domain_sync_state, + }, +}; + +static int __init sbi_cpuidle_init(void) +{ + int ret; + struct platform_device *pdev; + + /* + * The SBI HSM suspend function is only available when: + * 1) SBI version is 0.3 or higher + * 2) SBI HSM extension is available + */ + if ((sbi_spec_version < sbi_mk_version(0, 3)) || + sbi_probe_extension(SBI_EXT_HSM) <= 0) { + pr_info("HSM suspend not available\n"); + return 0; + } + + ret = platform_driver_register(&sbi_cpuidle_driver); + if (ret) + return ret; + + pdev = platform_device_register_simple("sbi-cpuidle", + -1, NULL, 0); + if (IS_ERR(pdev)) { + platform_driver_unregister(&sbi_cpuidle_driver); + return PTR_ERR(pdev); + } + + return 0; +} +device_initcall(sbi_cpuidle_init); -- 2.25.1