+ devicetree mail list On 10/11/18 06:22, Akshay Adiga wrote: > This patch adds support for new device-tree format for idle state > description. > > Previously if an older kernel runs on a newer firmware, it may enable > all available states irrespective of its capability of handling it. > New device tree format adds a compatible flag, so that only kernel > which has the capability to handle the version of stop state will enable > it. > > Older kernel will still see stop0 and stop0_lite in older format and we > will deprecate it after some time. > > 1) Idea is to bump up the version in firmware if we find a bug or > regression in stop states. A fix will be provided in linux which would > now know about the bumped up version of stop states, whereas kernel > without fixes would ignore the states. > > 2) Slowly deprecate cpuidle /cpuhotplug threshold which is hard-coded > into cpuidle-powernv driver. Instead use compatible strings to indicate > if idle state is suitable for cpuidle and hotplug. > > New idle state device tree format : > power-mgt { > ... > ibm,enabled-stop-levels = <0xec000000>; > ibm,cpu-idle-state-psscr-mask = <0x0 0x3003ff 0x0 0x3003ff>; > ibm,cpu-idle-state-latencies-ns = <0x3e8 0x7d0>; > ibm,cpu-idle-state-psscr = <0x0 0x330 0x0 0x300330>; > ibm,cpu-idle-state-flags = <0x100000 0x101000>; > ibm,cpu-idle-state-residency-ns = <0x2710 0x4e20>; > ibm,idle-states { > stop4 { > flags = <0x207000>; > compatible = "ibm,state-v1", > "opal-supported"; > type = "cpuidle"; > psscr-mask = <0x0 0x3003ff>; > handle = <0x102>; > latency-ns = <0x186a0>; > residency-ns = <0x989680>; > psscr = <0x0 0x300374>; > }; > ... > stop11 { > ... > compatible = "ibm,state-v1", > "opal-supported"; > type = "cpuoffline"; > ... 
> }; > }; > type strings : > "cpuidle" : indicates it should be used by cpuidle-driver > "cpuoffline" : indicates it should be used by hotplug driver > > compatible strings : > "ibm,state-v1" : kernel checks if it knows about this version > "opal-supported" : indicates kernel can fall back to use opal > for stop-transitions > > Signed-off-by: Akshay Adiga <akshay.adiga@xxxxxxxxxxxxxxxxxx> > --- > > Changes from v1 : > - Code is rebased on Nick Piggin's v4 patch "powerpc/64s: reimplement book3s > idle code in C" > - Moved "cpuidle" and "cpuoffline" as separate property called > "type" > > > arch/powerpc/include/asm/cpuidle.h | 9 ++ > arch/powerpc/platforms/powernv/idle.c | 132 +++++++++++++++++++++++++- > drivers/cpuidle/cpuidle-powernv.c | 31 ++++-- > 3 files changed, 160 insertions(+), 12 deletions(-) > > diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h > index 9844b3ded187..e920a15e797f 100644 > --- a/arch/powerpc/include/asm/cpuidle.h > +++ b/arch/powerpc/include/asm/cpuidle.h > @@ -70,14 +70,23 @@ > > #ifndef __ASSEMBLY__ > > +enum idle_state_type_t { > + CPUIDLE_TYPE, > + CPUOFFLINE_TYPE > +}; > + > +#define POWERNV_THRESHOLD_LATENCY_NS 200000 > +#define PNV_VER_NAME_LEN 32 > #define PNV_IDLE_NAME_LEN 16 > struct pnv_idle_states_t { > char name[PNV_IDLE_NAME_LEN]; > + char version[PNV_VER_NAME_LEN]; > u32 latency_ns; > u32 residency_ns; > u64 psscr_val; > u64 psscr_mask; > u32 flags; > + enum idle_state_type_t type; > bool valid; > }; > > diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c > index 96186af9e953..755918402591 100644 > --- a/arch/powerpc/platforms/powernv/idle.c > +++ b/arch/powerpc/platforms/powernv/idle.c > @@ -54,6 +54,20 @@ static bool default_stop_found; > static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; > static u64 pnv_first_hv_loss_level = MAX_STOP_STATE + 1; > > + > +static int parse_dt_v1(struct device_node *np); > +struct stop_version_t { > + const char 
name[PNV_VER_NAME_LEN]; > + int (*parser_fn)(struct device_node *np); > +}; > +struct stop_version_t known_versions[] = { > + { > + .name = "ibm,state-v1", > + .parser_fn = parse_dt_v1, > + } > + }; > +const int nr_known_versions = 1; > + > /* > * psscr value and mask of the deepest stop idle state. > * Used when a cpu is offlined. > @@ -1195,6 +1209,77 @@ static void __init pnv_probe_idle_states(void) > supported_cpuidle_states |= pnv_idle_states[i].flags; > } > > +static int parse_dt_v1(struct device_node *dt_node) > +{ > + const char *temp_str; > + int rc; > + int i = nr_pnv_idle_states; > + > + if (!dt_node) { > + pr_err("Invalid device_node\n"); > + return -EINVAL; > + } > + > + rc = of_property_read_string(dt_node, "name", &temp_str); > + if (rc) { > + pr_err("error reading names rc= %d\n", rc); > + return -EINVAL; > + } > + strncpy(pnv_idle_states[i].name, temp_str, PNV_IDLE_NAME_LEN); > + rc = of_property_read_u32(dt_node, "residency-ns", > + &pnv_idle_states[i].residency_ns); > + if (rc) { > + pr_err("error reading residency rc= %d\n", rc); > + return -EINVAL; > + } > + rc = of_property_read_u32(dt_node, "latency-ns", > + &pnv_idle_states[i].latency_ns); > + if (rc) { > + pr_err("error reading latency rc= %d\n", rc); > + return -EINVAL; > + } > + rc = of_property_read_u32(dt_node, "flags", > + &pnv_idle_states[i].flags); > + if (rc) { > + pr_err("error reading flags rc= %d\n", rc); > + return -EINVAL; > + } > + > + /* We are not expecting power8 device-tree in this format */ > + rc = of_property_read_u64(dt_node, "psscr-mask", > + &pnv_idle_states[i].psscr_mask); > + if (rc) { > + pr_err("error reading psscr-mask rc= %d\n", rc); > + return -EINVAL; > + } > + rc = of_property_read_u64(dt_node, "psscr", > + &pnv_idle_states[i].psscr_val); > + if (rc) { > + pr_err("error reading psscr rc= %d\n", rc); > + return -EINVAL; > + } > + > + /* > + * TODO : save the version strings in data structure > + */ > + rc = of_property_read_string(dt_node, "type", &temp_str); 
> + pr_info("type = %s\n", temp_str); > + if (rc) { > + pr_err("error reading type rc= %d\n", rc); > + return -EINVAL; > + } > + if (strcmp(temp_str, "cpuidle") == 0) > + pnv_idle_states[i].type = CPUIDLE_TYPE; > + else if (strcmp(temp_str, "cpuoffline") == 0) > + pnv_idle_states[i].type = CPUOFFLINE_TYPE; > + else { > + pr_err("Invalid type skipping %s\n", > + pnv_idle_states[i].name); > + return -EINVAL; > + } > + return 0; > + > +} > /* > * This function parses device-tree and populates all the information > * into pnv_idle_states structure. It also sets up nr_pnv_idle_states > @@ -1203,8 +1288,9 @@ static void __init pnv_probe_idle_states(void) > > static int pnv_parse_cpuidle_dt(void) > { > - struct device_node *np; > + struct device_node *np, *np1, *dt_node; > int nr_idle_states, i; > + int additional_states = 0; > int rc = 0; > u32 *temp_u32; > u64 *temp_u64; > @@ -1218,8 +1304,14 @@ static int pnv_parse_cpuidle_dt(void) > nr_idle_states = of_property_count_u32_elems(np, > "ibm,cpu-idle-state-flags"); > > - pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states), > - GFP_KERNEL); > + np1 = of_find_node_by_path("/ibm,opal/power-mgt/ibm,idle-states"); > + if (np1) { > + for_each_child_of_node(np1, dt_node) > + additional_states++; > + } > + pr_info("states in new format : %d\n", additional_states); > + pnv_idle_states = kcalloc(nr_idle_states + additional_states, > + sizeof(*pnv_idle_states), GFP_KERNEL); > temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL); > temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL); > temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL); > @@ -1298,8 +1390,40 @@ static int pnv_parse_cpuidle_dt(void) > for (i = 0; i < nr_idle_states; i++) > strlcpy(pnv_idle_states[i].name, temp_string[i], > PNV_IDLE_NAME_LEN); > + > + /* Mark states as CPUIDLE_TYPE /CPUOFFLINE for older version*/ > + for (i = 0; i < nr_idle_states; i++) { > + if (pnv_idle_states[i].latency_ns > POWERNV_THRESHOLD_LATENCY_NS) 
> + pnv_idle_states[i].type = CPUOFFLINE_TYPE; > + else > + pnv_idle_states[i].type = CPUIDLE_TYPE; > + } > nr_pnv_idle_states = nr_idle_states; > - rc = 0; > + /* Parsing node-based idle states device-tree format */ > + if (!np1) { > + pr_info("dt does not contain ibm,idle_states"); > + goto out; > + } > + /* Parse each child node with appropriate parser_fn */ > + for_each_child_of_node(np1, dt_node) { > + bool found_known_version = false; > + /* we don't have state falling back to opal*/ > + for (i = 0; i < nr_known_versions ; i++) { > + if (of_device_is_compatible(dt_node, known_versions[i].name)) { > + rc = known_versions[i].parser_fn(dt_node); > + if (rc) { > + pr_err("%s could not parse\n", known_versions[i].name); > + continue; > + } > + found_known_version = true; > + } > + } > + if (!found_known_version) { > + pr_info("Unsupported state, skipping all further state\n"); > + goto out; > + } > + nr_pnv_idle_states++; > + } > out: > kfree(temp_u32); > kfree(temp_u64); > diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c > index 84b1ebe212b3..a15514ebd1c3 100644 > --- a/drivers/cpuidle/cpuidle-powernv.c > +++ b/drivers/cpuidle/cpuidle-powernv.c > @@ -26,7 +26,6 @@ > * Expose only those Hardware idle states via the cpuidle framework > * that have latency value below POWERNV_THRESHOLD_LATENCY_NS. 
> */ > -#define POWERNV_THRESHOLD_LATENCY_NS 200000 > > static struct cpuidle_driver powernv_idle_driver = { > .name = "powernv_idle", > @@ -265,7 +264,7 @@ extern u32 pnv_get_supported_cpuidle_states(void); > static int powernv_add_idle_states(void) > { > int nr_idle_states = 1; /* Snooze */ > - int dt_idle_states; > + int dt_idle_states = 0; > u32 has_stop_states = 0; > int i; > u32 supported_flags = pnv_get_supported_cpuidle_states(); > @@ -277,14 +276,19 @@ static int powernv_add_idle_states(void) > goto out; > } > > - /* TODO: Count only states which are eligible for cpuidle */ > - dt_idle_states = nr_pnv_idle_states; > + /* Count only cpuidle states*/ > + for (i = 0; i < nr_pnv_idle_states; i++) { > + if (pnv_idle_states[i].type == CPUIDLE_TYPE) > + dt_idle_states++; > + } > + pr_info("idle states in dt = %d , states with idle flag = %d", > + nr_pnv_idle_states, dt_idle_states); > > /* > * Since snooze is used as first idle state, max idle states allowed is > * CPUIDLE_STATE_MAX -1 > */ > - if (nr_pnv_idle_states > CPUIDLE_STATE_MAX - 1) { > + if (dt_idle_states > CPUIDLE_STATE_MAX - 1) { > pr_warn("cpuidle-powernv: discovered idle states more than allowed"); > dt_idle_states = CPUIDLE_STATE_MAX - 1; > } > @@ -305,8 +309,15 @@ static int powernv_add_idle_states(void) > * Skip the platform idle state whose flag isn't in > * the supported_cpuidle_states flag mask. 
> */ > - if ((state->flags & supported_flags) != state->flags) > + if ((state->flags & supported_flags) != state->flags) { > + pr_warn("State %d does not have supported flag\n", i); > + continue; > + } > + if (state->type != CPUIDLE_TYPE) { > + pr_info("State %d is not idletype, it of %d type\n", i, > + state->type); > continue; > + } > /* > * If an idle state has exit latency beyond > * POWERNV_THRESHOLD_LATENCY_NS then don't use it > @@ -321,8 +332,10 @@ static int powernv_add_idle_states(void) > exit_latency = DIV_ROUND_UP(state->latency_ns, 1000); > target_residency = DIV_ROUND_UP(state->residency_ns, 1000); > > - if (has_stop_states && !(state->valid)) > + if (has_stop_states && !(state->valid)) { > + pr_warn("State %d is invalid\n", i); > continue; > + } > > if (state->flags & OPAL_PM_TIMEBASE_STOP) > stops_timebase = true; > @@ -360,8 +373,10 @@ static int powernv_add_idle_states(void) > state->psscr_mask); > } > #endif > - else > + else { > + pr_warn("cpuidle-powernv : could not add state\n"); > continue; > + } > nr_idle_states++; > } > out: >