Re: [PATCH RFC 1/4] arm64: kernel: implement DT based idle states infrastructure

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





Hi Lorenzo,



On 03/18/2014 11:20 AM, Lorenzo Pieralisi wrote:
On most common ARM systems, the low-power states a CPU can be put into are
not discoverable in HW and require device tree bindings to describe
the respective power domains, power down protocol and idle states parameters.

In order to enable DT based idle states and configure idle drivers, this
patch implements the bulk infrastructure required to parse the device tree
idle states bindings and functions to initialize idle driver and protocol
back-ends.

Protocol back-ends (eg PSCI) must register a protocol initializer with
the idle state parser so that upon protocol detection, the parsing code
can call the back-end infrastructure to complete the idle driver
initialization.

Idle state index 0 is always initialized, ie always considered present
on all ARM platforms.

Code that initializes idle states checks the CPU idle driver cpumask so
that multiple CPU idle drivers can be initialized through it in the
kernel. The CPU idle driver cpumask defines which idle states should be
considered valid for the driver, ie idle states that are valid on a set
of cpus the idle driver manages.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@xxxxxxx>
---
  arch/arm64/Kconfig                   |   4 +
  arch/arm64/include/asm/idle_states.h |  20 ++
  arch/arm64/kernel/Makefile           |   1 +
  arch/arm64/kernel/idle_states.c      | 397 +++++++++++++++++++++++++++++++++++
  4 files changed, 422 insertions(+)
  create mode 100644 arch/arm64/include/asm/idle_states.h
  create mode 100644 arch/arm64/kernel/idle_states.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 27bbcfc..3132572 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -295,6 +295,10 @@ config ARCH_SUSPEND_POSSIBLE
  config ARM64_CPU_SUSPEND
  	def_bool PM_SLEEP

+config ARM64_IDLE_STATES
+	def_bool CPU_IDLE
+	select ARM64_CPU_SUSPEND
+
  endmenu

  menu "CPU Power Management"
diff --git a/arch/arm64/include/asm/idle_states.h b/arch/arm64/include/asm/idle_states.h
new file mode 100644
index 0000000..0b9f9ba
--- /dev/null
+++ b/arch/arm64/include/asm/idle_states.h
@@ -0,0 +1,20 @@
+#ifndef __ARM64_IDLE_STATES
+#define __ARM64_IDLE_STATES
+
+struct idle_state {
+	u32	index;
+	u32	entry_latency;
+	u32	exit_latency;
+	u32	min_residency;
+	u32	param;

Could you add a comment for this 'param' field, or change the name to e.g. 'psci_param'?

+	struct device_node *node;
+	struct idle_state *state;

Why is it needed ?

+	cpumask_t	cpus;

It sounds strange to have to declare this cpumask here.

+	bool	logic_state_retained;
+	bool	cache_state_retained;
+};

IMHO, there is useless duplication of structure definition / declaration.

I suggest to stick as much as possible to the cpuidle structures, that is cpuidle_state and cpuidle_driver.

Instead of using the intermediate idle_states[CPUIDLE_STATE_MAX] array, use drv->states and drv->cpumask directly. That will avoid an extra copy and needless definitions/declarations. The structure can then be filled directly from parse_idle_states_node / parse_idle_states.

All this comes from the need of the state ordering, right ?

+struct cpuidle_driver;
+
+int __init arm_init_idle_driver(struct cpuidle_driver *drv);
+#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 2d4554b..2afc9a0 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -19,6 +19,7 @@ arm64-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o
  arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
  arm64-obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
  arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND)	+= sleep.o suspend.o
+arm64-obj-$(CONFIG_ARM64_IDLE_STATES)	+= idle_states.o
  arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o

  obj-y					+= $(arm64-obj-y) vdso/
diff --git a/arch/arm64/kernel/idle_states.c b/arch/arm64/kernel/idle_states.c
new file mode 100644
index 0000000..0386cff
--- /dev/null
+++ b/arch/arm64/kernel/idle_states.c
@@ -0,0 +1,397 @@
+/*
+ * ARM device tree idle states parsing code.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@xxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpuidle.h>
+#include <linux/cpumask.h>
+#include <linux/cpu_pm.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+
+#include <asm/idle_states.h>
+#include <asm/psci.h>
+#include <asm/suspend.h>
+
+static struct idle_state idle_states[CPUIDLE_STATE_MAX] __initdata;
+typedef int (*idle_states_initializer)(struct cpumask *, struct idle_state *,
+				       unsigned int);
+
+struct protocol_init {
+	const char *prot;
+	idle_states_initializer prot_init;
+};
+
+static const struct protocol_init protocols[] __initconst = {
+	{}
+};
+
+static __init const struct protocol_init *get_protocol(const char *str)
+{
+	int i;
+
+	if (!str)
+		return NULL;
+
+	for (i = 0; protocols[i].prot; i++)
+		if (!strcmp(protocols[i].prot, str))
+			return &protocols[i];
+
+	return NULL;
+}
+
+static void __init idle_state_cpu_mask(int cpu, struct idle_state *idle_state,
+				       struct device_node *cn)
+{
+	int i = 0;
+	struct device_node *cpu_state;
+
+	do {
+		cpu_state = of_parse_phandle(cn, "cpu-idle-states", i++);
+		if (cpu_state && idle_state->node == cpu_state)
+			cpumask_set_cpu(cpu, &idle_state->cpus);
+		of_node_put(cpu_state);
+	} while (cpu_state);
+}
+
+static int __init parse_idle_states_node(struct device_node *parent, int cnt,
+					 const cpumask_t *cpus)
+{
+	struct device_node *state;
+	struct idle_state *idle_state;
+	int cpu;
+
+	for_each_child_of_node(parent, state) {
+
+		if (!of_device_is_compatible(state, "arm,idle-state")) {
+			pr_warn(" %s has child nodes that are not idle states\n",
+				parent->full_name);
+			continue;
+		}
+
+		idle_state = &idle_states[cnt];
+
+		pr_debug(" * %s...\n", state->full_name);
+
+		idle_state->node = state;

It is a bit confusing to use 'state' variable name here, is it possible to change to something like 'node' or whatever ?

+		/*
+		 * Check cpus on which the idle state is valid
+		 */
+		for_each_possible_cpu(cpu) {
+			struct device_node *cn;
+
+			cn = of_get_cpu_node(cpu, NULL);
+			if (!cn) {
+				pr_err("missing device node for CPU %d\n", cpu);
+				continue;
+			}
+			idle_state_cpu_mask(cpu, idle_state, cn);
+		}
+
+		/*
+		 * The driver cpumask is not a subset of cpus on which the
+		 * idle state is valid, hence the idle state is skipped for
+		 * this driver.
+		 */
+		if (!cpumask_subset(cpus, &idle_state->cpus))
+			continue;
+
+		if (of_property_read_u32(state, "index", &idle_state->index)) {
+			pr_debug(" * %s missing index property\n",
+				     state->full_name);
+			continue;
+		}
+
+		if (of_property_read_u32(state, "entry-latency-us",
+					 &idle_state->entry_latency)) {
+			pr_debug(" * %s missing entry latency property\n",
+				     state->full_name);
+			continue;
+		}
+
+		if (of_property_read_u32(state, "exit-latency-us",
+					 &idle_state->exit_latency)) {
+			pr_debug(" * %s missing exit latency property\n",
+				     state->full_name);
+			continue;
+		}
+
+		if (of_property_read_u32(state, "min-residency-us",
+					 &idle_state->min_residency)) {
+			pr_debug(" * %s missing min-residency property\n",
+				     state->full_name);
+			continue;
+		}
+
+		if (of_property_read_u32(state, "entry-method-param",
+					 &idle_state->param)) {
+			pr_debug(" * %s missing entry-method-param property\n",
+				     state->full_name);
+			continue;
+		}
+
+		if (++cnt == CPUIDLE_STATE_MAX) {
+			pr_warn("Number of parsed states equal static CPU idle state limit\n");
+			of_node_put(state);
+			break;
+		}
+	}
+
+	return cnt;
+}
+
+static int __init parse_idle_states(struct device_node *root, int cnt,
+				    const cpumask_t *cpus)
+{
+	int head_idx, curr_idx;
+	struct device_node *curr = root;
+
+	/*
+	 * Breadth-first DT idle states parsing
+	 *
+	 * Sweep idle states level in the device tree and use the
+	 * idle_states array to stash the visited nodes, as a queue.
+	 *
+	 * parse_idle_states_node() updates the idle_states array by
+	 * initializing entries, stashing the device tree node for the
+	 * corresponding state (struct idle_state.node) and incrementing
+	 * the idle states counter that is returned so that curr_idx is
+	 * kept up-to-date while descending into tree levels.
+	 *
+	 * Store the initial counter head_idx and curr_idx and use head_idx
+	 * as a queue of node indices to be visited.
+	 *
+	 * When we reach the max number of CPU idle states or
+	 * head_idx == curr_idx (empty nodes queue) we are done.
+	 */
+	head_idx = curr_idx = cnt;
+
+	do {
+		curr_idx = parse_idle_states_node(curr, curr_idx, cpus);
+		if (curr_idx == CPUIDLE_STATE_MAX || head_idx == curr_idx)
+			break;
+		/*
+		 * idle_states array is updated by parse_idle_states_node(),
+		 * we can use the initialized states as a queue of nodes
+		 * that need to be checked for their idle states siblings.
+		 * head_idx works as a pointer into the queue to get the
+		 * next node to be parsed.
+		 */
+		curr = idle_states[head_idx++].node;
+	} while (curr);
+
+	return curr_idx;
+}
+
+/*
+ * Sort states according to their index value, higher indexes
+ * imply higher power savings, as expected by the CPU idle subsystem.
+ */
+static void __init sort_states(int count)
+{
+	int i, j;
+	struct idle_state *idle_state;
+
+	/*
+	 * move pointers instead of entries
+	 */
+	for (i = 0; i < count; i++)
+		idle_states[i].state = &idle_states[i];
+
+	/*
+	 * Selection sort is acceptable here, since the
+	 * number of states is small (<=CPUIDLE_STATE_MAX)
+	 *
+	 * Index starts from 1, we know index 0 is standby wfi and
+	 * it is always the idle state with higher power consumption
+	 */
+	for (i = 1; i < count - 1; i++) {
+		u32 elem = i;
+		for (j = i+1; j < count; j++) {
+			if (idle_states[j].state->index <
+					idle_states[elem].state->index)
+				elem = j;
+		}
+		if (i != elem) {
+			idle_state = idle_states[i].state;
+			idle_states[i].state = idle_states[elem].state;
+			idle_states[elem].state = idle_state;
+		}
+	}
+}
+
+/*
+ * arm_dt_init_idle_states - Parse DT idle states and initialize the protocol
+ *			     back-end
+ *
+ * @prot: pointer to the protocol initializer. Initialized only if return code
+ *        is >0
+ * @cpus: CPU idle driver cpumask
+ *
+ * Returns:
+ *	Number of idle states detected upon success
+ *	<0 on failure
+ */
+static int __init arm_dt_init_idle_states(const struct protocol_init **prot,
+					  const cpumask_t *cpus)
+{
+	struct device_node *cpups;
+	const char *entry_method;
+	/* start from 1, stanbywfi is always there */
+	int cnt = 1, ret = 0;
+
+	if (!prot)
+		return -EINVAL;
+
+	cpups = of_find_node_by_path("/cpus/idle-states");
+
+	if (!cpups)
+		return -ENODEV;
+
+	cnt = parse_idle_states(cpups, cnt, cpus);
+
+	if (cnt == 1) {
+		ret = -ENODATA;
+		goto put_node;
+	}

If 'cnt' is always 1 when calling parse_idle_states, it would be more logical to do:

cnt = parse_idle_states(cpups, cpus);
if (cnt < 0) {
	ret = -ENODATA;
	goto put_node;
}

and change parse_idle_states accordingly.

+
+	/*
+	 * idle driver expects states to sorted in terms of power
+	 * consumption
+	 */
+	sort_states(cnt);

Isn't it possible to sort the states by index while parsing the DT instead?

+	if (of_property_read_string(cpups, "entry-method", &entry_method)) {
+		pr_warn(" * %s missing entry_method property\n",
+			    cpups->full_name);
+		ret = -EOPNOTSUPP;
+		goto put_node;
+	}
+
+	*prot = get_protocol(entry_method);
+	if (!*prot) {
+		pr_warn("Missing protocol initializer\n");
+		ret = -EOPNOTSUPP;
+		goto put_node;
+	}
+
+	pr_debug("detected %u idle states\n", cnt);
+
+put_node:
+	of_node_put(cpups);
+	return ret ? : cnt;
+}
+
+/*
+ * arm_enter_idle_state - Programs CPU to enter the specified state
+ *
+ * @dev: cpuidle device
+ * @drv: cpuidle driver
+ * @idx: state index
+ *
+ * Called from the CPUidle framework to program the device to the
+ * specified target state selected by the governor.
+ */
+static int arm_enter_idle_state(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv, int idx)
+{
+	int ret;
+
+	if (!idx) {
+		/*
+		 * idle index 0 is just standby wfi, does not require CPU
+		 * to be suspended
+		 */
+		cpu_do_idle();
+		return idx;
+	}
+
+	cpu_pm_enter();
+	/*
+	 * Pass idle state index to cpu_suspend which in turn will call
+	 * the CPU ops suspend protocol with idle index as a parameter
+	 */
+	ret = cpu_suspend(idx);
+
+	cpu_pm_exit();
+
+	return ret ? : idx;

Are we sure cpu_suspend() will always return a negative value on failure? If the underlying implementation returns 1, like the exynos4/5 cpuidle driver does, that will mess up the cpuidle governor with bad inputs.

+}
+
+int __init arm_init_idle_driver(struct cpuidle_driver *drv)
+{
+	int i,  idle_states_nb;
+	struct idle_state *idle_state;
+	struct cpuidle_state *s;
+	const struct protocol_init *prot;
+
+	drv->states[0].exit_latency = 1;
+	drv->states[0].target_residency = 1;
+	drv->states[0].flags = CPUIDLE_FLAG_TIME_VALID;
+	drv->states[0].enter = arm_enter_idle_state;
+	strncpy(drv->states[0].name, "ARM WFI", CPUIDLE_NAME_LEN);
+	strncpy(drv->states[0].desc, "ARM WFI", CPUIDLE_DESC_LEN);

The drv->states[0].name|desc is an array but I hope with some cleanups in the acpi cpuidle driver, we can change it to a const char *.

Is it possible to change it to:
drv->states[0].name = "ARM WFI";
drv->states[0].desc = "ARM WFI";

?

+	idle_states_nb = arm_dt_init_idle_states(&prot, drv->cpumask);
+
+	if (idle_states_nb < 0) {
+		/*
+		 * No DT based idle states detected
+		 * Initialize driver count and exit successfully.
+		 */
+		drv->state_count = 1;
+		return 0;
+	}

If we are not able to initialize the cpuidle driver, wouldn't it make sense to fall back to the default idle function instead of defining a single-state driver? If no cpuidle driver is registered, the idle main loop will switch to the default idle function. That should save the system from entering the cpuidle framework and the governor, with all its computations, for nothing, no?

+	/*
+	 * We finally have some idle states to initialize.
+	 * Driver state 0 corresponds to WFI, start from index 1 and count up
+	 * to idle_states_nb (parsed idle states + WFI).
+	 * arm_dt_init_idle_states() ensures that CPUIDLE_STATE_MAX is not
+	 * exceeded.
+	 */

I don't know the arm64 architecture, but on armv7 there are some implementations which do not support the WFI instruction alone (e.g. omap4). If that could also happen on arm64, note that the driver is assuming WFI is always supported.

+	s = &drv->states[1];
+	for (i = 1; i < idle_states_nb; i++, s++) {
+		idle_state = idle_states[i].state;
+		if (!idle_state)
+			break;
+
+		strncpy(s->name, idle_state->node->name, CPUIDLE_NAME_LEN);
+		strncpy(s->desc, idle_state->node->name, CPUIDLE_DESC_LEN);

Same comment as above.

+
+		s->exit_latency =
+			idle_state->entry_latency + idle_state->exit_latency;
+		s->target_residency = idle_state->min_residency;
+		/*
+		 * TBD: flag for timers is set implicitly for now but must be
+		 * linked to power domains.
+		 */
+		if (!idle_state->logic_state_retained)
+			s->flags |= CPUIDLE_FLAG_TIMER_STOP;

The exynos uses per-CPU timers which are not local, so they are not shut down when 'logic_state_retained' is reached (judging by the calxeda driver, it should be the same there). Is the TBD comment describing such a case?

+		s->flags |= CPUIDLE_FLAG_TIME_VALID;
+		s->enter = arm_enter_idle_state;
+	}
+
+	/*
+	 * If we are here, we have a protocol back-end to initialize.
+	 *
+	 * If protocol initializer fails reset states count to 1 (wfi)
+	 */
+	if (prot->prot_init(drv->cpumask, idle_states, idle_states_nb))
+		i = 1;

It should fail instead, as mentioned above with the single-state-driver comment.

+
+	pr_info("idle states initialized, prototocol: %s states count: %u",
+		prot->prot, i);
+
+	drv->state_count = i;
+	return 0;
+}


  -- Daniel


--
 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Device Tree Compiler]     [Device Tree Spec]     [Linux Driver Backports]     [Video for Linux]     [Linux USB Devel]     [Linux PCI Devel]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Yosemite Backpacking]
  Powered by Linux