Currently when a CPU is off-lined it enters either MWAIT-based idle or, if MWAIT is not desired or supported, HLT-based idle (which places the processor in C1 state). This patch allows processors without MWAIT support to stay in states deeper than C1. Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxx> --- arch/x86/kernel/smpboot.c | 4 +++- drivers/acpi/processor_idle.c | 31 +++++++++++++++++++++++++++++++ drivers/cpuidle/cpuidle.c | 28 ++++++++++++++++++++++++++++ include/linux/cpuidle.h | 4 ++++ 4 files changed, 66 insertions(+), 1 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 66d250c..93a2a09 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -50,6 +50,7 @@ #include <linux/tboot.h> #include <linux/stackprotector.h> #include <linux/gfp.h> +#include <linux/cpuidle.h> #include <asm/acpi.h> #include <asm/desc.h> @@ -1422,7 +1423,8 @@ void native_play_dead(void) tboot_shutdown(TB_SHUTDOWN_WFS); mwait_play_dead(); /* Only returns on failure */ - hlt_play_dead(); + if (cpuidle_play_dead()) + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 0e8e2de..6b1d32a 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -770,6 +770,35 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, return index; } + +/** + * acpi_idle_play_dead - enters an ACPI state for long-term idle (i.e. off-lining) + * @dev: the target CPU + * @index: the index of suggested state + */ +static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) +{ + struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; + struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); + + ACPI_FLUSH_CPU_CACHE(); + + while (1) { + + if (cx->entry_method == ACPI_CSTATE_HALT) + halt(); + else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) { + inb(cx->address); + /* See comment in acpi_idle_do_entry() */ + inl(acpi_gbl_FADT.xpm_timer_block.address); + } else + return -ENODEV; + } + + /* Never reached */ + return 0; +} + /** * acpi_idle_enter_simple - enters an ACPI state without BM handling * @dev: the target CPU @@ -1077,12 +1106,14 @@ static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr) state->flags |= CPUIDLE_FLAG_TIME_VALID; state->enter = acpi_idle_enter_c1; + state->enter_dead = acpi_idle_play_dead; drv->safe_state_index = count; break; case ACPI_STATE_C2: state->flags |= CPUIDLE_FLAG_TIME_VALID; state->enter = acpi_idle_enter_simple; + state->enter_dead = acpi_idle_play_dead; drv->safe_state_index = count; break; diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 59f4261..6979a4c 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -54,6 +54,34 @@ static void cpuidle_kick_cpus(void) {} static int __cpuidle_register_device(struct cpuidle_device *dev); /** + * cpuidle_play_dead - cpu off-lining + * + * Only returns in case of an error + */ +int cpuidle_play_dead(void) +{ + struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); + struct cpuidle_driver *drv = cpuidle_get_driver(); + int i, dead_state = -1; + int power_usage = -1; + + /* Find lowest-power state that supports long-term idle */ + for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) { + struct cpuidle_state *s = &drv->states[i]; + + if (s->power_usage < power_usage && s->enter_dead) { + power_usage = s->power_usage; + dead_state = i; + } + } + + if (dead_state != -1) + return drv->states[dead_state].enter_dead(dev, dead_state); + + return -ENODEV; +} + +/** * cpuidle_idle_call - the main idle loop * * NOTE: no locks or semaphores should be used here diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 712abcc..2662493 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -49,6 +49,8 @@ struct cpuidle_state { int (*enter) (struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); + + int (*enter_dead) (struct cpuidle_device *dev, int index); }; /* Idle State Flags */ @@ -140,6 +142,7 @@ extern void cpuidle_pause_and_lock(void); extern void cpuidle_resume_and_unlock(void); extern int cpuidle_enable_device(struct cpuidle_device *dev); extern void cpuidle_disable_device(struct cpuidle_device *dev); +extern int cpuidle_play_dead(void); #else static inline void disable_cpuidle(void) { } @@ -157,6 +160,7 @@ static inline void cpuidle_resume_and_unlock(void) { } static inline int cpuidle_enable_device(struct cpuidle_device *dev) {return -ENODEV; } static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } +static inline int cpuidle_play_dead(void) {return -ENODEV; } #endif -- 1.7.7 -- To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html