How does this look? Completely untested, of course. I do wonder if we need more memory barriers, though. An alternative would be to move everything into mwait_idle_with_hints(). -hpa
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7b034a4057f9..6dce588f94b4 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -723,6 +723,26 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 		     :: "a" (eax), "c" (ecx));
 }
 
+/*
+ * Issue a clflush in preparation for a monitor instruction if the CPU
+ * needs it (X86_FEATURE_CLFLUSH_MONITOR), then arm the monitor on @addr,
+ * passing @ecx and @edx through to __monitor(), and finish with smp_mb().
+ *
+ * We force the address into the ax register to get a fixed
+ * length for the instruction, however, this is what the monitor instruction
+ * is going to need anyway, so it shouldn't add any additional code.
+ */
+static inline void clflush_monitor(const void *addr, unsigned long ecx,
+				   unsigned long edx)
+{
+	alternative_input(ASM_NOP3,
+			  "clflush (%0)",
+			  X86_FEATURE_CLFLUSH_MONITOR,
+			  "a" (addr));
+	__monitor(addr, ecx, edx);
+	smp_mb();
+}
+
 extern void select_idle_routine(const struct cpuinfo_x86 *c);
 extern void init_amd_e400_c1e_mask(void);
 
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index d2b7f27781bc..b14d02354134 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -163,11 +163,7 @@ EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
 	if (!need_resched()) {
-		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
-			clflush((void *)&current_thread_info()->flags);
-
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		smp_mb();
+		clflush_monitor(&current_thread_info()->flags, 0, 0);
 		if (!need_resched())
 			__mwait(ax, cx);
 	}