4.9-stable review patch. If anyone has any objections, please let me know. ------------------ From: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx> commit 196bd485ee4f03ce4c690bfcf38138abfcd0a4bc upstream. Currently we use current_stack_pointer() function to get the value of the stack pointer register. Since commit: f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") ... we have a stack register variable declared. It can be used instead of current_stack_pointer() function which allows to optimize away some excessive "mov %rsp, %<dst>" instructions: -mov %rsp,%rdx -sub %rdx,%rax -cmp $0x3fff,%rax -ja ffffffff810722fd <ist_begin_non_atomic+0x2d> +sub %rsp,%rax +cmp $0x3fff,%rax +ja ffffffff810722fa <ist_begin_non_atomic+0x2a> Remove current_stack_pointer(), rename __asm_call_sp to current_stack_pointer and use it instead of the removed function. Signed-off-by: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx> Reviewed-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx> Cc: Andy Lutomirski <luto@xxxxxxxxxx> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Link: http://lkml.kernel.org/r/20170929141537.29167-1-aryabinin@xxxxxxxxxxxxx Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx> [dwmw2: We want ASM_CALL_CONSTRAINT for retpoline] Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- arch/x86/include/asm/asm.h | 11 +++++++++++ arch/x86/include/asm/thread_info.h | 11 ----------- arch/x86/kernel/irq_32.c | 6 +++--- arch/x86/kernel/traps.c | 2 +- arch/x86/mm/tlb.c | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -125,4 +125,15 @@ /* For C file, we already have NOKPROBE_SYMBOL macro */ #endif +#ifndef __ASSEMBLY__ +/* + * This output constraint should be used for any inline asm which has a "call" + * instruction. Otherwise the asm may be inserted before the frame pointer + * gets set up by the containing function. If you forget to do this, objtool + * may print a "call without frame pointer save/setup" warning. + */ +register unsigned long current_stack_pointer asm(_ASM_SP); +#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) +#endif + #endif /* _ASM_X86_ASM_H */ --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -152,17 +152,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -static inline unsigned long current_stack_pointer(void) -{ - unsigned long sp; -#ifdef CONFIG_X86_64 - asm("mov %%rsp,%0" : "=g" (sp)); -#else - asm("mov %%esp,%0" : "=g" (sp)); -#endif - return sp; -} - /* * Walks up the stack frames to make sure that the specified object is * entirely contained by a single stack frame. --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -64,7 +64,7 @@ static void call_on_stack(void *func, vo static inline void *current_stack(void) { - return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); + return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); } static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) @@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(i /* Save the next esp at the bottom of the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer(); + *prev_esp = current_stack_pointer; if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -139,7 +139,7 @@ void do_softirq_own_stack(void) /* Push the previous esp onto the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer(); + *prev_esp = current_stack_pointer; call_on_stack(__do_softirq, isp); } --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -153,7 +153,7 @@ void ist_begin_non_atomic(struct pt_regs * from double_fault. */ BUG_ON((unsigned long)(current_top_of_stack() - - current_stack_pointer()) >= THREAD_SIZE); + current_stack_pointer) >= THREAD_SIZE); preempt_enable_no_resched(); } --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -110,7 +110,7 @@ void switch_mm_irqs_off(struct mm_struct * mapped in the new pgd, we'll double-fault. Forcibly * map it. */ - unsigned int stack_pgd_index = pgd_index(current_stack_pointer()); + unsigned int stack_pgd_index = pgd_index(current_stack_pointer); pgd_t *pgd = next->pgd + stack_pgd_index;