This is a note to let you know that I've just added the patch titled x86/entry/64: Pass SP0 directly to load_sp0() to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: x86-entry-64-pass-sp0-directly-to-load_sp0.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. >From da51da189a24bb9b7e2d5a123be096e51a4695a5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski <luto@xxxxxxxxxx> Date: Thu, 2 Nov 2017 00:59:10 -0700 Subject: x86/entry/64: Pass SP0 directly to load_sp0() From: Andy Lutomirski <luto@xxxxxxxxxx> commit da51da189a24bb9b7e2d5a123be096e51a4695a5 upstream. load_sp0() had an odd signature: void load_sp0(struct tss_struct *tss, struct thread_struct *thread); Simplify it to: void load_sp0(unsigned long sp0); Also simplify a few get_cpu()/put_cpu() sequences to preempt_disable()/preempt_enable(). Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx> Reviewed-by: Borislav Petkov <bp@xxxxxxx> Cc: Borislav Petkov <bpetkov@xxxxxxx> Cc: Brian Gerst <brgerst@xxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxx> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Link: http://lkml.kernel.org/r/2655d8b42ed940aa384fe18ee1129bbbcf730a08.1509609304.git.luto@xxxxxxxxxx Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- arch/x86/include/asm/paravirt.h | 5 ++--- arch/x86/include/asm/paravirt_types.h | 2 +- arch/x86/include/asm/processor.h | 9 ++++----- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/vm86_32.c | 14 ++++++-------- arch/x86/xen/enlighten_pv.c | 7 +++---- 8 files changed, 20 insertions(+), 25 deletions(-) --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -16,10 +16,9 @@ #include <linux/cpumask.h> #include <asm/frame.h> -static inline void load_sp0(struct tss_struct *tss, - struct thread_struct *thread) +static inline void load_sp0(unsigned long sp0) { - PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); + PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0); } /* The paravirtualized CPUID instruction. */ --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -134,7 +134,7 @@ struct pv_cpu_ops { void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); void (*free_ldt)(struct desc_struct *ldt, unsigned entries); - void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); + void (*load_sp0)(unsigned long sp0); void (*set_iopl_mask)(unsigned mask); --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -518,9 +518,9 @@ static inline void native_set_iopl_mask( } static inline void -native_load_sp0(struct tss_struct *tss, struct thread_struct *thread) +native_load_sp0(unsigned long sp0) { - tss->x86_tss.sp0 = thread->sp0; + this_cpu_write(cpu_tss.x86_tss.sp0, sp0); } static inline void native_swapgs(void) @@ -545,10 +545,9 @@ static inline unsigned long current_top_ #else #define __cpuid native_cpuid -static inline void load_sp0(struct tss_struct *tss, - struct thread_struct *thread) +static inline void load_sp0(unsigned long sp0) { - native_load_sp0(tss, thread); + native_load_sp0(sp0); } #define set_iopl_mask native_set_iopl_mask --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1570,7 +1570,7 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, me); - load_sp0(t, ¤t->thread); + load_sp0(current->thread.sp0); set_tss_desc(cpu, t); load_TR_desc(); load_mm_ldt(&init_mm); @@ -1625,7 +1625,7 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, curr); - load_sp0(t, thread); + load_sp0(thread->sp0); set_tss_desc(cpu, t); load_TR_desc(); load_mm_ldt(&init_mm); --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -287,7 +287,7 @@ __switch_to(struct task_struct *prev_p, * current_thread_info(). Refresh the SYSENTER configuration in * case prev or next is vm86. */ - load_sp0(tss, next); + load_sp0(next->sp0); refresh_sysenter_cs(next); this_cpu_write(cpu_current_top_of_stack, (unsigned long)task_stack_page(next_p) + --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -465,7 +465,7 @@ __switch_to(struct task_struct *prev_p, this_cpu_write(current_task, next_p); /* Reload sp0. */ - load_sp0(tss, next); + load_sp0(next->sp0); /* * Now maybe reload the debug registers and handle I/O bitmaps --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -95,7 +95,6 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) { - struct tss_struct *tss; struct task_struct *tsk = current; struct vm86plus_struct __user *user; struct vm86 *vm86 = current->thread.vm86; @@ -147,13 +146,13 @@ void save_v86_state(struct kernel_vm86_r do_exit(SIGSEGV); } - tss = &per_cpu(cpu_tss, get_cpu()); + preempt_disable(); tsk->thread.sp0 = vm86->saved_sp0; tsk->thread.sysenter_cs = __KERNEL_CS; - load_sp0(tss, &tsk->thread); + load_sp0(tsk->thread.sp0); refresh_sysenter_cs(&tsk->thread); vm86->saved_sp0 = 0; - put_cpu(); + preempt_enable(); memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs)); @@ -239,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) { - struct tss_struct *tss; struct task_struct *tsk = current; struct vm86 *vm86 = tsk->thread.vm86; struct kernel_vm86_regs vm86regs; @@ -367,8 +365,8 @@ static long do_sys_vm86(struct vm86plus_ vm86->saved_sp0 = tsk->thread.sp0; lazy_save_gs(vm86->regs32.gs); - tss = &per_cpu(cpu_tss, get_cpu()); /* make room for real-mode segments */ + preempt_disable(); tsk->thread.sp0 += 16; if (static_cpu_has(X86_FEATURE_SEP)) { @@ -376,8 +374,8 @@ static long do_sys_vm86(struct vm86plus_ refresh_sysenter_cs(&tsk->thread); } - load_sp0(tss, &tsk->thread); - put_cpu(); + load_sp0(tsk->thread.sp0); + preempt_enable(); if (vm86->flags & VM86_SCREEN_BITMAP) mark_screen_rdonly(tsk->mm); --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -811,15 +811,14 @@ static void __init xen_write_gdt_entry_b } } -static void xen_load_sp0(struct tss_struct *tss, - struct thread_struct *thread) +static void xen_load_sp0(unsigned long sp0) { struct multicall_space mcs; mcs = xen_mc_entry(0); - MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); + MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); xen_mc_issue(PARAVIRT_LAZY_CPU); - tss->x86_tss.sp0 = thread->sp0; + this_cpu_write(cpu_tss.x86_tss.sp0, sp0); } void xen_set_iopl_mask(unsigned mask) Patches currently in stable-queue which might be from luto@xxxxxxxxxx are queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch queue-4.14/x86-asm-don-t-use-the-confusing-.ifeq-directive.patch queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch queue-4.14/x86-boot-relocate-definition-of-the-initial-state-of-cr0.patch queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch queue-4.14/objtool-don-t-report-end-of-section-error-after-an-empty-unwind-hint.patch queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch queue-4.14/x86-entry-64-shorten-test-instructions.patch queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch queue-4.14/x86-xen-fix-xen-head-elf-annotations.patch queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch queue-4.14/x86-head-fix-head-elf-function-annotations.patch queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch queue-4.14/x86-head-add-unwind-hint-annotations.patch queue-4.14/x86-head-remove-unused-bad_address-code.patch queue-4.14/x86-xen-add-unwind-hint-annotations.patch queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch queue-4.14/x86-head-remove-confusing-comment.patch queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch queue-4.14/ptrace-x86-make-user_64bit_mode-available-to-32-bit-builds.patch queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch queue-4.14/x86-mm-relocate-page-fault-error-codes-to-traps.h.patch queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch queue-4.14/x86-boot-annotate-verify_cpu-as-a-callable-function.patch queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch