Hi Greg, This was not marked for stable but it fixes a commit which has been backported to 4.14-stable. 1) 5b06bbcfc2c6 fixes ca37e57bbe0c 2) 7ee18d677989 will again fix 5b06bbcfc2c6 But you will need to add: 090edbe23ff5 ("x86/power/64: Use struct desc_ptr for the IDT in struct saved_context") and 896c80bef4d3 ("x86/power/32: Move SYSENTER MSR restoration to fix_processor_context()") to apply 7ee18d677989. All of them are attached to this mail for you. -- Regards Sudip
>From 913f0bde9d912d705b3114f422a2a6a11ff26f1f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski <luto@xxxxxxxxxx> Date: Thu, 30 Nov 2017 07:57:57 -0800 Subject: [PATCH 1/4] x86/power: Fix some ordering bugs in __restore_processor_context() commit 5b06bbcfc2c621da3009da8decb7511500c293ed upstream __restore_processor_context() had a couple of ordering bugs. It restored GSBASE after calling load_gs_index(), and the latter can call into tracing code. It also tried to restore segment registers before restoring the LDT, which is straight-up wrong. Reorder the code so that we restore GSBASE, then the descriptor tables, then the segments. This fixes two bugs. First, it fixes a regression that broke resume under certain configurations due to irqflag tracing in native_load_gs_index(). Second, it fixes resume when the userspace process that initiated suspect had funny segments. The latter can be reproduced by compiling this: // SPDX-License-Identifier: GPL-2.0 /* * ldt_echo.c - Echo argv[1] while using an LDT segment */ int main(int argc, char **argv) { int ret; size_t len; char *buf; const struct user_desc desc = { .entry_number = 0, .base_addr = 0, .limit = 0xfffff, .seg_32bit = 1, .contents = 0, /* Data, grow-up */ .read_exec_only = 0, .limit_in_pages = 1, .seg_not_present = 0, .useable = 0 }; if (argc != 2) errx(1, "Usage: %s STRING", argv[0]); len = asprintf(&buf, "%s\n", argv[1]); if (len < 0) errx(1, "Out of memory"); ret = syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)); if (ret < -1) errno = -ret; if (ret) err(1, "modify_ldt"); asm volatile ("movw %0, %%es" :: "rm" ((unsigned short)7)); write(1, buf, len); return 0; } and running ldt_echo >/sys/power/mem Without the fix, the latter causes a triple fault on resume. Fixes: ca37e57bbe0c ("x86/entry/64: Add missing irqflags tracing to native_load_gs_index()") Reported-by: Jarkko Nikula <jarkko.nikula@xxxxxxxxxxxxxxx> Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Tested-by: Jarkko Nikula <jarkko.nikula@xxxxxxxxxxxxxxx> Cc: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx> Cc: Borislav Petkov <bp@xxxxxxxxx> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Link: https://lkml.kernel.org/r/6b31721ea92f51ea839e79bd97ade4a75b1eeea2.1512057304.git.luto@xxxxxxxxxx Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx> Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@xxxxxxxxx> --- arch/x86/power/cpu.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 04d5157fe7f8..a51d2dfb57d1 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -228,8 +228,20 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) load_idt((const struct desc_ptr *)&ctxt->idt_limit); #endif +#ifdef CONFIG_X86_64 /* - * segment registers + * We need GSBASE restored before percpu access can work. + * percpu access can happen in exception handlers or in complicated + * helpers like load_gs_index(). + */ + wrmsrl(MSR_GS_BASE, ctxt->gs_base); +#endif + + fix_processor_context(); + + /* + * Restore segment registers. This happens after restoring the GDT + * and LDT, which happen in fix_processor_context(). */ #ifdef CONFIG_X86_32 loadsegment(es, ctxt->es); @@ -250,13 +262,14 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) load_gs_index(ctxt->gs); asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss)); + /* + * Restore FSBASE and user GSBASE after reloading the respective + * segment selectors. + */ wrmsrl(MSR_FS_BASE, ctxt->fs_base); - wrmsrl(MSR_GS_BASE, ctxt->gs_base); wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); #endif - fix_processor_context(); - do_fpu_end(); tsc_verify_tsc_adjust(true); x86_platform.restore_sched_clock_state(); -- 2.11.0
>From 262a92780dc2fbe262cca8a8c56cf808f3555100 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski <luto@xxxxxxxxxx> Date: Thu, 14 Dec 2017 13:19:05 -0800 Subject: [PATCH 2/4] x86/power/64: Use struct desc_ptr for the IDT in struct saved_context commit 090edbe23ff57940fca7f57d9165ce57a826bd7a upstream x86_64's saved_context nonsensically used separate idt_limit and idt_base fields and then cast &idt_limit to struct desc_ptr *. This was correct (with -fno-strict-aliasing), but it's confusing, served no purpose, and required #ifdeffery. Simplify this by using struct desc_ptr directly. No change in functionality. Tested-by: Jarkko Nikula <jarkko.nikula@xxxxxxxxxxxxxxx> Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx> Acked-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Borislav Petkov <bpetkov@xxxxxxx> Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Cc: Pavel Machek <pavel@xxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Rafael J. Wysocki <rjw@xxxxxxxxxxxxx> Cc: Zhang Rui <rui.zhang@xxxxxxxxx> Link: http://lkml.kernel.org/r/967909ce38d341b01d45eff53e278e2728a3a93a.1513286253.git.luto@xxxxxxxxxx Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx> Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@xxxxxxxxx> --- arch/x86/include/asm/suspend_64.h | 3 +-- arch/x86/power/cpu.c | 11 +---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 7306e911faee..600e9e0aea51 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -30,8 +30,7 @@ struct saved_context { u16 gdt_pad; /* Unused */ struct desc_ptr gdt_desc; u16 idt_pad; - u16 idt_limit; - unsigned long idt_base; + struct desc_ptr idt; u16 ldt; u16 tss; unsigned long tr; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index a51d2dfb57d1..cba2e2c3f89e 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt) /* * descriptor tables */ -#ifdef CONFIG_X86_32 store_idt(&ctxt->idt); -#else -/* CONFIG_X86_64 */ - store_idt((struct desc_ptr *)&ctxt->idt_limit); -#endif + /* * We save it here, but restore it only in the hibernate case. * For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit @@ -221,12 +217,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) * now restore the descriptor tables to their proper values * ltr is done i fix_processor_context(). */ -#ifdef CONFIG_X86_32 load_idt(&ctxt->idt); -#else -/* CONFIG_X86_64 */ - load_idt((const struct desc_ptr *)&ctxt->idt_limit); -#endif #ifdef CONFIG_X86_64 /* -- 2.11.0
>From e2f447c6bbaef62fbd820f2892721c7c64871847 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski <luto@xxxxxxxxxx> Date: Thu, 14 Dec 2017 13:19:06 -0800 Subject: [PATCH 3/4] x86/power/32: Move SYSENTER MSR restoration to fix_processor_context() commit 896c80bef4d3b357814a476663158aaf669d0fb3 upstream x86_64 restores system call MSRs in fix_processor_context(), and x86_32 restored them along with segment registers. The 64-bit variant makes more sense, so move the 32-bit code to match the 64-bit code. No side effects are expected to runtime behavior. Tested-by: Jarkko Nikula <jarkko.nikula@xxxxxxxxxxxxxxx> Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx> Acked-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Borislav Petkov <bpetkov@xxxxxxx> Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Cc: Pavel Machek <pavel@xxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Rafael J. Wysocki <rjw@xxxxxxxxxxxxx> Cc: Zhang Rui <rui.zhang@xxxxxxxxx> Link: http://lkml.kernel.org/r/65158f8d7ee64dd6bbc6c1c83b3b34aaa854e3ae.1513286253.git.luto@xxxxxxxxxx Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx> Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@xxxxxxxxx> --- arch/x86/power/cpu.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index cba2e2c3f89e..8e1668470b23 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -176,6 +176,9 @@ static void fix_processor_context(void) write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS); syscall_init(); /* This sets MSR_*STAR and related */ +#else + if (boot_cpu_has(X86_FEATURE_SEP)) + enable_sep_cpu(); #endif load_TR_desc(); /* This does ltr */ load_mm_ldt(current->active_mm); /* This does lldt */ @@ -239,12 +242,6 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) loadsegment(fs, ctxt->fs); loadsegment(gs, ctxt->gs); loadsegment(ss, ctxt->ss); - - /* - * sysenter MSRs - */ - if (boot_cpu_has(X86_FEATURE_SEP)) - enable_sep_cpu(); #else /* CONFIG_X86_64 */ asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds)); -- 2.11.0
>From aded3723eaabab920a0c0618d9b034fa4a7ce935 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski <luto@xxxxxxxxxx> Date: Thu, 14 Dec 2017 13:19:07 -0800 Subject: [PATCH 4/4] x86/power: Make restore_processor_context() sane commit 7ee18d677989e99635027cee04c878950e0752b9 upstream My previous attempt to fix a couple of bugs in __restore_processor_context(): 5b06bbcfc2c6 ("x86/power: Fix some ordering bugs in __restore_processor_context()") ... introduced yet another bug, breaking suspend-resume. Rather than trying to come up with a minimal fix, let's try to clean it up for real. This patch fixes quite a few things: - The old code saved a nonsensical subset of segment registers. The only registers that need to be saved are those that contain userspace state or those that can't be trivially restored without percpu access working. (On x86_32, we can restore percpu access by writing __KERNEL_PERCPU to %fs. On x86_64, it's easier to save and restore the kernel's GSBASE.) With this patch, we restore hardcoded values to the kernel state where applicable and explicitly restore the user state after fixing all the descriptor tables. - We used to use an unholy mix of inline asm and C helpers for segment register access. Let's get rid of the inline asm. This fixes the reported s2ram hangs and make the code all around more logical. Analyzed-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Reported-by: Jarkko Nikula <jarkko.nikula@xxxxxxxxxxxxxxx> Reported-by: Pavel Machek <pavel@xxxxxx> Tested-by: Jarkko Nikula <jarkko.nikula@xxxxxxxxxxxxxxx> Tested-by: Pavel Machek <pavel@xxxxxx> Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx> Acked-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Borislav Petkov <bpetkov@xxxxxxx> Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Rafael J. Wysocki <rjw@xxxxxxxxxxxxx> Cc: Zhang Rui <rui.zhang@xxxxxxxxx> Fixes: 5b06bbcfc2c6 ("x86/power: Fix some ordering bugs in __restore_processor_context()") Link: http://lkml.kernel.org/r/398ee68e5c0f766425a7b746becfc810840770ff.1513286253.git.luto@xxxxxxxxxx Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx> Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@xxxxxxxxx> --- arch/x86/include/asm/suspend_32.h | 8 +++- arch/x86/include/asm/suspend_64.h | 16 +++++++- arch/x86/power/cpu.c | 79 ++++++++++++++++++++------------------- 3 files changed, 62 insertions(+), 41 deletions(-) diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 982c325dad33..8be6afb58471 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -12,7 +12,13 @@ /* image of the saved processor state */ struct saved_context { - u16 es, fs, gs, ss; + /* + * On x86_32, all segment registers, with the possible exception of + * gs, are saved at kernel entry in pt_regs. + */ +#ifdef CONFIG_X86_32_LAZY_GS + u16 gs; +#endif unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; bool misc_enable_saved; diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 600e9e0aea51..a7af9f53c0cb 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -20,8 +20,20 @@ */ struct saved_context { struct pt_regs regs; - u16 ds, es, fs, gs, ss; - unsigned long gs_base, gs_kernel_base, fs_base; + + /* + * User CS and SS are saved in current_pt_regs(). The rest of the + * segment selectors need to be saved and restored here. + */ + u16 ds, es, fs, gs; + + /* + * Usermode FSBASE and GSBASE may not match the fs and gs selectors, + * so we save them separately. We save the kernelmode GSBASE to + * restore percpu access after resume. + */ + unsigned long kernelmode_gs_base, usermode_gs_base, fs_base; + unsigned long cr0, cr2, cr3, cr4, cr8; u64 misc_enable; bool misc_enable_saved; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 8e1668470b23..a7d966964c6f 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -99,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt) /* * segment registers */ -#ifdef CONFIG_X86_32 - savesegment(es, ctxt->es); - savesegment(fs, ctxt->fs); +#ifdef CONFIG_X86_32_LAZY_GS savesegment(gs, ctxt->gs); - savesegment(ss, ctxt->ss); -#else -/* CONFIG_X86_64 */ - asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds)); - asm volatile ("movw %%es, %0" : "=m" (ctxt->es)); - asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs)); - asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs)); - asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss)); +#endif +#ifdef CONFIG_X86_64 + savesegment(gs, ctxt->gs); + savesegment(fs, ctxt->fs); + savesegment(ds, ctxt->ds); + savesegment(es, ctxt->es); rdmsrl(MSR_FS_BASE, ctxt->fs_base); - rdmsrl(MSR_GS_BASE, ctxt->gs_base); - rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); + rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base); + rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base); mtrr_save_fixed_ranges(NULL); rdmsrl(MSR_EFER, ctxt->efer); @@ -191,9 +187,12 @@ static void fix_processor_context(void) } /** - * __restore_processor_state - restore the contents of CPU registers saved - * by __save_processor_state() - * @ctxt - structure to load the registers contents from + * __restore_processor_state - restore the contents of CPU registers saved + * by __save_processor_state() + * @ctxt - structure to load the registers contents from + * + * The asm code that gets us here will have restored a usable GDT, although + * it will be pointing to the wrong alias. */ static void notrace __restore_processor_state(struct saved_context *ctxt) { @@ -216,46 +215,50 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) write_cr2(ctxt->cr2); write_cr0(ctxt->cr0); + /* Restore the IDT. */ + load_idt(&ctxt->idt); + /* - * now restore the descriptor tables to their proper values - * ltr is done i fix_processor_context(). + * Just in case the asm code got us here with the SS, DS, or ES + * out of sync with the GDT, update them. */ - load_idt(&ctxt->idt); + loadsegment(ss, __KERNEL_DS); + loadsegment(ds, __USER_DS); + loadsegment(es, __USER_DS); -#ifdef CONFIG_X86_64 /* - * We need GSBASE restored before percpu access can work. - * percpu access can happen in exception handlers or in complicated - * helpers like load_gs_index(). + * Restore percpu access. Percpu access can happen in exception + * handlers or in complicated helpers like load_gs_index(). */ - wrmsrl(MSR_GS_BASE, ctxt->gs_base); +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base); +#else + loadsegment(fs, __KERNEL_PERCPU); + loadsegment(gs, __KERNEL_STACK_CANARY); #endif + /* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */ fix_processor_context(); /* - * Restore segment registers. This happens after restoring the GDT - * and LDT, which happen in fix_processor_context(). + * Now that we have descriptor tables fully restored and working + * exception handling, restore the usermode segments. */ -#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_64 + loadsegment(ds, ctxt->es); loadsegment(es, ctxt->es); loadsegment(fs, ctxt->fs); - loadsegment(gs, ctxt->gs); - loadsegment(ss, ctxt->ss); -#else -/* CONFIG_X86_64 */ - asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds)); - asm volatile ("movw %0, %%es" :: "r" (ctxt->es)); - asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs)); load_gs_index(ctxt->gs); - asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss)); /* - * Restore FSBASE and user GSBASE after reloading the respective - * segment selectors. + * Restore FSBASE and GSBASE after restoring the selectors, since + * restoring the selectors clobbers the bases. Keep in mind + * that MSR_KERNEL_GS_BASE is horribly misnamed. */ wrmsrl(MSR_FS_BASE, ctxt->fs_base); - wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); + wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base); +#elif defined(CONFIG_X86_32_LAZY_GS) + loadsegment(gs, ctxt->gs); #endif do_fpu_end(); -- 2.11.0