Patch "x86/retpoline: Simplify retpolines" has been added to the 5.10-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a note to let you know that I've just added the patch titled

    x86/retpoline: Simplify retpolines

to the 5.10-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     x86-retpoline-simplify-retpolines.patch
and it can be found in the queue-5.10 subdirectory.

If you, or anyone else, feel it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.


>From foo@baz Tue Jul 12 05:07:35 PM CEST 2022
From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Date: Fri, 26 Mar 2021 16:12:02 +0100
Subject: x86/retpoline: Simplify retpolines

From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>

commit 119251855f9adf9421cb5eb409933092141ab2c7 upstream.

Due to:

  c9c324dc22aa ("objtool: Support stack layout changes in alternatives")

it is now possible to simplify the retpolines.

Currently our retpolines consist of 2 symbols:

 - __x86_indirect_thunk_\reg: the compiler target
 - __x86_retpoline_\reg:  the actual retpoline.

Both are consecutive in code and aligned such that for any one register
they both live in the same cacheline:

  0000000000000000 <__x86_indirect_thunk_rax>:
   0:   ff e0                   jmpq   *%rax
   2:   90                      nop
   3:   90                      nop
   4:   90                      nop

  0000000000000005 <__x86_retpoline_rax>:
   5:   e8 07 00 00 00          callq  11 <__x86_retpoline_rax+0xc>
   a:   f3 90                   pause
   c:   0f ae e8                lfence
   f:   eb f9                   jmp    a <__x86_retpoline_rax+0x5>
  11:   48 89 04 24             mov    %rax,(%rsp)
  15:   c3                      retq
  16:   66 2e 0f 1f 84 00 00 00 00 00   nopw   %cs:0x0(%rax,%rax,1)

The thunk is an alternative_2, where one option is a JMP to the
retpoline. This was done so that objtool didn't need to deal with
alternatives with stack ops. But that problem has been solved, so now
it is possible to fold the entire retpoline into the alternative to
simplify and consolidate unused bytes:

  0000000000000000 <__x86_indirect_thunk_rax>:
   0:   ff e0                   jmpq   *%rax
   2:   90                      nop
   3:   90                      nop
   4:   90                      nop
   5:   90                      nop
   6:   90                      nop
   7:   90                      nop
   8:   90                      nop
   9:   90                      nop
   a:   90                      nop
   b:   90                      nop
   c:   90                      nop
   d:   90                      nop
   e:   90                      nop
   f:   90                      nop
  10:   90                      nop
  11:   66 66 2e 0f 1f 84 00 00 00 00 00        data16 nopw %cs:0x0(%rax,%rax,1)
  1c:   0f 1f 40 00             nopl   0x0(%rax)

Notice that since the longest alternative sequence is now:

   0:   e8 07 00 00 00          callq  c <.altinstr_replacement+0xc>
   5:   f3 90                   pause
   7:   0f ae e8                lfence
   a:   eb f9                   jmp    5 <.altinstr_replacement+0x5>
   c:   48 89 04 24             mov    %rax,(%rsp)
  10:   c3                      retq

17 bytes, we have 15 bytes of NOPs at the end of our 32-byte slot. (IOW, if
we can shrink the retpoline by 1 byte we can pack it more densely).

 [ bp: Massage commit message. ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Link: https://lkml.kernel.org/r/20210326151259.506071949@xxxxxxxxxxxxx
[bwh: Backported to 5.10:
 - Use X86_FEATURE_RETPOLINE_LFENCE flag instead of
   X86_FEATURE_RETPOLINE_AMD, since the later renaming of this flag
   has already been applied
 - Adjust context]
Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
 arch/x86/include/asm/asm-prototypes.h |    7 -------
 arch/x86/include/asm/nospec-branch.h  |    6 +++---
 arch/x86/lib/retpoline.S              |   34 +++++++++++++++++-----------------
 tools/objtool/check.c                 |    3 +--
 4 files changed, 21 insertions(+), 29 deletions(-)

--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -22,15 +22,8 @@ extern void cmpxchg8b_emu(void);
 #define DECL_INDIRECT_THUNK(reg) \
 	extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
 
-#define DECL_RETPOLINE(reg) \
-	extern asmlinkage void __x86_retpoline_ ## reg (void);
-
 #undef GEN
 #define GEN(reg) DECL_INDIRECT_THUNK(reg)
 #include <asm/GEN-for-each-reg.h>
 
-#undef GEN
-#define GEN(reg) DECL_RETPOLINE(reg)
-#include <asm/GEN-for-each-reg.h>
-
 #endif /* CONFIG_RETPOLINE */
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -80,7 +80,7 @@
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
 	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
-		      __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
 		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
 #else
 	jmp	*%\reg
@@ -90,7 +90,7 @@
 .macro CALL_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
 	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
-		      __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
 		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE
 #else
 	call	*%\reg
@@ -128,7 +128,7 @@
 	ALTERNATIVE_2(						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
-	"call __x86_retpoline_%V[thunk_target]\n",		\
+	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
 	X86_FEATURE_RETPOLINE,					\
 	"lfence;\n"						\
 	ANNOTATE_RETPOLINE_SAFE					\
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -10,27 +10,31 @@
 #include <asm/unwind_hints.h>
 #include <asm/frame.h>
 
-.macro THUNK reg
-	.section .text.__x86.indirect_thunk
-
-	.align 32
-SYM_FUNC_START(__x86_indirect_thunk_\reg)
-	JMP_NOSPEC \reg
-SYM_FUNC_END(__x86_indirect_thunk_\reg)
-
-SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg)
+.macro RETPOLINE reg
 	ANNOTATE_INTRA_FUNCTION_CALL
-	call	.Ldo_rop_\@
+	call    .Ldo_rop_\@
 .Lspec_trap_\@:
 	UNWIND_HINT_EMPTY
 	pause
 	lfence
-	jmp	.Lspec_trap_\@
+	jmp .Lspec_trap_\@
 .Ldo_rop_\@:
-	mov	%\reg, (%_ASM_SP)
+	mov     %\reg, (%_ASM_SP)
 	UNWIND_HINT_FUNC
 	ret
-SYM_FUNC_END(__x86_retpoline_\reg)
+.endm
+
+.macro THUNK reg
+	.section .text.__x86.indirect_thunk
+
+	.align 32
+SYM_FUNC_START(__x86_indirect_thunk_\reg)
+
+	ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+		      __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE
+
+SYM_FUNC_END(__x86_indirect_thunk_\reg)
 
 .endm
 
@@ -48,7 +52,6 @@ SYM_FUNC_END(__x86_retpoline_\reg)
 
 #define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
 #define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
-#define EXPORT_RETPOLINE(reg)  __EXPORT_THUNK(__x86_retpoline_ ## reg)
 
 #undef GEN
 #define GEN(reg) THUNK reg
@@ -58,6 +61,3 @@ SYM_FUNC_END(__x86_retpoline_\reg)
 #define GEN(reg) EXPORT_THUNK(reg)
 #include <asm/GEN-for-each-reg.h>
 
-#undef GEN
-#define GEN(reg) EXPORT_RETPOLINE(reg)
-#include <asm/GEN-for-each-reg.h>
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -800,8 +800,7 @@ static int add_jump_destinations(struct
 		} else if (reloc->sym->type == STT_SECTION) {
 			dest_sec = reloc->sym->sec;
 			dest_off = arch_dest_reloc_offset(reloc->addend);
-		} else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) ||
-			   !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) {
+		} else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
 			/*
 			 * Retpoline jumps are really dynamic jumps in
 			 * disguise, so convert them accordingly.


Patches currently in stable-queue which might be from peterz@xxxxxxxxxxxxx are

queue-5.10/objtool-cache-instruction-relocs.patch
queue-5.10/x86-sev-avoid-using-__x86_return_thunk.patch
queue-5.10/objtool-add-elf_create_undef_symbol.patch
queue-5.10/x86-ftrace-use-alternative-ret-encoding.patch
queue-5.10/objtool-re-add-unwind_hint_-save_restore.patch
queue-5.10/x86-bugs-add-retbleed-ibpb.patch
queue-5.10/x86-bugs-enable-stibp-for-jmp2ret.patch
queue-5.10/x86-retpoline-cleanup-some-ifdefery.patch
queue-5.10/objtool-handle-__sanitize_cov-tail-calls.patch
queue-5.10/x86-prepare-asm-files-for-straight-line-speculation.patch
queue-5.10/kvm-vmx-flatten-__vmx_vcpu_run.patch
queue-5.10/x86-kvm-vmx-make-noinstr-clean.patch
queue-5.10/objtool-x86-replace-alternatives-with-.retpoline_sites.patch
queue-5.10/objtool-skip-magical-retpoline-.altinstr_replacement.patch
queue-5.10/x86-retbleed-add-fine-grained-kconfig-knobs.patch
queue-5.10/x86-cpu-amd-add-spectral-chicken.patch
queue-5.10/objtool-add-straight-line-speculation-validation.patch
queue-5.10/kvm-vmx-fix-ibrs-handling-after-vmexit.patch
queue-5.10/kvm-vmx-prevent-guest-rsb-poisoning-attacks-with-eibrs.patch
queue-5.10/x86-vsyscall_emu-64-don-t-use-ret-in-vsyscall-emulation.patch
queue-5.10/tools-arch-update-arch-x86-lib-mem-cpy-set-_64.s-copies-used-in-perf-bench-mem-memcpy.patch
queue-5.10/x86-add-straight-line-speculation-mitigation.patch
queue-5.10/x86-add-magic-amd-return-thunk.patch
queue-5.10/x86-bugs-keep-a-per-cpu-ia32_spec_ctrl-value.patch
queue-5.10/x86-alternatives-optimize-optimize_nops.patch
queue-5.10/x86-objtool-create-.return_sites.patch
queue-5.10/crypto-x86-poly1305-fixup-sls.patch
queue-5.10/x86-alternative-handle-jcc-__x86_indirect_thunk_-reg.patch
queue-5.10/x86-kvm-fix-setcc-emulation-for-return-thunks.patch
queue-5.10/objtool-fix-objtool-regression-on-x32-systems.patch
queue-5.10/x86-alternative-relax-text_poke_bp-constraint.patch
queue-5.10/x86-retpoline-swizzle-retpoline-thunk.patch
queue-5.10/objtool-rework-the-elf_rebuild_reloc_section-logic.patch
queue-5.10/x86-speculation-fix-firmware-entry-spec_ctrl-handling.patch
queue-5.10/x86-retpoline-remove-unused-replacement-symbols.patch
queue-5.10/objtool-fix-symbol-creation.patch
queue-5.10/x86-speculation-add-spectre_v2-ibrs-option-to-support-kernel-ibrs.patch
queue-5.10/bpf-x86-respect-x86_feature_retpoline.patch
queue-5.10/objtool-fix-type-of-reloc-addend.patch
queue-5.10/objtool-x86-rewrite-retpoline-thunk-calls.patch
queue-5.10/x86-undo-return-thunk-damage.patch
queue-5.10/x86-prepare-inline-asm-for-straight-line-speculation.patch
queue-5.10/x86-alternative-support-alternative_ternary.patch
queue-5.10/kvm-emulate-fix-setcc-emulation-function-offsets-with-sls.patch
queue-5.10/objtool-handle-per-arch-retpoline-naming.patch
queue-5.10/x86-retpoline-create-a-retpoline-thunk-array.patch
queue-5.10/x86-retpoline-simplify-retpolines.patch
queue-5.10/x86-asm-fix-register-order.patch
queue-5.10/x86-speculation-fill-rsb-on-vmexit-for-ibrs.patch
queue-5.10/objtool-add-entry-unret-validation.patch
queue-5.10/objtool-keep-track-of-retpoline-call-sites.patch
queue-5.10/kvm-vmx-convert-launched-argument-to-flags.patch
queue-5.10/objtool-add-elf_create_reloc-helper.patch
queue-5.10/objtool-make-.altinstructions-section-entry-size-consistent.patch
queue-5.10/x86-bpf-use-alternative-ret-encoding.patch
queue-5.10/x86-common-stamp-out-the-stepping-madness.patch
queue-5.10/x86-bugs-split-spectre_v2_select_mitigation-and-spectre_v2_user_select_mitigation.patch
queue-5.10/x86-bugs-report-intel-retbleed-vulnerability.patch
queue-5.10/bpf-x86-simplify-computing-label-offsets.patch
queue-5.10/x86-cpufeatures-move-retpoline-flags-to-word-11.patch
queue-5.10/x86-speculation-fix-spec_ctrl-write-on-smt-state-change.patch
queue-5.10/x86-retpoline-use-mfunction-return.patch
queue-5.10/x86-xen-rename-sys-entry-points.patch
queue-5.10/objtool-only-rewrite-unconditional-retpoline-thunk-calls.patch
queue-5.10/x86-bugs-optimize-spec_ctrl-msr-writes.patch
queue-5.10/x86-alternative-optimize-single-byte-nops-at-an-arbitrary-position.patch
queue-5.10/objtool-fix-code-relocs-vs-weak-symbols.patch
queue-5.10/x86-bugs-report-amd-retbleed-vulnerability.patch
queue-5.10/x86-static_call-use-alternative-ret-encoding.patch
queue-5.10/x86-speculation-fix-rsb-filling-with-config_retpoline-n.patch
queue-5.10/x86-asm-fixup-odd-gen-for-each-reg.h-usage.patch
queue-5.10/x86-alternative-add-debug-prints-to-apply_retpolines.patch
queue-5.10/objtool-extract-elf_symbol_add.patch
queue-5.10/x86-use-return-thunk-in-asm-code.patch
queue-5.10/objtool-remove-reloc-symbol-type-checks-in-get_alt_entry.patch
queue-5.10/objtool-classify-symbols.patch
queue-5.10/intel_idle-disable-ibrs-during-long-idle.patch
queue-5.10/objtool-correctly-handle-retpoline-thunk-calls.patch
queue-5.10/objtool-fix-.symtab_shndx-handling-for-elf_create_undef_symbol.patch
queue-5.10/x86-retpoline-move-the-retpoline-thunk-declarations-to-nospec-branch.h.patch
queue-5.10/objtool-support-asm-jump-tables.patch
queue-5.10/x86-alternative-implement-.retpoline_sites-support.patch
queue-5.10/objtool-x86-ignore-__x86_indirect_alt_-symbols.patch
queue-5.10/objtool-fix-sls-validation-for-kcov-tail-call-replacement.patch
queue-5.10/x86-alternative-try-inline-spectre_v2-retpoline-amd.patch
queue-5.10/x86-entry-remove-skip_r11rcx.patch
queue-5.10/objtool-explicitly-avoid-self-modifying-code-in-.altinstr_replacement.patch
queue-5.10/x86-speculation-use-cached-host-spec_ctrl-value-for-guest-entry-exit.patch
queue-5.10/x86-bugs-add-amd-retbleed-boot-parameter.patch
queue-5.10/objtool-create-reloc-sections-implicitly.patch
queue-5.10/x86-entry-add-kernel-ibrs-implementation.patch
queue-5.10/objtool-treat-.text.__x86.-as-noinstr.patch
queue-5.10/x86-lib-atomic64_386_32-rename-things.patch
queue-5.10/objtool-introduce-cfi-hash.patch
queue-5.10/objtool-default-ignore-int3-for-unreachable.patch
queue-5.10/objtool-extract-elf_strtab_concat.patch
queue-5.10/objtool-teach-get_alt_entry-about-more-relocation-types.patch
queue-5.10/objtool-update-retpoline-validation.patch



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux