Re: [PATCH v2 12/12] x86/kvm/emulate: Avoid RET for fastops

On Mon, Nov 11, 2024 at 12:59:47PM +0100, Peter Zijlstra wrote:

> +/*
> + * All the FASTOP magic above relies on there being *one* instance of this
> + * so it can JMP back, avoiding RET and its various thunks.
> + */
> +static noinline int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
>  {
>  	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
>  
>  	if (!(ctxt->d & ByteOp))
>  		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
>  
> -	asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
> +	asm("push %[flags]; popf \n\t"
> +	    UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0)
> +	    ASM_ANNOTATE(ANNOTYPE_JUMP_TABLE)
> +	    JMP_NOSPEC
> +	    "fastop_return: \n\t"
> +	    UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0)
> +	    "pushf; pop %[flags]\n"
>  	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
>  	      [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
>  	    : "c"(ctxt->src2.val));

So Andrew is telling me the compiler is free to mess this up; notably:

  https://github.com/llvm/llvm-project/issues/92161

In light of that, I wrote the below hack. It makes objtool sad (it
doesn't like STT_FUNC calling STT_NOTYPE), but it should work if we ever
run into the compiler being daft like that (the current code would fail
to compile because of the duplicate fastop_return label, so it's not a
silent failure).
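
For the record, a minimal sketch of the failure mode (illustrative names
only, nothing from the tree): a global label inside inline asm is emitted
once per expansion of the asm statement, so the moment the compiler
clones it -- inlining into two callers, unrolling, tail-duplication --
assembly blows up with a duplicate symbol error:

	/* gcc -O2 -c demo.c -- needs optimisation so both calls inline */
	static inline void label_in_asm(void)
	{
		asm("demo_label:");	/* global label: one copy per expansion */
	}

	void caller_a(void) { label_in_asm(); }
	void caller_b(void) { label_in_asm(); }
	/* second inlined expansion emits demo_label again:
	 *   Error: symbol `demo_label' is already defined
	 */

Which is why the hack below moves the label into file-scope asm, which
the compiler cannot duplicate.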

Wear protective eye gear before continuing...

---
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -429,9 +429,9 @@ static inline void call_depth_return_thu
 
 #ifdef CONFIG_X86_64
 
-#define __CS_PREFIX						\
+#define __CS_PREFIX(reg)					\
 	".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n"		\
-	".ifc %V[thunk_target],\\rs\n"				\
+	".ifc " reg ",\\rs\n"					\
 	".byte 0x2e\n"						\
 	".endif\n"						\
 	".endr\n"
@@ -441,12 +441,12 @@ static inline void call_depth_return_thu
  * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined.
  */
 # define CALL_NOSPEC						\
-	__CS_PREFIX						\
+	__CS_PREFIX("%V[thunk_target]")				\
 	"call __x86_indirect_thunk_%V[thunk_target]\n"
 
-# define JMP_NOSPEC						\
-	__CS_PREFIX						\
-	"jmp __x86_indirect_thunk_%V[thunk_target]\n"
+# define __JMP_NOSPEC(reg)					\
+	__CS_PREFIX(reg)					\
+	"jmp __x86_indirect_thunk_" reg "\n"
 
 # define THUNK_TARGET(addr) [thunk_target] "r" (addr)
 
@@ -478,10 +478,10 @@ static inline void call_depth_return_thu
 	"call *%[thunk_target]\n",				\
 	X86_FEATURE_RETPOLINE_LFENCE)
 
-# define JMP_NOSPEC						\
+# define __JMP_NOSPEC(reg)					\
 	ALTERNATIVE_2(						\
 	ANNOTATE_RETPOLINE_SAFE					\
-	"jmp *%[thunk_target]\n",				\
+	"jmp *%%" reg "\n",					\
 	"       jmp    901f;\n"					\
 	"       .align 16\n"					\
 	"901:	call   903f;\n"					\
@@ -490,22 +490,25 @@ static inline void call_depth_return_thu
 	"       jmp    902b;\n"					\
 	"       .align 16\n"					\
 	"903:	lea    4(%%esp), %%esp;\n"			\
-	"       pushl  %[thunk_target];\n"			\
+	"       pushl  %%" reg "\n"				\
 	"       ret;\n",					\
 	X86_FEATURE_RETPOLINE,					\
 	"lfence;\n"						\
 	ANNOTATE_RETPOLINE_SAFE					\
-	"jmp *%[thunk_target]\n",				\
+	"jmp *%%" reg "\n",					\
 	X86_FEATURE_RETPOLINE_LFENCE)
 
 # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
 #endif
+
 #else /* No retpoline for C / inline asm */
 # define CALL_NOSPEC "call *%[thunk_target]\n"
-# define JMP_NOSPEC "jmp *%[thunk_target]\n"
+# define __JMP_NOSPEC(reg) "jmp *%%" reg "\n"
 # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
 #endif
 
+# define JMP_NOSPEC __JMP_NOSPEC("%V[thunk_target]")
+
 /* The Spectre V2 mitigation variants */
 enum spectre_v2_mitigation {
 	SPECTRE_V2_NONE,
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5039,23 +5039,45 @@ static void fetch_possible_mmx_operand(s
 }
 
 /*
+ * Stub written in asm in order to ensure GCC doesn't duplicate the
+ * fastop_return: label.
+ *
+ * Custom calling convention.
+ *
+ * __fastop:
+ * ax = ctxt->dst.val
+ * dx = ctxt->src.val
+ * cx = ctxt->src2.val
+ * di = flags
+ * si = fop
+ */
+asm (ASM_FUNC_ALIGN
+     "__fastop: \n\t"
+     "push %" _ASM_DI "\n\t"
+     "popf \n\t"
+     UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0)
+     ASM_ANNOTATE(ANNOTYPE_JUMP_TABLE)
+     __JMP_NOSPEC(_ASM_SI)
+     "fastop_return: \n\t"
+     UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0)
+     "pushf \n\t"
+     "pop %" _ASM_DI "\n\t"
+     ASM_RET
+     ".type __fastop, @notype \n\t"
+     ".size __fastop, . - __fastop \n\t");
+
+/*
  * All the FASTOP magic above relies on there being *one* instance of this
 * so it can JMP back, avoiding RET and its various thunks.
  */
-static noinline int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
+static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
 {
 	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
 
 	if (!(ctxt->d & ByteOp))
 		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
 
-	asm("push %[flags]; popf \n\t"
-	    UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0)
-	    ASM_ANNOTATE(ANNOTYPE_JUMP_TABLE)
-	    JMP_NOSPEC
-	    "fastop_return: \n\t"
-	    UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0)
-	    "pushf; pop %[flags]\n"
+	asm("call __fastop"
 	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
 	      [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
 	    : "c"(ctxt->src2.val));



