Avoid instructions with explicit uses of the stack pointer between
instructions that implicitly refer to it. The sequence POP %reg;
ADD $x, %RSP; POP %reg forces the emission of a synchronization uop to
synchronize the value of the stack pointer in the stack engine and the
out-of-order core. Using POP with a dummy register instead of
ADD $x, %RSP results in smaller code size and faster code.

The patch also fixes a reference to the wrong register in a nearby
comment.

Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Sean Christopherson <seanjc@xxxxxxxxxx>
Signed-off-by: Uros Bizjak <ubizjak@xxxxxxxxx>
---
 arch/x86/kvm/vmx/vmenter.S | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 6de96b943804..afcb237e1c17 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -189,13 +189,16 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 	xor %ebx, %ebx
 
 .Lclear_regs:
+	/* "POP" @regs. */
+	pop %_ASM_AX
+
 	/*
 	 * Clear all general purpose registers except RSP and RBX to prevent
 	 * speculative use of the guest's values, even those that are reloaded
 	 * via the stack.  In theory, an L1 cache miss when restoring registers
 	 * could lead to speculative execution with the guest's values.
 	 * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
-	 * free.  RSP and RAX are exempt as RSP is restored by hardware during
+	 * free.  RSP and RBX are exempt as RSP is restored by hardware during
 	 * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
 	 * value.
 	 */
@@ -216,9 +219,6 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 	xor %r15d, %r15d
 #endif
 
-	/* "POP" @regs. */
-	add $WORD_SIZE, %_ASM_SP
-
 	/*
 	 * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
 	 * the first unbalanced RET after vmexit!
@@ -234,7 +234,6 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
 			   X86_FEATURE_RSB_VMEXIT_LITE
 
-
 	pop %_ASM_ARG2	/* @flags */
 	pop %_ASM_ARG1	/* @vmx */
 
-- 
2.37.1
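
For illustration only, a minimal standalone sketch of the two sequences the
message describes; the register choices are arbitrary and not taken from
vmenter.S:

	/*
	 * Mixed implicit/explicit RSP updates: the explicit ADD in the middle
	 * forces a synchronization uop between the stack engine and the
	 * out-of-order core.
	 */
	pop	%rcx
	add	$8, %rsp		/* explicit use of RSP */
	pop	%rdx

	/*
	 * Implicit-only updates: popping into a dummy register discards the
	 * same stack slot with a shorter encoding and no synchronization uop.
	 */
	pop	%rcx
	pop	%rax			/* dummy register, value unused */
	pop	%rdx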