mfence appears to be way slower than a locked instruction - let's use lock+add unconditionally, same as we always did on old 32-bit. Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx> --- arch/x86/include/asm/barrier.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index a584e1c..7f99726 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -15,11 +15,12 @@ * Some non-Intel clones support out of order store. wmb() ceases to be a * nop for these. */ -#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) + +#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) #else -#define mb() asm volatile("mfence":::"memory") +#define mb() asm volatile("lock; addl $0,0(%%rsp)" ::: "memory") #define rmb() asm volatile("lfence":::"memory") #define wmb() asm volatile("sfence" ::: "memory") #endif -- MST _______________________________________________ Virtualization mailing list Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/virtualization