From: Dave Jiang > Sent: 24 September 2020 00:11 > > The MOVDIR64B instruction can be used by other wrapper instructions. Move > the asm code to special_insns.h and have iosubmit_cmds512() call the > asm function. > > Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx> > Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx> > --- > arch/x86/include/asm/io.h | 17 +++-------------- > arch/x86/include/asm/special_insns.h | 19 +++++++++++++++++++ > 2 files changed, 22 insertions(+), 14 deletions(-) > > diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h > index e1aa17a468a8..d726459d08e5 100644 > --- a/arch/x86/include/asm/io.h > +++ b/arch/x86/include/asm/io.h ... > diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h > index 59a3e13204c3..2a5abd27bb86 100644 > --- a/arch/x86/include/asm/special_insns.h > +++ b/arch/x86/include/asm/special_insns.h > @@ -234,6 +234,25 @@ static inline void clwb(volatile void *__p) > > #define nop() asm volatile ("nop") > > +/* The dst parameter must be 64-bytes aligned */ > +static inline void movdir64b(void *dst, const void *src) > +{ > + /* > + * Note that this isn't an "on-stack copy", just definition of "dst" > + * as a pointer to 64-bytes of stuff that is going to be overwritten. > + * In the MOVDIR64B case that may be needed as you can use the > + * MOVDIR64B instruction to copy arbitrary memory around. This trick > + * lets the compiler know how much gets clobbered. > + */ > + volatile struct { char _[64]; } *__dst = dst; > + > + /* MOVDIR64B [rdx], rax */ > + asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" > + : > + : "m" (*(struct { char _[64];} **)src), "a" (__dst) > + : "memory"); > +} > + > #endif /* __KERNEL__ */ You've lost the "d" (src). You don't need the 'memory' clobber, just: static inline void movdir64b(void *dst, const void *src) { /* * 64 bytes from dst are marked as modified for completeness. * Since the writes bypass the cache later reads may return * old data anyway. 
*/ /* MOVDIR64B [rdx], rax */ asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" : "=m" ((struct { char _[64];} *)dst), : "m" ((struct { char _[64];} *)src), "d" (src), "a" (dst)); } I've checked that the "m" constraint on src does force (at least one version of) gcc to actually emit the pending stores to the supplied source buffer before the asm statement. David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)