The following commit has been merged into the x86/asm branch of tip:

Commit-ID:     4c9a93800121e90484cd07c8e5bde70e31cdb996
Gitweb:        https://git.kernel.org/tip/4c9a93800121e90484cd07c8e5bde70e31cdb996
Author:        Alexey Dobriyan <adobriyan@xxxxxxxxx>
AuthorDate:    Thu, 14 Mar 2024 19:57:15 +03:00
Committer:     Ingo Molnar <mingo@xxxxxxxxxx>
CommitterDate: Fri, 22 Mar 2024 11:47:34 +01:00

x86/asm/64: Clean up memset16(), memset32(), memset64() assembly constraints in <asm/string_64.h>

- Use the "+" constraint modifier, simplify the input and output lists,
  and delete the dummy variables with meaningless names. "&" only makes
  sense in complex assembly that creates constraints on intermediate
  registers, but a one-instruction asm statement has no inner body, so
  to speak.

- Write "rep stos*" on one line: the REP prefix is an integral part of
  the x86 instruction. I'm not sure why people separate "rep" with a
  newline.

  Uros Bizjak adds context:

    "Some archaic assemblers rejected 'rep insn' on one line. I have
     checked that the minimum required binutils-2.25 assembles this
     without problems."

- Use __auto_type for the maximum copy-pasta experience.

- Reformat a bit to make everything look nicer.

Note that the "memory" clobber is stronger than necessary when "n" is
known at compile time. However, "=m" (*(T(*)[n])s) doesn't work, because
it triggers -Wvla even when "n" is a compile-time constant, so the
following split cannot be used:

	if (BCP(n)) {
		rep stos* : "=m" (*(T(*)[n])s)
	} else {
		rep stos* : "memory"
	}

Signed-off-by: Alexey Dobriyan <adobriyan@xxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Reviewed-by: Uros Bizjak <ubizjak@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Link: https://lore.kernel.org/r/20240314165715.31831-1-adobriyan@xxxxxxxxx
---
 arch/x86/include/asm/string_64.h | 45 ++++++++++++++++---------------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 857d364..9d0b324 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -30,37 +30,40 @@ void *__memset(void *s, int c, size_t n);
 
 #define __HAVE_ARCH_MEMSET16
 static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
 {
-	long d0, d1;
-	asm volatile("rep\n\t"
-		     "stosw"
-		     : "=&c" (d0), "=&D" (d1)
-		     : "a" (v), "1" (s), "0" (n)
-		     : "memory");
-	return s;
+	const __auto_type s0 = s;
+	asm volatile (
+		"rep stosw"
+		: "+D" (s), "+c" (n)
+		: "a" (v)
+		: "memory"
+	);
+	return s0;
 }
 
 #define __HAVE_ARCH_MEMSET32
 static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
 {
-	long d0, d1;
-	asm volatile("rep\n\t"
-		     "stosl"
-		     : "=&c" (d0), "=&D" (d1)
-		     : "a" (v), "1" (s), "0" (n)
-		     : "memory");
-	return s;
+	const __auto_type s0 = s;
+	asm volatile (
+		"rep stosl"
+		: "+D" (s), "+c" (n)
+		: "a" (v)
+		: "memory"
+	);
+	return s0;
 }
 
 #define __HAVE_ARCH_MEMSET64
 static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
 {
-	long d0, d1;
-	asm volatile("rep\n\t"
-		     "stosq"
-		     : "=&c" (d0), "=&D" (d1)
-		     : "a" (v), "1" (s), "0" (n)
-		     : "memory");
-	return s;
+	const __auto_type s0 = s;
+	asm volatile (
+		"rep stosq"
+		: "+D" (s), "+c" (n)
+		: "a" (v)
+		: "memory"
+	);
+	return s0;
 }
 #endif
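
For reference, the "+D"/"+c" constraint pattern used in the patch can be
exercised outside the kernel with a small userspace sketch. The fill16()
helper and the test harness below are made up for illustration only and are
not part of the patch; they assume x86-64 and a GNU-compatible compiler:

	#include <stdint.h>
	#include <stddef.h>
	#include <stdio.h>

	/* Same constraint pattern as the patched memset16(): "+D" and "+c"
	 * tell the compiler that RDI and RCX are both read and clobbered by
	 * "rep stosw", so no dummy output variables are needed. */
	static inline void *fill16(uint16_t *s, uint16_t v, size_t n)
	{
		uint16_t *s0 = s;

		asm volatile (
			"rep stosw"
			: "+D" (s), "+c" (n)
			: "a" (v)
			: "memory"
		);
		return s0;
	}

	int main(void)
	{
		uint16_t buf[8];

		fill16(buf, 0xabcd, 8);
		for (size_t i = 0; i < 8; i++)
			printf("%04x ", buf[i]);
		printf("\n");
		return 0;
	}

Because the pointer and the count are "+" operands, the compiler sees their
post-"rep stosw" values directly; the old code had to model the same effect
with the write-only dummy outputs d0/d1 and the matching "0"/"1" input
constraints.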