[RFC PATCH] Use __builtin_bswap32() et al where available.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Since about GCC 4.4, the compiler has builtins for byte-swapping, which
ought to be at least as capable as our hand-written assembler versions
of the same.

More to the point, if the compiler can actually see what we're doing
rather than being given opaque assembler code, it can properly optimise
it to use load-and-swap and store-and-swap instructions (i.e. movbe on
Atom, lwbrx on PowerPC). Our byteswap macros/functions don't otherwise
give us a way to support those instructions (and PowerPC has horridness
in <asm/io.h> to cope with that by doing explicit little-endian loads
and stores instead).

The CONFIG_X86_MOVBE option in this patch isn't strictly necessary yet,
as -mmovbe is implied by -march=atom. But when we want to enable movbe
on big-core CPUs we'll want a trigger for that too.

I've tested this on x86_64 with movbe, where I get a network stack
littered with movbe instructions that seems to work for IPv6 and Legacy
IP. I've compile-tested on PowerPC, where I see about a 3.8% reduction
in text size for ext2, and more like 2.5% for ext3/ext4.

Rather than enable the use of the builtins unconditionally, I've allowed
the architectures to 'opt in' to using them. If ARCH_USE_BUILTIN_BSWAP
is enabled, the GCC builtins will be used in *preference* to the
__arch_swabXX macros defined in asm/swab.h. This allows you to provide
the assembler versions as a fallback for older and non-GCC compilers.

This patch enables the use of the builtins for x86 and PowerPC.

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a902a5c..b4ea516 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -78,6 +78,9 @@ config ARCH_HAS_ILOG2_U64
 	bool
 	default y if 64BIT
 
+config ARCH_USE_BUILTIN_BSWAP
+       def_bool y
+
 config GENERIC_HWEIGHT
 	bool
 	default y
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 46c3bff..238f2ea 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -194,6 +194,9 @@ config ARCH_HAS_CACHE_LINE_SIZE
 config ARCH_HAS_CPU_AUTOPROBE
 	def_bool y
 
+config ARCH_USE_BUILTIN_BSWAP
+	def_bool y
+
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y
 
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index f3b86d0..969f7a6 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -353,6 +353,10 @@ config X86_BSWAP
 	def_bool y
 	depends on X86_32 && !M386
 
+config X86_MOVBE
+	def_bool y
+	depends on MATOM
+
 config X86_POPAD_OK
 	def_bool y
 	depends on X86_32 && !M386
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 05afcca..0e71d76 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -64,6 +64,7 @@ else
                 $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
 	cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
 		$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
+        cflags-$(CONFIG_X86_MOVBE) += $(call cc-option,-mmovbe)
         cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
         KBUILD_CFLAGS += $(cflags-y)
 
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 412bc6c..925299c 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -63,3 +63,13 @@
 #define __compiletime_warning(message) __attribute__((warning(message)))
 #define __compiletime_error(message) __attribute__((error(message)))
 #endif
+
+#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
+#if __GNUC_MINOR__ >= 4
+#define __HAVE_BUILTIN_BSWAP32__
+#define __HAVE_BUILTIN_BSWAP64__
+#endif
+#if __GNUC_MINOR__ >= 9 || (defined(__powerpc__) && __GNUC_MINOR__ >= 6)
+#define __HAVE_BUILTIN_BSWAP16__
+#endif
+#endif
diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h
index e811474..0e011eb 100644
--- a/include/uapi/linux/swab.h
+++ b/include/uapi/linux/swab.h
@@ -45,7 +45,9 @@
 
 static inline __attribute_const__ __u16 __fswab16(__u16 val)
 {
-#ifdef __arch_swab16
+#ifdef __HAVE_BUILTIN_BSWAP16__
+	return __builtin_bswap16(val);
+#elif defined (__arch_swab16)
 	return __arch_swab16(val);
 #else
 	return ___constant_swab16(val);
@@ -54,7 +56,9 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val)
 
 static inline __attribute_const__ __u32 __fswab32(__u32 val)
 {
-#ifdef __arch_swab32
+#ifdef __HAVE_BUILTIN_BSWAP32__
+	return __builtin_bswap32(val);
+#elif defined(__arch_swab32)
 	return __arch_swab32(val);
 #else
 	return ___constant_swab32(val);
@@ -63,7 +67,9 @@ static inline __attribute_const__ __u32 __fswab32(__u32 val)
 
 static inline __attribute_const__ __u64 __fswab64(__u64 val)
 {
-#ifdef __arch_swab64
+#ifdef __HAVE_BUILTIN_BSWAP64__
+	return __builtin_bswap64(val);
+#elif defined (__arch_swab64)
 	return __arch_swab64(val);
 #elif defined(__SWAB_64_THRU_32__)
 	__u32 h = val >> 32;



-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@xxxxxxxxx                              Intel Corporation

Attachment: smime.p7s
Description: S/MIME cryptographic signature


[Index of Archives]     [Linux Kernel]     [Kernel Newbies]     [x86 Platform Driver]     [Netdev]     [Linux Wireless]     [Netfilter]     [Bugtraq]     [Linux Filesystems]     [Yosemite Discussion]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]

  Powered by Linux