Since about GCC 4.4, the compiler has builtins for byte-swapping, which ought to be at least as capable as our hand-written assembler versions of the same. More to the point, if the compiler can actually see what we're doing rather than being given opaque assembler code, it can properly optimise it to use load-and-swap and store-and-swap instructions (i.e. movbe on Atom, lwbrx on PowerPC). Our byteswap macros/functions don't otherwise give us a way to support those instructions (and PowerPC has horridness in <asm/io.h> to cope with that by doing explicit little-endian loads and stores instead).

The CONFIG_X86_MOVBE in this isn't strictly necessary yet, as -mmovbe is implied by -march=atom. But when we want to enable it on big-core we'll want a trigger for that too.

I've tested this on x86_64 with movbe, where I get a network stack littered with movbe instructions that seems to work for IPv6 and Legacy IP. I've compile-tested on PowerPC, where I see about a 3.8% reduction in text size for ext2, and more like 2.5% for ext3/ext4.

Rather than enable the use of the builtins unconditionally, I've allowed the architectures to 'opt in' to using them. If ARCH_USE_BUILTIN_BSWAP is enabled, the GCC builtins will be used in *preference* to the __arch_swabXX macros defined in asm/swab.h. This allows you to provide the assembler versions as a fallback for older and non-GCC compilers.

This patch enables the use of the builtins for x86 and PowerPC.
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a902a5c..b4ea516 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -78,6 +78,9 @@ config ARCH_HAS_ILOG2_U64
 	bool
 	default y if 64BIT
 
+config ARCH_USE_BUILTIN_BSWAP
+	def_bool y
+
 config GENERIC_HWEIGHT
 	bool
 	default y
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 46c3bff..238f2ea 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -194,6 +194,9 @@ config ARCH_HAS_CACHE_LINE_SIZE
 
 config ARCH_HAS_CPU_AUTOPROBE
 	def_bool y
 
+config ARCH_USE_BUILTIN_BSWAP
+	def_bool y
+
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index f3b86d0..969f7a6 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -353,6 +353,10 @@ config X86_BSWAP
 	def_bool y
 	depends on X86_32 && !M386
 
+config X86_MOVBE
+	def_bool y
+	depends on MATOM
+
 config X86_POPAD_OK
 	def_bool y
 	depends on X86_32 && !M386
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 05afcca..0e71d76 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -64,6 +64,7 @@ else
                 $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
         cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
                 $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
+        cflags-$(CONFIG_X86_MOVBE) += $(call cc-option,-mmovbe)
         cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
         KBUILD_CFLAGS += $(cflags-y)
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 412bc6c..925299c 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -63,3 +63,13 @@
 #define __compiletime_warning(message) __attribute__((warning(message)))
 #define __compiletime_error(message) __attribute__((error(message)))
 #endif
+
+#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
+#if __GNUC_MINOR__ >= 4
+#define __HAVE_BUILTIN_BSWAP32__
+#define __HAVE_BUILTIN_BSWAP64__
+#endif
+#if __GNUC_MINOR__ >= 9 || (defined(__powerpc__) && __GNUC_MINOR__ >= 6)
+#define __HAVE_BUILTIN_BSWAP16__
+#endif
+#endif
diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h
index e811474..0e011eb 100644
--- a/include/uapi/linux/swab.h
+++ b/include/uapi/linux/swab.h
@@ -45,7 +45,9 @@
 
 static inline __attribute_const__ __u16 __fswab16(__u16 val)
 {
-#ifdef __arch_swab16
+#ifdef __HAVE_BUILTIN_BSWAP16__
+	return __builtin_bswap16(val);
+#elif defined (__arch_swab16)
 	return __arch_swab16(val);
 #else
 	return ___constant_swab16(val);
@@ -54,7 +56,9 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val)
 
 static inline __attribute_const__ __u32 __fswab32(__u32 val)
 {
-#ifdef __arch_swab32
+#ifdef __HAVE_BUILTIN_BSWAP32__
+	return __builtin_bswap32(val);
+#elif defined(__arch_swab32)
 	return __arch_swab32(val);
 #else
 	return ___constant_swab32(val);
@@ -63,7 +67,9 @@ static inline __attribute_const__ __u32 __fswab32(__u32 val)
 
 static inline __attribute_const__ __u64 __fswab64(__u64 val)
 {
-#ifdef __arch_swab64
+#ifdef __HAVE_BUILTIN_BSWAP64__
+	return __builtin_bswap64(val);
+#elif defined (__arch_swab64)
 	return __arch_swab64(val);
 #elif defined(__SWAB_64_THRU_32__)
 	__u32 h = val >> 32;
-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@xxxxxxxxx                              Intel Corporation
Attachment:
smime.p7s
Description: S/MIME cryptographic signature