fls(N), ffs(N) and fls64(N) can be optimised on x86/x86_64. Currently they perform checks against N being 0 before invoking the BSR/BSF instruction, or use a CMOV instruction afterwards. Either the check involves a conditional jump which we'd like to avoid, or a CMOV, which we'd also quite like to avoid. Instead, we can make use of the fact that BSR/BSF doesn't modify its output register if its input is 0. By preloading the output with -1 and incrementing the result, we achieve the desired result without the need for a conditional check. The 32-bit version of fls64() can also have all its conditional checks removed by chaining BSRs with a subtraction in the middle. However, this may be suboptimal on CPUs for which BSR/BSF is really slow (i486 and below for example). Signed-off-by: David Howells <dhowells@xxxxxxxxxx> --- arch/x86/include/asm/bitops.h | 79 ++++++++++++++++++++++++++--------------- 1 files changed, 50 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 02b47a6..a8e85ab 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -394,18 +394,12 @@ static inline unsigned long __fls(unsigned long word) */ static inline int ffs(int x) { - int r; -#ifdef CONFIG_X86_CMOV - asm("bsfl %1,%0\n\t" - "cmovzl %2,%0" - : "=r" (r) : "rm" (x), "r" (-1)); -#else - asm("bsfl %1,%0\n\t" - "jnz 1f\n\t" - "movl $-1,%0\n" - "1:" : "=r" (r) : "rm" (x)); -#endif - return r + 1; + int bitpos = -1; + + asm("bsfl %1,%0" + : "+r" (bitpos) + : "rm" (x)); + return bitpos + 1; } /** @@ -421,19 +415,52 @@ static inline int ffs(int x) */ static inline int fls(int x) { - int r; -#ifdef CONFIG_X86_CMOV - asm("bsrl %1,%0\n\t" - "cmovzl %2,%0" - : "=&r" (r) : "rm" (x), "rm" (-1)); + int bitpos = -1; + + asm("bsrl %1,%0" + : "+r" (bitpos) + : "rm" (x)); + return bitpos + 1; +} + +/** + * fls64 - find last set bit in a 64-bit word + * @x: the word to search + * + * This is defined in a similar way as the libc and compiler builtin + * ffsll, but returns the position of the most significant set bit. + * + * fls64(value) returns 0 if value is 0 or the position of the last + * set bit if value is nonzero. The last (most significant) bit is + * at position 64. + */ +#if BITS_PER_LONG == 32 +static __always_inline int fls64(__u64 x) +{ + __u32 h = x >> 32; + int bitpos = -1; + + asm("bsrl %1,%0 \n" + "subl %2,%0 \n" + "bsrl %3,%0 \n" + : "+r" (bitpos) + : "rm" (x), "i"(32), "rm" (h)); + + return bitpos + 33; +} +#elif BITS_PER_LONG == 64 +static __always_inline int fls64(__u64 x) +{ + long bitpos = -1; + + asm("bsrq %1,%0" + : "+r" (bitpos) + : "rm" (x)); + return bitpos + 1; +} #else - asm("bsrl %1,%0\n\t" - "jnz 1f\n\t" - "movl $-1,%0\n" - "1:" : "=r" (r) : "rm" (x)); +#error BITS_PER_LONG not 32 or 64 #endif - return r + 1; -} #endif /* __KERNEL__ */ #undef ADDR @@ -446,12 +473,6 @@ static inline int fls(int x) #include <asm-generic/bitops/hweight.h> -#endif /* __KERNEL__ */ - -#include <asm-generic/bitops/fls64.h> - -#ifdef __KERNEL__ - #include <asm-generic/bitops/ext2-non-atomic.h> #define ext2_set_bit_atomic(lock, nr, addr) \ -- To unsubscribe from this list: send the line "unsubscribe linux-arch" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html