On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells <dhowells@xxxxxxxxxx> wrote: > Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e > Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e > Author: David Howells <dhowells@xxxxxxxxxx> > AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000 > Committer: H. Peter Anvin <hpa@xxxxxxxxx> > CommitDate: Mon, 20 Feb 2012 14:47:02 -0800 > > bitops: Optimise get_order() This is causing build failures on non-x86 in linux next according to git bisect. Here is one example: http://kisskb.ellerman.id.au/kisskb/buildresult/5746632/ Paul. -- > > Optimise get_order() to use bit scanning instructions if such exist rather than > a loop. Also, make it possible to use get_order() in static initialisations > too by building it on top of ilog2() in the constant parameter case. > > This has been tested for i386 and x86_64 using the following userspace program, > and for FRV by making appropriate substitutions for fls() and fls64(). It will > abort if the case for get_order() deviates from the original except for the > order of 0, for which get_order() produces an undefined result. This program > tests both dynamic and static parameters. > > #include <stdlib.h> > #include <stdio.h> > > #ifdef __x86_64__ > #define BITS_PER_LONG 64 > #else > #define BITS_PER_LONG 32 > #endif > > #define PAGE_SHIFT 12 > > typedef unsigned long long __u64, u64; > typedef unsigned int __u32, u32; > #define noinline __attribute__((noinline)) > > static inline int fls(int x) > { > int bitpos = -1; > > asm("bsrl %1,%0" > : "+r" (bitpos) > : "rm" (x)); > return bitpos + 1; > } > > static __always_inline int fls64(__u64 x) > { > #if BITS_PER_LONG == 64 > long bitpos = -1; > > asm("bsrq %1,%0" > : "+r" (bitpos) > : "rm" (x)); > return bitpos + 1; > #else > __u32 h = x >> 32, l = x; > int bitpos = -1; > > asm("bsrl %1,%0 \n" > "subl %2,%0 \n" > "bsrl %3,%0 \n" > : "+r" (bitpos) > : "rm" (l), "i"(32), "rm" (h)); > > return bitpos + 33; > #endif > } > > static inline __attribute__((const)) > int __ilog2_u32(u32 n) > { > return fls(n) - 1; > } > > static inline __attribute__((const)) > int __ilog2_u64(u64 n) > { > return fls64(n) - 1; > } > > extern __attribute__((const, noreturn)) > int ____ilog2_NaN(void); > > #define ilog2(n) \ > ( \ > __builtin_constant_p(n) ? ( \ > (n) < 1 ? ____ilog2_NaN() : \ > (n) & (1ULL << 63) ? 63 : \ > (n) & (1ULL << 62) ? 62 : \ > (n) & (1ULL << 61) ? 61 : \ > (n) & (1ULL << 60) ? 60 : \ > (n) & (1ULL << 59) ? 59 : \ > (n) & (1ULL << 58) ? 58 : \ > (n) & (1ULL << 57) ? 57 : \ > (n) & (1ULL << 56) ? 56 : \ > (n) & (1ULL << 55) ? 55 : \ > (n) & (1ULL << 54) ? 54 : \ > (n) & (1ULL << 53) ? 53 : \ > (n) & (1ULL << 52) ? 52 : \ > (n) & (1ULL << 51) ? 51 : \ > (n) & (1ULL << 50) ? 50 : \ > (n) & (1ULL << 49) ? 49 : \ > (n) & (1ULL << 48) ? 48 : \ > (n) & (1ULL << 47) ? 47 : \ > (n) & (1ULL << 46) ? 46 : \ > (n) & (1ULL << 45) ? 45 : \ > (n) & (1ULL << 44) ? 44 : \ > (n) & (1ULL << 43) ? 43 : \ > (n) & (1ULL << 42) ? 42 : \ > (n) & (1ULL << 41) ? 41 : \ > (n) & (1ULL << 40) ? 40 : \ > (n) & (1ULL << 39) ? 39 : \ > (n) & (1ULL << 38) ? 38 : \ > (n) & (1ULL << 37) ? 37 : \ > (n) & (1ULL << 36) ? 36 : \ > (n) & (1ULL << 35) ? 35 : \ > (n) & (1ULL << 34) ? 34 : \ > (n) & (1ULL << 33) ? 33 : \ > (n) & (1ULL << 32) ? 32 : \ > (n) & (1ULL << 31) ? 31 : \ > (n) & (1ULL << 30) ? 30 : \ > (n) & (1ULL << 29) ? 29 : \ > (n) & (1ULL << 28) ? 28 : \ > (n) & (1ULL << 27) ? 27 : \ > (n) & (1ULL << 26) ? 26 : \ > (n) & (1ULL << 25) ? 25 : \ > (n) & (1ULL << 24) ? 24 : \ > (n) & (1ULL << 23) ? 23 : \ > (n) & (1ULL << 22) ? 22 : \ > (n) & (1ULL << 21) ? 21 : \ > (n) & (1ULL << 20) ? 20 : \ > (n) & (1ULL << 19) ? 19 : \ > (n) & (1ULL << 18) ? 18 : \ > (n) & (1ULL << 17) ? 17 : \ > (n) & (1ULL << 16) ? 16 : \ > (n) & (1ULL << 15) ? 15 : \ > (n) & (1ULL << 14) ? 14 : \ > (n) & (1ULL << 13) ? 13 : \ > (n) & (1ULL << 12) ? 12 : \ > (n) & (1ULL << 11) ? 11 : \ > (n) & (1ULL << 10) ? 10 : \ > (n) & (1ULL << 9) ? 9 : \ > (n) & (1ULL << 8) ? 8 : \ > (n) & (1ULL << 7) ? 7 : \ > (n) & (1ULL << 6) ? 6 : \ > (n) & (1ULL << 5) ? 5 : \ > (n) & (1ULL << 4) ? 4 : \ > (n) & (1ULL << 3) ? 3 : \ > (n) & (1ULL << 2) ? 2 : \ > (n) & (1ULL << 1) ? 1 : \ > (n) & (1ULL << 0) ? 0 : \ > ____ilog2_NaN() \ > ) : \ > (sizeof(n) <= 4) ? \ > __ilog2_u32(n) : \ > __ilog2_u64(n) \ > ) > > static noinline __attribute__((const)) > int old_get_order(unsigned long size) > { > int order; > > size = (size - 1) >> (PAGE_SHIFT - 1); > order = -1; > do { > size >>= 1; > order++; > } while (size); > return order; > } > > static noinline __attribute__((const)) > int __get_order(unsigned long size) > { > int order; > size--; > size >>= PAGE_SHIFT; > #if BITS_PER_LONG == 32 > order = fls(size); > #else > order = fls64(size); > #endif > return order; > } > > #define get_order(n) \ > ( \ > __builtin_constant_p(n) ? ( \ > (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ > ((n < (1UL << PAGE_SHIFT)) ? 0 : \ > ilog2((n) - 1) - PAGE_SHIFT + 1) \ > ) : \ > __get_order(n) \ > ) > > #define order(N) \ > { (1UL << N) - 1, get_order((1UL << N) - 1) }, \ > { (1UL << N), get_order((1UL << N)) }, \ > { (1UL << N) + 1, get_order((1UL << N) + 1) } > > struct order { > unsigned long n, order; > }; > > static const struct order order_table[] = { > order(0), > order(1), > order(2), > order(3), > order(4), > order(5), > order(6), > order(7), > order(8), > order(9), > order(10), > order(11), > order(12), > order(13), > order(14), > order(15), > order(16), > order(17), > order(18), > order(19), > order(20), > order(21), > order(22), > order(23), > order(24), > order(25), > order(26), > order(27), > order(28), > order(29), > order(30), > order(31), > #if BITS_PER_LONG == 64 > order(32), > order(33), > order(34), > order(35), > #endif > { 0x2929 } > }; > > void check(int loop, unsigned long n) > { > unsigned long old, new; > > printf("[%2d]: %09lx | ", loop, n); > > old = old_get_order(n); > new = get_order(n); > > printf("%3ld, %3ld\n", old, new); > if (n != 0 && old != new) > abort(); > } > > int main(int argc, char **argv) > { > const struct order *p; > unsigned long n; > int loop; > > for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) { > n = 1UL << loop; > check(loop, n - 1); > check(loop, n); > check(loop, n + 1); > } > > for (p = order_table; p->n != 0x2929; p++) { > unsigned long old, new; > > old = old_get_order(p->n); > new = p->order; > printf("%09lx\t%3ld, %3ld\n", p->n, old, new); > if (p->n != 0 && old != new) > abort(); > } > > return 0; > } > > Disassembling the x86_64 version of the above code shows: > > 0000000000400510 <old_get_order>: > 400510: 48 83 ef 01 sub $0x1,%rdi > 400514: b8 ff ff ff ff mov $0xffffffff,%eax > 400519: 48 c1 ef 0b shr $0xb,%rdi > 40051d: 0f 1f 00 nopl (%rax) > 400520: 83 c0 01 add $0x1,%eax > 400523: 48 d1 ef shr %rdi > 400526: 75 f8 jne 400520 <old_get_order+0x10> > 400528: f3 c3 repz retq > 40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) > > 0000000000400530 <__get_order>: > 400530: 48 83 ef 01 sub $0x1,%rdi > 400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax > 40053b: 48 c1 ef 0c shr $0xc,%rdi > 40053f: 48 0f bd c7 bsr %rdi,%rax > 400543: 83 c0 01 add $0x1,%eax > 400546: c3 retq > 400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) > 40054e: 00 00 > > As can be seen, the new __get_order() function is simpler than the > old_get_order() function. > > Signed-off-by: David Howells <dhowells@xxxxxxxxxx> > Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@xxxxxxxxxxxxxxxxxxxxxx > Acked-by: Arnd Bergmann <arnd@xxxxxxxx> > Signed-off-by: H. Peter Anvin <hpa@xxxxxxxxx> > --- > include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------ > 1 files changed, 28 insertions(+), 12 deletions(-) > > diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h > index 76e9687..e0fb4bf 100644 > --- a/include/asm-generic/getorder.h > +++ b/include/asm-generic/getorder.h > @@ -4,6 +4,25 @@ > #ifndef __ASSEMBLY__ > > #include <linux/compiler.h> > +#include <linux/log2.h> > + > +/* > + * Runtime evaluation of get_order() > + */ > +static inline __attribute_const__ > +int __get_order(unsigned long size) > +{ > + int order; > + > + size--; > + size >>= PAGE_SHIFT; > +#if BITS_PER_LONG == 32 > + order = fls(size); > +#else > + order = fls64(size); > +#endif > + return order; > +} > > /** > * get_order - Determine the allocation order of a memory size > @@ -27,18 +46,15 @@ > * This function may be used to initialise variables with compile time > * evaluations of constants. > */ > -static inline __attribute_const__ int get_order(unsigned long size) > -{ > - int order; > - > - size = (size - 1) >> (PAGE_SHIFT - 1); > - order = -1; > - do { > - size >>= 1; > - order++; > - } while (size); > - return order; > -} > +#define get_order(n) \ > +( \ > + __builtin_constant_p(n) ? ( \ > + (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ > + ((n < (1UL << PAGE_SHIFT)) ? 0 : \ > + ilog2((n) - 1) - PAGE_SHIFT + 1) \ > + ) : \ > + __get_order(n) \ > +) > > #endif /* __ASSEMBLY__ */ > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ ÿôèº{.nÇ+?·?®??+%?Ëÿ±éݶ¥?wÿº{.nÇ+?·¥?{±þw±·ø§¶?¡Ü¨}©?²Æ zÚ&j:+v?¨þø¯ù®w¥þ?à2?Þ?¨èÚ&¢)ß¡«a¶Úÿÿûàz¿äz¹Þ?ú+?ù???Ý¢jÿ?wèþf