find_next_bit() and friends use small_const_nbits() to detect possibility of compile-time optimization. It works well for nbits up to BITS_PER_LONG, i.e. for 1-word bitmaps. When nbits belongs to 2nd, 3rd or any other word, small_const_nbits() returns false. But when both offset and size are within the same word boundary, we still can make a small_const optimization because the whole business is just fetching a single word, and masking head and tail bits if needed. This patch adds a new small_const_nbits_off() macro doing that. It replaces small_const_nbits() in find_next_bit() functions. Signed-off-by: Yury Norov <yury.norov@xxxxxxxxx> --- include/asm-generic/bitsperlong.h | 12 +++++++++ include/linux/find.h | 45 ++++++++++++------------------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/include/asm-generic/bitsperlong.h b/include/asm-generic/bitsperlong.h index 1023e2a4bd37..c294ff798154 100644 --- a/include/asm-generic/bitsperlong.h +++ b/include/asm-generic/bitsperlong.h @@ -35,4 +35,16 @@ #define small_const_nbits(nbits) \ (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0) +/* + * small_const_nbits_off(nbits, off) is true precisely when it is known at + * compile-time that all bits in range [off, nbits) belong to the same word. + * Bitmaps of size 0 are very rare, and a compile-time-known-size 0 is most + * likely a sign of error. They will be handled correctly by the bit/bitmap + * APIs using the out-of-line functions, so that the inline implementations + * can unconditionally dereference the pointer(s). + */ +#define small_const_nbits_off(nbits, off) \ + (__builtin_constant_p(nbits) && __builtin_constant_p(off) && (nbits) > 0 && \ + (nbits) > (off) && (off) / BITS_PER_LONG == ((nbits) - 1) / BITS_PER_LONG) + #endif /* __ASM_GENERIC_BITS_PER_LONG */ diff --git a/include/linux/find.h b/include/linux/find.h index db2f2851601d..df5c4d1adf4c 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -7,6 +7,7 @@ #endif #include <linux/bitops.h> +#include <linux/math.h> unsigned long _find_next_bit(const unsigned long *addr1, unsigned long nbits, unsigned long start); @@ -49,14 +50,11 @@ static __always_inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - if (small_const_nbits(size)) { - unsigned long val; + if (small_const_nbits_off(size, offset)) { + unsigned long val = addr[offset/BITS_PER_LONG] & + GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG); - if (unlikely(offset >= size)) - return size; - - val = *addr & GENMASK(size - 1, offset); - return val ? __ffs(val) : size; + return val ? round_down(offset, BITS_PER_LONG) + __ffs(val) : size; } return _find_next_bit(addr, size, offset); @@ -79,14 +77,11 @@ unsigned long find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) { - if (small_const_nbits(size)) { - unsigned long val; + if (small_const_nbits_off(size, offset)) { + unsigned long val = addr1[offset/BITS_PER_LONG] & addr2[offset/BITS_PER_LONG] & + GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG); - if (unlikely(offset >= size)) - return size; - - val = *addr1 & *addr2 & GENMASK(size - 1, offset); - return val ? __ffs(val) : size; + return val ? round_down(offset, BITS_PER_LONG) + __ffs(val) : size; } return _find_next_and_bit(addr1, addr2, size, offset); @@ -110,14 +105,11 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) { - if (small_const_nbits(size)) { - unsigned long val; + if (small_const_nbits_off(size, offset)) { + unsigned long val = addr1[offset/BITS_PER_LONG] & ~addr2[offset/BITS_PER_LONG] & + GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG); - if (unlikely(offset >= size)) - return size; - - val = *addr1 & ~*addr2 & GENMASK(size - 1, offset); - return val ? __ffs(val) : size; + return val ? round_down(offset, BITS_PER_LONG) + __ffs(val) : size; } return _find_next_andnot_bit(addr1, addr2, size, offset); @@ -138,14 +130,11 @@ static __always_inline unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - if (small_const_nbits(size)) { - unsigned long val; + if (small_const_nbits_off(size, offset)) { + unsigned long val = addr[offset/BITS_PER_LONG] | + ~GENMASK((size - 1) % BITS_PER_LONG, offset % BITS_PER_LONG); - if (unlikely(offset >= size)) - return size; - - val = *addr | ~GENMASK(size - 1, offset); - return val == ~0UL ? size : ffz(val); + return val == ~0UL ? size : round_down(offset, BITS_PER_LONG) + ffz(val); } return _find_next_zero_bit(addr, size, offset); -- 2.34.1