From: Mirsad Todorovac <mirsad.todorovac@xxxxxxxxxxxx> Some users of lib/find functions can operate on bitmaps that change while we are looking at the bitmap. For example xarray code looks at tag bitmaps only with RCU protection. The xarray users are prepared to handle a situation were tag modification races with reading of a tag bitmap (and thus we get back a stale information) but the find_*bit() functions should return based on bitmap contents at *some* point in time. As UBSAN properly warns, the code like: val = *addr; if (val) __ffs(val) prone to refetching 'val' contents from addr by the compiler and thus passing 0 to __ffs() which has undefined results. Fix the problem by using READ_ONCE() in all the appropriate places so that the compiler cannot accidentally refetch the contents of addr between the test and __ffs(). This makes the undefined behavior impossible. The generated code is somewhat larger due to gcc tracking both the index and target fetch address in separate registers (according to GCC folks the volatile cast confuses their optimizer a bit, they are looking into a fix). The difference in performance is minimal though. Targetted microbenchmark (in userspace) of the bit searching loop shows about 2% regression on some x86 microarchitectures so for normal kernel workloads this should be in the noise and not worth introducing special set of bitmap searching functions. [JK: Wrote commit message] CC: Yury Norov <yury.norov@xxxxxxxxx> Link: https://lore.kernel.org/all/20230918044739.29782-1-mirsad.todorovac@xxxxxxxxxxxx/ Signed-off-by: Mirsad Todorovac <mirsad.todorovac@xxxxxxxxxxxx> Signed-off-by: Jan Kara <jack@xxxxxxx> --- include/linux/find.h | 40 ++++++++++++++++++++++++---------------- lib/find_bit.c | 39 +++++++++++++++++++++++---------------- 2 files changed, 47 insertions(+), 32 deletions(-) diff --git a/include/linux/find.h b/include/linux/find.h index 5e4f39ef2e72..5ef6466dc7cc 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -60,7 +60,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, if (unlikely(offset >= size)) return size; - val = *addr & GENMASK(size - 1, offset); + val = READ_ONCE(*addr) & GENMASK(size - 1, offset); return val ? __ffs(val) : size; } @@ -90,7 +90,8 @@ unsigned long find_next_and_bit(const unsigned long *addr1, if (unlikely(offset >= size)) return size; - val = *addr1 & *addr2 & GENMASK(size - 1, offset); + val = READ_ONCE(*addr1) & READ_ONCE(*addr2) & + GENMASK(size - 1, offset); return val ? __ffs(val) : size; } @@ -121,7 +122,8 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1, if (unlikely(offset >= size)) return size; - val = *addr1 & ~*addr2 & GENMASK(size - 1, offset); + val = READ_ONCE(*addr1) & ~READ_ONCE(*addr2) & + GENMASK(size - 1, offset); return val ? __ffs(val) : size; } @@ -151,7 +153,8 @@ unsigned long find_next_or_bit(const unsigned long *addr1, if (unlikely(offset >= size)) return size; - val = (*addr1 | *addr2) & GENMASK(size - 1, offset); + val = (READ_ONCE(*addr1) | READ_ONCE(*addr2)) & + GENMASK(size - 1, offset); return val ? __ffs(val) : size; } @@ -179,7 +182,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, if (unlikely(offset >= size)) return size; - val = *addr | ~GENMASK(size - 1, offset); + val = READ_ONCE(*addr) | ~GENMASK(size - 1, offset); return val == ~0UL ? size : ffz(val); } @@ -200,7 +203,7 @@ static inline unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { - unsigned long val = *addr & GENMASK(size - 1, 0); + unsigned long val = READ_ONCE(*addr) & GENMASK(size - 1, 0); return val ? __ffs(val) : size; } @@ -229,7 +232,7 @@ unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsign return size; if (small_const_nbits(size)) { - unsigned long val = *addr & GENMASK(size - 1, 0); + unsigned long val = READ_ONCE(*addr) & GENMASK(size - 1, 0); return val ? fns(val, n) : size; } @@ -255,7 +258,8 @@ unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long * return size; if (small_const_nbits(size)) { - unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0); + unsigned long val = READ_ONCE(*addr1) & READ_ONCE(*addr2) + & GENMASK(size - 1, 0); return val ? fns(val, n) : size; } @@ -282,7 +286,8 @@ unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned lon return size; if (small_const_nbits(size)) { - unsigned long val = *addr1 & (~*addr2) & GENMASK(size - 1, 0); + unsigned long val = READ_ONCE(*addr1) & ~READ_ONCE(*addr2) & + GENMASK(size - 1, 0); return val ? fns(val, n) : size; } @@ -312,7 +317,8 @@ unsigned long find_nth_and_andnot_bit(const unsigned long *addr1, return size; if (small_const_nbits(size)) { - unsigned long val = *addr1 & *addr2 & (~*addr3) & GENMASK(size - 1, 0); + unsigned long val = READ_ONCE(*addr1) & READ_ONCE(*addr2) & + ~READ_ONCE(*addr3) & GENMASK(size - 1, 0); return val ? fns(val, n) : size; } @@ -336,7 +342,8 @@ unsigned long find_first_and_bit(const unsigned long *addr1, unsigned long size) { if (small_const_nbits(size)) { - unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0); + unsigned long val = READ_ONCE(*addr1) & READ_ONCE(*addr2) & + GENMASK(size - 1, 0); return val ? __ffs(val) : size; } @@ -358,7 +365,7 @@ static inline unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { - unsigned long val = *addr | ~GENMASK(size - 1, 0); + unsigned long val = READ_ONCE(*addr) | ~GENMASK(size - 1, 0); return val == ~0UL ? size : ffz(val); } @@ -379,7 +386,7 @@ static inline unsigned long find_last_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { - unsigned long val = *addr & GENMASK(size - 1, 0); + unsigned long val = READ_ONCE(*addr) & GENMASK(size - 1, 0); return val ? __fls(val) : size; } @@ -505,7 +512,7 @@ unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { if (small_const_nbits(size)) { - unsigned long val = *(const unsigned long *)addr; + unsigned long val = READ_ONCE(*(const unsigned long *)addr); if (unlikely(offset >= size)) return size; @@ -523,7 +530,8 @@ static inline unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) { if (small_const_nbits(size)) { - unsigned long val = swab(*(const unsigned long *)addr) | ~GENMASK(size - 1, 0); + unsigned long val = swab(READ_ONCE(*(const unsigned long *)addr)) + | ~GENMASK(size - 1, 0); return val == ~0UL ? size : ffz(val); } @@ -538,7 +546,7 @@ unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { if (small_const_nbits(size)) { - unsigned long val = *(const unsigned long *)addr; + unsigned long val = READ_ONCE(*(const unsigned long *)addr); if (unlikely(offset >= size)) return size; diff --git a/lib/find_bit.c b/lib/find_bit.c index 32f99e9a670e..6867ef8a85e0 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -98,7 +98,7 @@ out: \ */ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size) { - return FIND_FIRST_BIT(addr[idx], /* nop */, size); + return FIND_FIRST_BIT(READ_ONCE(addr[idx]), /* nop */, size); } EXPORT_SYMBOL(_find_first_bit); #endif @@ -111,7 +111,8 @@ unsigned long _find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size) { - return FIND_FIRST_BIT(addr1[idx] & addr2[idx], /* nop */, size); + return FIND_FIRST_BIT(READ_ONCE(addr1[idx]) & READ_ONCE(addr2[idx]), + /* nop */, size); } EXPORT_SYMBOL(_find_first_and_bit); #endif @@ -122,7 +123,7 @@ EXPORT_SYMBOL(_find_first_and_bit); */ unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size) { - return FIND_FIRST_BIT(~addr[idx], /* nop */, size); + return FIND_FIRST_BIT(~READ_ONCE(addr[idx]), /* nop */, size); } EXPORT_SYMBOL(_find_first_zero_bit); #endif @@ -130,28 +131,30 @@ EXPORT_SYMBOL(_find_first_zero_bit); #ifndef find_next_bit unsigned long _find_next_bit(const unsigned long *addr, unsigned long nbits, unsigned long start) { - return FIND_NEXT_BIT(addr[idx], /* nop */, nbits, start); + return FIND_NEXT_BIT(READ_ONCE(addr[idx]), /* nop */, nbits, start); } EXPORT_SYMBOL(_find_next_bit); #endif unsigned long __find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n) { - return FIND_NTH_BIT(addr[idx], size, n); + return FIND_NTH_BIT(READ_ONCE(addr[idx]), size, n); } EXPORT_SYMBOL(__find_nth_bit); unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n) { - return FIND_NTH_BIT(addr1[idx] & addr2[idx], size, n); + return FIND_NTH_BIT(READ_ONCE(addr1[idx]) & READ_ONCE(addr2[idx]), + size, n); } EXPORT_SYMBOL(__find_nth_and_bit); unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n) { - return FIND_NTH_BIT(addr1[idx] & ~addr2[idx], size, n); + return FIND_NTH_BIT(READ_ONCE(addr1[idx]) & ~READ_ONCE(addr2[idx]), + size, n); } EXPORT_SYMBOL(__find_nth_andnot_bit); @@ -160,7 +163,8 @@ unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, const unsigned long *addr3, unsigned long size, unsigned long n) { - return FIND_NTH_BIT(addr1[idx] & addr2[idx] & ~addr3[idx], size, n); + return FIND_NTH_BIT(READ_ONCE(addr1[idx]) & READ_ONCE(addr2[idx]) & + ~READ_ONCE(addr3[idx]), size, n); } EXPORT_SYMBOL(__find_nth_and_andnot_bit); @@ -168,7 +172,8 @@ EXPORT_SYMBOL(__find_nth_and_andnot_bit); unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start) { - return FIND_NEXT_BIT(addr1[idx] & addr2[idx], /* nop */, nbits, start); + return FIND_NEXT_BIT(READ_ONCE(addr1[idx]) & READ_ONCE(addr2[idx]), + /* nop */, nbits, start); } EXPORT_SYMBOL(_find_next_and_bit); #endif @@ -177,7 +182,8 @@ EXPORT_SYMBOL(_find_next_and_bit); unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start) { - return FIND_NEXT_BIT(addr1[idx] & ~addr2[idx], /* nop */, nbits, start); + return FIND_NEXT_BIT(READ_ONCE(addr1[idx]) & ~READ_ONCE(addr2[idx]), + /* nop */, nbits, start); } EXPORT_SYMBOL(_find_next_andnot_bit); #endif @@ -186,7 +192,8 @@ EXPORT_SYMBOL(_find_next_andnot_bit); unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start) { - return FIND_NEXT_BIT(addr1[idx] | addr2[idx], /* nop */, nbits, start); + return FIND_NEXT_BIT(READ_ONCE(addr1[idx]) | READ_ONCE(addr2[idx]), + /* nop */, nbits, start); } EXPORT_SYMBOL(_find_next_or_bit); #endif @@ -195,7 +202,7 @@ EXPORT_SYMBOL(_find_next_or_bit); unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, unsigned long start) { - return FIND_NEXT_BIT(~addr[idx], /* nop */, nbits, start); + return FIND_NEXT_BIT(~READ_ONCE(addr[idx]), /* nop */, nbits, start); } EXPORT_SYMBOL(_find_next_zero_bit); #endif @@ -208,7 +215,7 @@ unsigned long _find_last_bit(const unsigned long *addr, unsigned long size) unsigned long idx = (size-1) / BITS_PER_LONG; do { - val &= addr[idx]; + val &= READ_ONCE(addr[idx]); if (val) return idx * BITS_PER_LONG + __fls(val); @@ -242,7 +249,7 @@ EXPORT_SYMBOL(find_next_clump8); */ unsigned long _find_first_zero_bit_le(const unsigned long *addr, unsigned long size) { - return FIND_FIRST_BIT(~addr[idx], swab, size); + return FIND_FIRST_BIT(~READ_ONCE(addr[idx]), swab, size); } EXPORT_SYMBOL(_find_first_zero_bit_le); @@ -252,7 +259,7 @@ EXPORT_SYMBOL(_find_first_zero_bit_le); unsigned long _find_next_zero_bit_le(const unsigned long *addr, unsigned long size, unsigned long offset) { - return FIND_NEXT_BIT(~addr[idx], swab, size, offset); + return FIND_NEXT_BIT(~READ_ONCE(addr[idx]), swab, size, offset); } EXPORT_SYMBOL(_find_next_zero_bit_le); #endif @@ -261,7 +268,7 @@ EXPORT_SYMBOL(_find_next_zero_bit_le); unsigned long _find_next_bit_le(const unsigned long *addr, unsigned long size, unsigned long offset) { - return FIND_NEXT_BIT(addr[idx], swab, size, offset); + return FIND_NEXT_BIT(READ_ONCE(addr[idx]), swab, size, offset); } EXPORT_SYMBOL(_find_next_bit_le); -- 2.35.3