Re: [PATCH v1 1/1] xarray: fix the data-race in xas_find_chunk() by using READ_ONCE()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 9/18/23 17:54, Jan Kara wrote:
On Mon 18-09-23 07:59:03, Yury Norov wrote:
On Mon, Sep 18, 2023 at 02:46:02PM +0200, Mirsad Todorovac wrote:
--------------------------------------------------------
  lib/find_bit.c | 33 +++++++++++++++++----------------
  1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/lib/find_bit.c b/lib/find_bit.c
index 32f99e9a670e..56244e4f744e 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -18,6 +18,7 @@
  #include <linux/math.h>
  #include <linux/minmax.h>
  #include <linux/swab.h>
+#include <asm/rwonce.h>
  /*
   * Common helper for find_bit() function family
@@ -98,7 +99,7 @@ out:                                                                          \
   */
  unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
  {
-       return FIND_FIRST_BIT(addr[idx], /* nop */, size);
+       return FIND_FIRST_BIT(READ_ONCE(addr[idx]), /* nop */, size);
  }
  EXPORT_SYMBOL(_find_first_bit);
  #endif

...

That doesn't look correct. READ_ONCE() implies that there's another
thread modifying the bitmap concurrently. This is not true for the
vast majority of bitmap API users, and I expect that forcing
READ_ONCE() would affect performance for them.

Bitmap functions, with a few rare exceptions like set_bit(), are not
thread-safe and require users to perform locking/synchronization where
needed.

Well, for xarray the write side is synchronized with a spinlock but the read
side is not (only RCU protected).

If you really need READ_ONCE, I think it's better to implement a new
flavor of the function(s) separately, like:
         find_first_bit_read_once()

So yes, xarray really needs READ_ONCE(). And I don't think READ_ONCE()
imposes any real performance overhead in this particular case because for
any sane compiler the generated assembly with & without READ_ONCE() will be
exactly the same. For example I've checked disassembly of _find_next_bit()
using READ_ONCE(). The main loop is:

    0xffffffff815a2b6d <+77>:	inc    %r8
    0xffffffff815a2b70 <+80>:	add    $0x8,%rdx
    0xffffffff815a2b74 <+84>:	mov    %r8,%rcx
    0xffffffff815a2b77 <+87>:	shl    $0x6,%rcx
    0xffffffff815a2b7b <+91>:	cmp    %rcx,%rax
    0xffffffff815a2b7e <+94>:	jbe    0xffffffff815a2b9b <_find_next_bit+123>
    0xffffffff815a2b80 <+96>:	mov    (%rdx),%rcx
    0xffffffff815a2b83 <+99>:	test   %rcx,%rcx
    0xffffffff815a2b86 <+102>:	je     0xffffffff815a2b6d <_find_next_bit+77>
    0xffffffff815a2b88 <+104>:	shl    $0x6,%r8
    0xffffffff815a2b8c <+108>:	tzcnt  %rcx,%rcx

So you can see the value we work with is copied from the address (rdx) into
a register (rcx) and the test and __ffs() happens on a register value and
thus READ_ONCE() has no practical effect. It just prevents the compiler
from doing some stupid de-optimization.

								Honza

If I may also add, the centralised READ_ONCE() version fixed a couple of hundred
instances of KCSAN data-races in dmesg.

_find_*_bit() functions and/or macros cause quite a number of KCSAN BUG warnings:

 95 _find_first_and_bit (lib/find_bit.c:114 (discriminator 10))
 31 _find_first_zero_bit (lib/find_bit.c:125 (discriminator 10))
173 _find_next_and_bit (lib/find_bit.c:171 (discriminator 2))
655 _find_next_bit (lib/find_bit.c:133 (discriminator 2))
  5 _find_next_zero_bit

Finding each use of a find_bit_*() function and replacing it with find_bit_*_read_once()
could be time-consuming and challenging.

However, I will do both versions so you could compare, if you'd like.

Note, in the PoC version I have only implemented find_next_bit_read_once() ATM to see if
this works.

Regards,
Mirsad

diff --git a/include/linux/find.h b/include/linux/find.h
index 5e4f39ef2e72..2b7f9f24cffb 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -40,6 +40,38 @@ unsigned long _find_next_bit_le(const unsigned long *addr, unsigned
 				long size, unsigned long offset);
 #endif
 
+unsigned long _find_next_bit_read_once(const unsigned long *addr1, unsigned long nbits,
+				unsigned long start);
+unsigned long _find_next_and_bit_read_once(const unsigned long *addr1, const unsigned long *addr2,
+					unsigned long nbits, unsigned long start);
+unsigned long _find_next_andnot_bit_read_once(const unsigned long *addr1, const unsigned long *addr2,
+					unsigned long nbits, unsigned long start);
+unsigned long _find_next_or_bit_read_once(const unsigned long *addr1, const unsigned long *addr2,
+					unsigned long nbits, unsigned long start);
+unsigned long _find_next_zero_bit_read_once(const unsigned long *addr, unsigned long nbits,
+					 unsigned long start);
+extern unsigned long _find_first_bit_read_once(const unsigned long *addr, unsigned long size);
+unsigned long __find_nth_bit_read_once(const unsigned long *addr, unsigned long size, unsigned long n);
+unsigned long __find_nth_and_bit_read_once(const unsigned long *addr1, const unsigned long *addr2,
+				unsigned long size, unsigned long n);
+unsigned long __find_nth_andnot_bit_read_once(const unsigned long *addr1, const unsigned long *addr2,
+					unsigned long size, unsigned long n);
+unsigned long __find_nth_and_andnot_bit_read_once(const unsigned long *addr1, const unsigned long *addr2,
+					const unsigned long *addr3, unsigned long size,
+					unsigned long n);
+extern unsigned long _find_first_and_bit_read_once(const unsigned long *addr1,
+					 const unsigned long *addr2, unsigned long size);
+extern unsigned long _find_first_zero_bit_read_once(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_last_bit_read_once(const unsigned long *addr, unsigned long size);
+
+#ifdef __BIG_ENDIAN
+unsigned long _find_first_zero_bit_le_read_once(const unsigned long *addr, unsigned long size);
+unsigned long _find_next_zero_bit_le_read_once(const  unsigned long *addr, unsigned
+					long size, unsigned long offset);
+unsigned long _find_next_bit_le_read_once(const unsigned long *addr, unsigned
+				long size, unsigned long offset);
+#endif
+
 #ifndef find_next_bit
 /**
  * find_next_bit - find the next set bit in a memory region
@@ -68,6 +100,43 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
 }
 #endif
 
+#ifndef find_next_bit_read_once
+/**
+ * find_next_bit_read_once - find the next set bit in a memory region
+ *				with data-race protection
+ * @addr: The address to base the search on
+ * @size: The bitmap size in bits
+ * @offset: The bitnumber to start searching at
+ *
+ * The bitmap words are loaded with READ_ONCE(), both on the single-word
+ * fast path below and in _find_next_bit_read_once().
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_next_bit_read_once(const unsigned long *addr, unsigned long size,
+				      unsigned long offset)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val;
+
+		/* GENMASK(size - 1, offset) needs offset < size to form a valid mask. */
+		if (offset >= size)
+			return size;
+
+		/*
+		 * Load the word once so the zero test and __ffs() see the same value.
+		 * NOTE(review): confirm READ_ONCE() is reachable from this header.
+		 */
+		val = READ_ONCE(*addr) & GENMASK(size - 1, offset);
+		return val ? __ffs(val) : size;
+	}
+
+	return _find_next_bit_read_once(addr, size, offset);
+}
+#endif
+
 #ifndef find_next_and_bit
 /**
  * find_next_and_bit - find the next set bit in both memory regions
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 1715fd322d62..6c04f2117c06 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -1718,16 +1718,8 @@ static inline unsigned int xas_find_chunk(struct xa_state *xas, bool advance,
 
 	if (advance)
 		offset++;
-	if (XA_CHUNK_SIZE == BITS_PER_LONG) {
-		if (offset < XA_CHUNK_SIZE) {
-			unsigned long data = READ_ONCE(*addr) & (~0UL << offset);
-			if (data)
-				return __ffs(data);
-		}
-		return XA_CHUNK_SIZE;
-	}
 
-	return find_next_bit(addr, XA_CHUNK_SIZE, offset);
+	return find_next_bit_read_once(addr, XA_CHUNK_SIZE, offset);
 }
 
 /**
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 32f99e9a670e..92a8e0016a20 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -18,6 +18,7 @@
 #include <linux/math.h>
 #include <linux/minmax.h>
 #include <linux/swab.h>
+#include <asm/rwonce.h>
 
 /*
  * Common helper for find_bit() function family
@@ -268,3 +269,172 @@ EXPORT_SYMBOL(_find_next_bit_le);
 #endif
 
 #endif /* __BIG_ENDIAN */
+
+/*
+ * The read_once flavour of the find_bit() functions: each bitmap word is
+ * loaded with READ_ONCE() to avoid data races with concurrent writers.
+ */
+
+#ifndef find_first_bit_read_once
+/*
+ * Find the first set bit in a memory region, reading each word with READ_ONCE().
+ */
+unsigned long _find_first_bit_read_once(const unsigned long *addr, unsigned long size)
+{
+	return FIND_FIRST_BIT(READ_ONCE(addr[idx]), /* nop */, size);
+}
+EXPORT_SYMBOL(_find_first_bit_read_once);
+#endif
+
+#ifndef find_first_and_bit_read_once
+/*
+ * Find the first bit set in both memory regions, reading words with READ_ONCE().
+ */
+unsigned long _find_first_and_bit_read_once(const unsigned long *addr1,
+					    const unsigned long *addr2,
+					    unsigned long size)
+{
+	return FIND_FIRST_BIT(READ_ONCE(addr1[idx]) & READ_ONCE(addr2[idx]), /* nop */, size);
+}
+EXPORT_SYMBOL(_find_first_and_bit_read_once);
+#endif
+
+#ifndef find_first_zero_bit_read_once
+/*
+ * Find the first cleared bit in a memory region, reading each word with READ_ONCE().
+ */
+unsigned long _find_first_zero_bit_read_once(const unsigned long *addr, unsigned long size)
+{
+	return FIND_FIRST_BIT(~READ_ONCE(addr[idx]), /* nop */, size);
+}
+EXPORT_SYMBOL(_find_first_zero_bit_read_once);
+#endif
+
+#ifndef find_next_bit_read_once
+unsigned long _find_next_bit_read_once(const unsigned long *addr, unsigned long nbits, unsigned long start)
+{
+	return FIND_NEXT_BIT(READ_ONCE(addr[idx]), /* nop */, nbits, start);
+}
+EXPORT_SYMBOL(_find_next_bit_read_once);
+#endif
+
+unsigned long __find_nth_bit_read_once(const unsigned long *addr, unsigned long size, unsigned long n)
+{
+	return FIND_NTH_BIT(READ_ONCE(addr[idx]), size, n);
+}
+EXPORT_SYMBOL(__find_nth_bit_read_once);
+
+unsigned long __find_nth_and_bit_read_once(const unsigned long *addr1, const unsigned long *addr2,
+					   unsigned long size, unsigned long n)
+{
+	return FIND_NTH_BIT(READ_ONCE(addr1[idx]) & READ_ONCE(addr2[idx]), size, n);
+}
+EXPORT_SYMBOL(__find_nth_and_bit_read_once);
+
+unsigned long __find_nth_andnot_bit_read_once(const unsigned long *addr1, const unsigned long *addr2,
+					      unsigned long size, unsigned long n)
+{
+	return FIND_NTH_BIT(READ_ONCE(addr1[idx]) & ~READ_ONCE(addr2[idx]), size, n);
+}
+EXPORT_SYMBOL(__find_nth_andnot_bit_read_once);
+
+unsigned long __find_nth_and_andnot_bit_read_once(const unsigned long *addr1,
+						  const unsigned long *addr2,
+						  const unsigned long *addr3,
+						  unsigned long size, unsigned long n)
+{
+	return FIND_NTH_BIT(READ_ONCE(addr1[idx]) & READ_ONCE(addr2[idx]) & ~READ_ONCE(addr3[idx]), size, n);
+}
+EXPORT_SYMBOL(__find_nth_and_andnot_bit_read_once);
+
+#ifndef find_next_and_bit_read_once
+unsigned long _find_next_and_bit_read_once(const unsigned long *addr1, const unsigned long *addr2,
+					   unsigned long nbits, unsigned long start)
+{
+	return FIND_NEXT_BIT(READ_ONCE(addr1[idx]) & READ_ONCE(addr2[idx]), /* nop */, nbits, start);
+}
+EXPORT_SYMBOL(_find_next_and_bit_read_once);
+#endif
+
+#ifndef find_next_andnot_bit_read_once
+unsigned long _find_next_andnot_bit_read_once(const unsigned long *addr1, const unsigned long *addr2,
+					      unsigned long nbits, unsigned long start)
+{
+	return FIND_NEXT_BIT(READ_ONCE(addr1[idx]) & ~READ_ONCE(addr2[idx]), /* nop */, nbits, start);
+}
+EXPORT_SYMBOL(_find_next_andnot_bit_read_once);
+#endif
+
+#ifndef find_next_or_bit_read_once
+unsigned long _find_next_or_bit_read_once(const unsigned long *addr1, const unsigned long *addr2,
+					  unsigned long nbits, unsigned long start)
+{
+	return FIND_NEXT_BIT(READ_ONCE(addr1[idx]) | READ_ONCE(addr2[idx]), /* nop */, nbits, start);
+}
+EXPORT_SYMBOL(_find_next_or_bit_read_once);
+#endif
+
+#ifndef find_next_zero_bit_read_once
+unsigned long _find_next_zero_bit_read_once(const unsigned long *addr, unsigned long nbits,
+					    unsigned long start)
+{
+	return FIND_NEXT_BIT(~READ_ONCE(addr[idx]), /* nop */, nbits, start);
+}
+EXPORT_SYMBOL(_find_next_zero_bit_read_once);
+#endif
+
+#ifndef find_last_bit_read_once
+unsigned long _find_last_bit_read_once(const unsigned long *addr, unsigned long size)
+{
+	if (size) {
+		unsigned long val = BITMAP_LAST_WORD_MASK(size);
+		unsigned long idx = (size-1) / BITS_PER_LONG;
+
+		do {
+			val &= READ_ONCE(addr[idx]);
+			if (val)
+				return idx * BITS_PER_LONG + __fls(val);
+
+			val = ~0ul;
+		} while (idx--);
+	}
+	return size;
+}
+EXPORT_SYMBOL(_find_last_bit_read_once);
+#endif
+
+#ifdef __BIG_ENDIAN
+
+#ifndef find_first_zero_bit_le_read_once
+/*
+ * Find the first cleared bit in an LE memory region, reading each word with READ_ONCE().
+ */
+unsigned long _find_first_zero_bit_le_read_once(const unsigned long *addr,
+						unsigned long size)
+{
+	return FIND_FIRST_BIT(~READ_ONCE(addr[idx]), swab, size);
+}
+EXPORT_SYMBOL(_find_first_zero_bit_le_read_once);
+
+#endif
+
+#ifndef find_next_zero_bit_le_read_once
+unsigned long _find_next_zero_bit_le_read_once(const unsigned long *addr,
+					       unsigned long size, unsigned long offset)
+{
+	return FIND_NEXT_BIT(~READ_ONCE(addr[idx]), swab, size, offset);
+}
+EXPORT_SYMBOL(_find_next_zero_bit_le_read_once);
+#endif
+
+#ifndef find_next_bit_le_read_once
+unsigned long _find_next_bit_le_read_once(const unsigned long *addr,
+					  unsigned long size, unsigned long offset)
+{
+	return FIND_NEXT_BIT(READ_ONCE(addr[idx]), swab, size, offset);
+}
+EXPORT_SYMBOL(_find_next_bit_le_read_once);
+
+#endif
+
+#endif /* __BIG_ENDIAN */

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux