The patch titled Subject: bitops: add xor_unlock_is_negative_byte() has been added to the -mm mm-unstable branch. Its filename is bitops-add-xor_unlock_is_negative_byte.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/bitops-add-xor_unlock_is_negative_byte.patch This patch will later appear in the mm-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: "Matthew Wilcox (Oracle)" <willy@xxxxxxxxxxxxx> Subject: bitops: add xor_unlock_is_negative_byte() Date: Wed, 4 Oct 2023 17:53:07 +0100 Replace clear_bit_and_unlock_is_negative_byte() with xor_unlock_is_negative_byte(). We have a few places that like to lock a folio, set a flag and unlock it again. Allow for the possibility of combining the latter two operations for efficiency. We are guaranteed that the caller holds the lock, so it is safe to unlock it with the xor. The caller must guarantee that nobody else will set the flag without holding the lock; it is not safe to do this with the PG_dirty flag, for example. Link: https://lkml.kernel.org/r/20231004165317.1061855-8-willy@xxxxxxxxxxxxx Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> Cc: Albert Ou <aou@xxxxxxxxxxxxxxxxx> Cc: Alexander Gordeev <agordeev@xxxxxxxxxxxxx> Cc: Andreas Dilger <adilger.kernel@xxxxxxxxx> Cc: Christian Borntraeger <borntraeger@xxxxxxxxxxxxx> Cc: Christophe Leroy <christophe.leroy@xxxxxxxxxx> Cc: Geert Uytterhoeven <geert@xxxxxxxxxxxxxx> Cc: Heiko Carstens <hca@xxxxxxxxxxxxx> Cc: Ivan Kokshaysky <ink@xxxxxxxxxxxxxxxxxxxx> Cc: Matt Turner <mattst88@xxxxxxxxx> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Cc: Nicholas Piggin <npiggin@xxxxxxxxx> Cc: Palmer Dabbelt <palmer@xxxxxxxxxxx> Cc: Paul Walmsley <paul.walmsley@xxxxxxxxxx> Cc: Richard Henderson <richard.henderson@xxxxxxxxxx> Cc: Sven Schnelle <svens@xxxxxxxxxxxxx> Cc: "Theodore Ts'o" <tytso@xxxxxxx> Cc: Thomas Bogendoerfer <tsbogend@xxxxxxxxxxxxxxxx> Cc: Vasily Gorbik <gor@xxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- arch/powerpc/include/asm/bitops.h | 17 ++------- arch/x86/include/asm/bitops.h | 11 ++---- include/asm-generic/bitops/instrumented-lock.h | 27 ++++++++------- include/asm-generic/bitops/lock.h | 21 ++--------- kernel/kcsan/kcsan_test.c | 8 ++-- kernel/kcsan/selftest.c | 8 ++-- mm/filemap.c | 5 ++ mm/kasan/kasan_test.c | 7 ++- 8 files changed, 47 insertions(+), 57 deletions(-) --- a/arch/powerpc/include/asm/bitops.h~bitops-add-xor_unlock_is_negative_byte +++ a/arch/powerpc/include/asm/bitops.h @@ -234,32 +234,25 @@ static inline int arch_test_and_change_b } #ifdef CONFIG_PPC64 -static inline unsigned long -clear_bit_unlock_return_word(int nr, volatile unsigned long *addr) +static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *p) { unsigned long old, t; - unsigned long *p = (unsigned long *)addr + BIT_WORD(nr); - unsigned long mask = BIT_MASK(nr); __asm__ __volatile__ ( PPC_RELEASE_BARRIER "1:" PPC_LLARX "%0,0,%3,0\n" - "andc %1,%0,%2\n" + "xor %1,%0,%2\n" PPC_STLCX "%1,0,%3\n" "bne- 1b\n" : "=&r" (old), "=&r" (t) : "r" (mask), "r" (p) : "cc", "memory"); - return old; + return (old & BIT_MASK(7)) != 0; } -/* - * This is a special function for mm/filemap.c - * Bit 7 corresponds to PG_waiters. - */ -#define arch_clear_bit_unlock_is_negative_byte(nr, addr) \ - (clear_bit_unlock_return_word(nr, addr) & BIT_MASK(7)) +#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte #endif /* CONFIG_PPC64 */ --- a/arch/x86/include/asm/bitops.h~bitops-add-xor_unlock_is_negative_byte +++ a/arch/x86/include/asm/bitops.h @@ -94,18 +94,17 @@ arch___clear_bit(unsigned long nr, volat asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } -static __always_inline bool -arch_clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *addr) { bool negative; - asm volatile(LOCK_PREFIX "andb %2,%1" + asm volatile(LOCK_PREFIX "xorb %2,%1" CC_SET(s) : CC_OUT(s) (negative), WBYTE_ADDR(addr) - : "ir" ((char) ~(1 << nr)) : "memory"); + : "iq" ((char)mask) : "memory"); return negative; } -#define arch_clear_bit_unlock_is_negative_byte \ - arch_clear_bit_unlock_is_negative_byte +#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte static __always_inline void arch___clear_bit_unlock(long nr, volatile unsigned long *addr) --- a/include/asm-generic/bitops/instrumented-lock.h~bitops-add-xor_unlock_is_negative_byte +++ a/include/asm-generic/bitops/instrumented-lock.h @@ -58,27 +58,30 @@ static inline bool test_and_set_bit_lock return arch_test_and_set_bit_lock(nr, addr); } -#if defined(arch_clear_bit_unlock_is_negative_byte) +#if defined(arch_xor_unlock_is_negative_byte) /** - * clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom - * byte is negative, for unlock. - * @nr: the bit to clear - * @addr: the address to start counting from + * xor_unlock_is_negative_byte - XOR a single byte in memory and test if + * it is negative, for unlock. + * @mask: Change the bits which are set in this mask. + * @addr: The address of the word containing the byte to change. * + * Changes some of bits 0-6 in the word pointed to by @addr. * This operation is atomic and provides release barrier semantics. + * Used to optimise some folio operations which are commonly paired + * with an unlock or end of writeback. Bit 7 is used as PG_waiters to + * indicate whether anybody is waiting for the unlock. * - * This is a bit of a one-trick-pony for the filemap code, which clears - * PG_locked and tests PG_waiters, + * Return: Whether the top bit of the byte is set. */ -static inline bool -clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +static inline bool xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *addr) { kcsan_release(); - instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long)); - return arch_clear_bit_unlock_is_negative_byte(nr, addr); + instrument_atomic_write(addr, sizeof(long)); + return arch_xor_unlock_is_negative_byte(mask, addr); } /* Let everybody know we have it. */ -#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte +#define xor_unlock_is_negative_byte xor_unlock_is_negative_byte #endif #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_LOCK_H */ --- a/include/asm-generic/bitops/lock.h~bitops-add-xor_unlock_is_negative_byte +++ a/include/asm-generic/bitops/lock.h @@ -66,27 +66,16 @@ arch___clear_bit_unlock(unsigned int nr, raw_atomic_long_set_release((atomic_long_t *)p, old); } -/** - * arch_clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom - * byte is negative, for unlock. - * @nr: the bit to clear - * @addr: the address to start counting from - * - * This is a bit of a one-trick-pony for the filemap code, which clears - * PG_locked and tests PG_waiters, - */ -#ifndef arch_clear_bit_unlock_is_negative_byte -static inline bool arch_clear_bit_unlock_is_negative_byte(unsigned int nr, - volatile unsigned long *p) +#ifndef arch_xor_unlock_is_negative_byte +static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, + volatile unsigned long *p) { long old; - unsigned long mask = BIT_MASK(nr); - p += BIT_WORD(nr); - old = raw_atomic_long_fetch_andnot_release(mask, (atomic_long_t *)p); + old = raw_atomic_long_fetch_xor_release(mask, (atomic_long_t *)p); return !!(old & BIT(7)); } -#define arch_clear_bit_unlock_is_negative_byte arch_clear_bit_unlock_is_negative_byte +#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte #endif #include <asm-generic/bitops/instrumented-lock.h> --- a/kernel/kcsan/kcsan_test.c~bitops-add-xor_unlock_is_negative_byte +++ a/kernel/kcsan/kcsan_test.c @@ -700,10 +700,10 @@ static void test_barrier_nothreads(struc KCSAN_EXPECT_RW_BARRIER(mutex_lock(&test_mutex), false); KCSAN_EXPECT_RW_BARRIER(mutex_unlock(&test_mutex), true); -#ifdef clear_bit_unlock_is_negative_byte - KCSAN_EXPECT_READ_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true); - KCSAN_EXPECT_WRITE_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true); - KCSAN_EXPECT_RW_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var), true); +#ifdef xor_unlock_is_negative_byte + KCSAN_EXPECT_READ_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true); + KCSAN_EXPECT_WRITE_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true); + KCSAN_EXPECT_RW_BARRIER(xor_unlock_is_negative_byte(1, &test_var), true); #endif kcsan_nestable_atomic_end(); } --- a/kernel/kcsan/selftest.c~bitops-add-xor_unlock_is_negative_byte +++ a/kernel/kcsan/selftest.c @@ -228,10 +228,10 @@ static bool __init test_barrier(void) spin_lock(&test_spinlock); KCSAN_CHECK_RW_BARRIER(spin_unlock(&test_spinlock)); -#ifdef clear_bit_unlock_is_negative_byte - KCSAN_CHECK_RW_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var)); - KCSAN_CHECK_READ_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var)); - KCSAN_CHECK_WRITE_BARRIER(clear_bit_unlock_is_negative_byte(0, &test_var)); +#ifdef xor_unlock_is_negative_byte + KCSAN_CHECK_RW_BARRIER(xor_unlock_is_negative_byte(1, &test_var)); + KCSAN_CHECK_READ_BARRIER(xor_unlock_is_negative_byte(1, &test_var)); + KCSAN_CHECK_WRITE_BARRIER(xor_unlock_is_negative_byte(1, &test_var)); #endif kcsan_nestable_atomic_end(); --- a/mm/filemap.c~bitops-add-xor_unlock_is_negative_byte +++ a/mm/filemap.c @@ -1482,6 +1482,11 @@ void folio_add_wait_queue(struct folio * } EXPORT_SYMBOL_GPL(folio_add_wait_queue); +#ifdef xor_unlock_is_negative_byte +#define clear_bit_unlock_is_negative_byte(nr, p) \ + xor_unlock_is_negative_byte(1 << nr, p) +#endif + #ifndef clear_bit_unlock_is_negative_byte /* --- a/mm/kasan/kasan_test.c~bitops-add-xor_unlock_is_negative_byte +++ a/mm/kasan/kasan_test.c @@ -1099,9 +1099,10 @@ static void kasan_bitops_test_and_modify KUNIT_EXPECT_KASAN_FAIL(test, __test_and_change_bit(nr, addr)); KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = test_bit(nr, addr)); -#if defined(clear_bit_unlock_is_negative_byte) - KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = - clear_bit_unlock_is_negative_byte(nr, addr)); +#if defined(xor_unlock_is_negative_byte) + if (nr < 7) + KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = + xor_unlock_is_negative_byte(1 << nr, addr)); #endif } _ Patches currently in -mm which might be from willy@xxxxxxxxxxxxx are mm-make-lock_folio_maybe_drop_mmap-vma-lock-aware.patch mm-call-wp_page_copy-under-the-vma-lock.patch mm-handle-shared-faults-under-the-vma-lock.patch mm-handle-cow-faults-under-the-vma-lock.patch mm-handle-read-faults-under-the-vma-lock.patch mm-handle-write-faults-to-ro-pages-under-the-vma-lock.patch iomap-hold-state_lock-over-call-to-ifs_set_range_uptodate.patch iomap-protect-read_bytes_pending-with-the-state_lock.patch mm-add-folio_end_read.patch ext4-use-folio_end_read.patch buffer-use-folio_end_read.patch iomap-use-folio_end_read.patch bitops-add-xor_unlock_is_negative_byte.patch alpha-implement-xor_unlock_is_negative_byte.patch m68k-implement-xor_unlock_is_negative_byte.patch mips-implement-xor_unlock_is_negative_byte.patch powerpc-implement-arch_xor_unlock_is_negative_byte-on-32-bit.patch riscv-implement-xor_unlock_is_negative_byte.patch s390-implement-arch_xor_unlock_is_negative_byte.patch mm-delete-checks-for-xor_unlock_is_negative_byte.patch mm-add-folio_xor_flags_has_waiters.patch mm-make-__end_folio_writeback-return-void.patch mm-use-folio_xor_flags_has_waiters-in-folio_end_writeback.patch