The quilt patch titled
     Subject: lib/lockref.c: support lockref reference count if enable LOCK_STAT
has been removed from the -mm tree.  Its filename was
     support-lockref-reference-count-if-enable-lock_stat.patch

This patch was dropped because it was nacked

------------------------------------------------------
From: yongli-oc <yongli-oc@xxxxxxxxxxx>
Subject: lib/lockref.c: support lockref reference count if enable LOCK_STAT
Date: Wed, 13 Nov 2024 16:57:03 +0800

Swap the positions of lock and count so that CMPXCHG_LOCKREF is still
supported when SPINLOCK_SIZE > 4, for example when LOCK_STAT is enabled.
The reference count can then always be updated atomically, regardless of
the spinlock_t size.

struct lockref is used in dcache.h.  When spinlock_t is 4 bytes, it plus
the 4-byte reference count makes struct lockref an 8-byte variable, so the
count can be incremented/decremented with a single 64-bit atomic operation
while the spinlock is unlocked.

If the spinlock is larger than 4 bytes, for example when the kernel config
DEBUG_SPINLOCK or LOCK_STAT is enabled, every count inc/dec must take the
spinlock first, following the sequence "lock; inc_ref; unlock;", which
greatly increases spinlock contention.
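For illustration, a rough userspace sketch of the lockless fast path that
the 8-byte layout enables is shown below.  The names are made up for the
example (my_lockref, a bare uint32_t lock word where 0 means "unlocked"),
and the GCC/Clang __atomic builtins stand in for the kernel's
try_cmpxchg64_relaxed(); the real implementation is the CMPXCHG_LOOP()
macro in lib/lockref.c changed by this patch.

#include <stdbool.h>
#include <stdint.h>

struct my_lockref {
	union {
		uint64_t lock_count;		/* the whole 8-byte word */
		struct {
			uint32_t lock;		/* 0 == unlocked in this sketch */
			int32_t  count;
		};
	};
};

/* Fast path: "inc_ref" without "lock; inc_ref; unlock;". */
bool lockref_get_not_locked(struct my_lockref *ref)
{
	uint64_t old = __atomic_load_n(&ref->lock_count, __ATOMIC_RELAXED);

	for (int retry = 0; retry < 100; retry++) {
		struct my_lockref new = { .lock_count = old };

		if (new.lock != 0)		/* lock is held: caller must lock */
			return false;
		new.count++;
		/* On failure, "old" is reloaded, as with cmpxchg() in the kernel. */
		if (__atomic_compare_exchange_n(&ref->lock_count, &old,
						new.lock_count, false,
						__ATOMIC_RELAXED, __ATOMIC_RELAXED))
			return true;
	}
	return false;				/* too much contention: take the lock */
}

If the lock word is seen held, or the compare-and-swap keeps losing races,
the caller falls back to the ordinary "lock; inc_ref; unlock;" slow path.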
The chart below shows the spinlock contention with and without the lockref
patch; it was obtained with "cat /proc/lock_stat" after an Ubuntu 22.04
boot.  Each case was run twice.  The con-bounces and contentions of 6.6.28
with LOCK_STAT enabled are much higher than those of the patched 6.6.28
kernel.  With the lockref patch the count can again be updated atomically,
and the statistics are similar to those of a 6.6.28 production kernel.

lock_stat version 0.4
--------------------------------------------------------------
bootup times   class name            con-bounces   contentions
--------kernel 6.6.28, enable LOCK_STAT-----------------------
      1        &dentry->d_lock:           385336        413098
      2        &dentry->d_lock:           378268        402761
--------------------------------------------------------------
-------6.6.28 with lockref patch and LOCK_STAT----------------
      1        &dentry->d_lock:            79941         82431
      2        &dentry->d_lock:            77817         80301
--------------------------------------------------------------

Link: https://lkml.kernel.org/r/20241113085703.148839-1-yongli-oc@xxxxxxxxxxx
Signed-off-by: yongli-oc <yongli-oc@xxxxxxxxxxx>
Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 include/linux/lockref.h |   17 ++++++++++-
 lib/lockref.c           |   58 +++++++++++++++++++++++++++-----------
 2 files changed, 58 insertions(+), 17 deletions(-)

--- a/include/linux/lockref.h~support-lockref-reference-count-if-enable-lock_stat
+++ a/include/linux/lockref.h
@@ -22,15 +22,30 @@
 	(IS_ENABLED(CONFIG_ARCH_USE_CMPXCHG_LOCKREF) && \
 	 IS_ENABLED(CONFIG_SMP) && SPINLOCK_SIZE <= 4)
 
+#define USE_CMPXCHG_LOCKREF_ALTERNATIVE \
+	(IS_ENABLED(CONFIG_ARCH_USE_CMPXCHG_LOCKREF) && \
+	 IS_ENABLED(CONFIG_SMP) && SPINLOCK_SIZE > 4)
+
 struct lockref {
 	union {
 #if USE_CMPXCHG_LOCKREF
 		aligned_u64 lock_count;
-#endif
 		struct {
 			spinlock_t lock;
 			int count;
 		};
+#elif USE_CMPXCHG_LOCKREF_ALTERNATIVE
+		aligned_u64 lock_count;
+		struct {
+			int count;
+			spinlock_t lock;
+		} __packed;
+#else
+		struct {
+			spinlock_t lock;
+			int count;
+		};
+#endif
 	};
 };
 
--- a/lib/lockref.c~support-lockref-reference-count-if-enable-lock_stat
+++ a/lib/lockref.c
@@ -8,22 +8,48 @@
  * Note that the "cmpxchg()" reloads the "old" value for the
  * failure case.
  */
-#define CMPXCHG_LOOP(CODE, SUCCESS) do {				\
-	int retry = 100;						\
-	struct lockref old;						\
-	BUILD_BUG_ON(sizeof(old) != 8);					\
-	old.lock_count = READ_ONCE(lockref->lock_count);		\
-	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \
-		struct lockref new = old;				\
-		CODE							\
-		if (likely(try_cmpxchg64_relaxed(&lockref->lock_count,	\
-						 &old.lock_count,	\
-						 new.lock_count))) {	\
-			SUCCESS;					\
-		}							\
-		if (!--retry)						\
-			break;						\
-	}								\
+#define CMPXCHG_LOOP(CODE, SUCCESS) do {				\
+	int retry = 100;						\
+	struct lockref old;						\
+	BUILD_BUG_ON(sizeof(old) != 8);					\
+	old.lock_count = READ_ONCE(lockref->lock_count);		\
+	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \
+		struct lockref new = old;				\
+		CODE							\
+		if (likely(try_cmpxchg64_relaxed(&lockref->lock_count,	\
+						 &old.lock_count,	\
+						 new.lock_count))) {	\
+			SUCCESS;					\
+		}							\
+		if (!--retry)						\
+			break;						\
+	}								\
+} while (0)
+
+#elif USE_CMPXCHG_LOCKREF_ALTERNATIVE
+
+/*
+ * Note that the "cmpxchg()" reloads the "old" value for the
+ * failure case.
+ */
+#define CMPXCHG_LOOP(CODE, SUCCESS) do {				\
+	int retry = 100;						\
+	struct lockref old;						\
+	BUILD_BUG_ON(offsetof(struct lockref, lock) != 4);		\
+	BUILD_BUG_ON(offsetof(spinlock_t, rlock) != 0);			\
+	BUILD_BUG_ON(offsetof(raw_spinlock_t, raw_lock) != 0);		\
+	old.lock_count = READ_ONCE(lockref->lock_count);		\
+	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \
+		struct lockref new = old;				\
+		CODE							\
+		if (likely(try_cmpxchg64_relaxed(&lockref->lock_count,	\
+						 &old.lock_count,	\
+						 new.lock_count))) {	\
+			SUCCESS;					\
+		}							\
+		if (!--retry)						\
+			break;						\
+	}								\
 } while (0)
 
 #else
_

Patches currently in -mm which might be from yongli-oc@xxxxxxxxxxx are