The following commit has been merged into the locking/core branch of tip: Commit-ID: 94af3a04e3f386d4f060d903826e85aa006ce252 Gitweb: https://git.kernel.org/tip/94af3a04e3f386d4f060d903826e85aa006ce252 Author: Uros Bizjak <ubizjak@xxxxxxxxx> AuthorDate: Mon, 22 Apr 2024 14:00:38 +02:00 Committer: Ingo Molnar <mingo@xxxxxxxxxx> CommitterDate: Wed, 24 Apr 2024 11:46:28 +02:00 locking/qspinlock/x86: Micro-optimize virt_spin_lock() Optimize virt_spin_lock() to use simpler and faster: atomic_try_cmpxchg(*ptr, &val, new) instead of: atomic_cmpxchg(*ptr, val, new) == val The x86 CMPXCHG instruction returns success in the ZF flag, so this change saves a compare after the CMPXCHG. Also optimize retry loop a bit. atomic_try_cmpxchg() fails iff &lock->val != 0, so there is no need to load and compare the lock value again - cpu_relax() can be unconditinally called in this case. This allows us to generate optimized: 1f: ba 01 00 00 00 mov $0x1,%edx 24: 8b 03 mov (%rbx),%eax 26: 85 c0 test %eax,%eax 28: 75 63 jne 8d <...> 2a: f0 0f b1 13 lock cmpxchg %edx,(%rbx) 2e: 75 5d jne 8d <...> .. 8d: f3 90 pause 8f: eb 93 jmp 24 <...> instead of: 1f: ba 01 00 00 00 mov $0x1,%edx 24: 8b 03 mov (%rbx),%eax 26: 85 c0 test %eax,%eax 28: 75 13 jne 3d <...> 2a: f0 0f b1 13 lock cmpxchg %edx,(%rbx) 2e: 85 c0 test %eax,%eax 30: 75 f2 jne 24 <...> .. 3d: f3 90 pause 3f: eb e3 jmp 24 <...> Signed-off-by: Uros Bizjak <ubizjak@xxxxxxxxx> Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Waiman Long <longman@xxxxxxxxxx> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Link: https://lore.kernel.org/r/20240422120054.199092-1-ubizjak@xxxxxxxxx --- arch/x86/include/asm/qspinlock.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index cde8357..a053c12 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -85,6 +85,8 @@ DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); #define virt_spin_lock virt_spin_lock static inline bool virt_spin_lock(struct qspinlock *lock) { + int val; + if (!static_branch_likely(&virt_spin_lock_key)) return false; @@ -94,10 +96,13 @@ static inline bool virt_spin_lock(struct qspinlock *lock) * horrible lock 'holder' preemption issues. */ - do { - while (atomic_read(&lock->val) != 0) - cpu_relax(); - } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); + __retry: + val = atomic_read(&lock->val); + + if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) { + cpu_relax(); + goto __retry; + } return true; }