[tip: locking/core] locking/qspinlock/x86: Micro-optimize virt_spin_lock()

"tip-bot2 for Uros Bizjak" <tip-bot2@xxxxxxxxxxxxx> · Wed, 24 Apr 2024 10:04:26 -0000

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     94af3a04e3f386d4f060d903826e85aa006ce252
Gitweb:        https://git.kernel.org/tip/94af3a04e3f386d4f060d903826e85aa006ce252
Author:        Uros Bizjak <ubizjak@xxxxxxxxx>
AuthorDate:    Mon, 22 Apr 2024 14:00:38 +02:00
Committer:     Ingo Molnar <mingo@xxxxxxxxxx>
CommitterDate: Wed, 24 Apr 2024 11:46:28 +02:00

locking/qspinlock/x86: Micro-optimize virt_spin_lock()

Optimize virt_spin_lock() to use simpler and faster:

  atomic_try_cmpxchg(*ptr, &val, new)

instead of:

  atomic_cmpxchg(*ptr, val, new) == val

The x86 CMPXCHG instruction returns success in the ZF flag, so
this change saves a compare after the CMPXCHG.

Also optimize retry loop a bit. atomic_try_cmpxchg() fails iff
&lock->val != 0, so there is no need to load and compare the
lock value again - cpu_relax() can be unconditinally called in
this case. This allows us to generate optimized:

  1f:	ba 01 00 00 00       	mov    $0x1,%edx
  24:	8b 03                	mov    (%rbx),%eax
  26:	85 c0                	test   %eax,%eax
  28:	75 63                	jne    8d <...>
  2a:	f0 0f b1 13          	lock cmpxchg %edx,(%rbx)
  2e:	75 5d                	jne    8d <...>
..
  8d:	f3 90                	pause
  8f:	eb 93                	jmp    24 <...>

instead of:

  1f:	ba 01 00 00 00       	mov    $0x1,%edx
  24:	8b 03                	mov    (%rbx),%eax
  26:	85 c0                	test   %eax,%eax
  28:	75 13                	jne    3d <...>
  2a:	f0 0f b1 13          	lock cmpxchg %edx,(%rbx)
  2e:	85 c0                	test   %eax,%eax
  30:	75 f2                	jne    24 <...>
..
  3d:	f3 90                	pause
  3f:	eb e3                	jmp    24 <...>

Signed-off-by: Uros Bizjak <ubizjak@xxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Waiman Long <longman@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Link: https://lore.kernel.org/r/20240422120054.199092-1-ubizjak@xxxxxxxxx
---
 arch/x86/include/asm/qspinlock.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index cde8357..a053c12 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -85,6 +85,8 @@ DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
 #define virt_spin_lock virt_spin_lock
 static inline bool virt_spin_lock(struct qspinlock *lock)
 {
+	int val;
+
 	if (!static_branch_likely(&virt_spin_lock_key))
 		return false;
 
@@ -94,10 +96,13 @@ static inline bool virt_spin_lock(struct qspinlock *lock)
 	 * horrible lock 'holder' preemption issues.
 	 */
 
-	do {
-		while (atomic_read(&lock->val) != 0)
-			cpu_relax();
-	} while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
+ __retry:
+	val = atomic_read(&lock->val);
+
+	if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) {
+		cpu_relax();
+		goto __retry;
+	}
 
 	return true;
 }