[RFC][PATCH 5/7] qspinlock: Optimize the pending case

Replace the initial set-pending cmpxchg() loop with an unconditional
test-and-set bit (x86: bts) instruction.

It loses the direct trylock state transition; however, since that
transition should be very unlikely (we have just failed a trylock)
this shouldn't be a problem.

Signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
---
 include/asm-generic/qspinlock_types.h |    2 +
 kernel/locking/qspinlock.c            |   60 +++++++++++++++++++---------------
 2 files changed, 36 insertions(+), 26 deletions(-)
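
For illustration only (not part of the patch): a minimal user-space
model of the old and new fast paths, using GCC __atomic builtins. All
names below are made-up stand-ins for the _Q_* constants, and the old
loop's ability to win the uncontended trylock outright is only noted
in a comment rather than modelled in the return value.

#include <stdint.h>
#include <stdbool.h>

#define LOCKED_VAL	(1U << 0)	/* stands in for _Q_LOCKED_VAL */
#define PENDING_BIT	8		/* stands in for _Q_PENDING_OFFSET */
#define LOCKED_MASK	0xffU		/* stands in for _Q_LOCKED_MASK */

/* Old scheme: cmpxchg loop; can also take the lock outright on 0,0,0. */
static bool set_pending_cmpxchg(uint32_t *lock, uint32_t val)
{
	for (;;) {
		uint32_t new, old;

		if (val & ~LOCKED_MASK)		/* pending or tail set */
			return false;		/* queue */

		new = LOCKED_VAL;		/* 0,0,0 -> 0,0,1 ; trylock */
		if (val == new)
			new |= 1U << PENDING_BIT; /* 0,0,1 -> 0,1,1 ; pending */

		old = val;
		if (__atomic_compare_exchange_n(lock, &old, new, false,
						__ATOMIC_ACQ_REL,
						__ATOMIC_RELAXED))
			return true;
		val = old;			/* lost the race; retry */
	}
}

/* New scheme: one unconditional test-and-set (x86: bts). */
static bool set_pending_bts(uint32_t *lock, uint32_t val)
{
	if (val & ~LOCKED_MASK)			/* pending or tail set */
		return false;			/* queue */

	/* fetch_or models an atomic test-and-set of the pending bit */
	return !(__atomic_fetch_or(lock, 1U << PENDING_BIT,
				   __ATOMIC_ACQUIRE) & (1U << PENDING_BIT));
}

The cmpxchg loop can fail and retry arbitrarily often under contention
on the lock word; the bts variant issues exactly one atomic operation
and decides from its result.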

--- a/include/asm-generic/qspinlock_types.h
+++ b/include/asm-generic/qspinlock_types.h
@@ -59,6 +59,8 @@ typedef struct qspinlock {
 #define _Q_TAIL_CPU_BITS	(32 - _Q_TAIL_CPU_OFFSET)
 #define _Q_TAIL_CPU_MASK	(((1U << _Q_TAIL_CPU_BITS) - 1) << _Q_TAIL_CPU_OFFSET)
 
+#define _Q_TAIL_MASK		(_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
+
 #define _Q_LOCKED_VAL		(1U << _Q_LOCKED_OFFSET)
 #define _Q_PENDING_VAL		(1U << _Q_PENDING_OFFSET)
 
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -83,6 +83,37 @@ static inline struct mcs_spinlock *decod
 	return per_cpu_ptr(&mcs_nodes[idx], cpu);
 }
 
+/*
+ * 0,0,1 -> 0,1,* ; pending
+ *
+ * Ignore the locked bit; if we set pending and locked happens to be clear
+ * we'll fall through on the subsequent wait.
+ */
+static __always_inline int
+try_set_pending(struct qspinlock *lock, u32 val)
+{
+	if (val & ~_Q_LOCKED_MASK)
+		return 0; /* fail; queue */
+
+	/*
+	 * If we find the pending bit already set, fail and queue.
+	 */
+	if (atomic_test_and_set_bit(_Q_PENDING_OFFSET, &lock->val))
+		return 0;
+
+	/*
+	 * If we raced and someone concurrently set the tail, that is no
+	 * problem: they need not have observed our pending bit and may
+	 * already have claimed the lock.
+	 *
+	 * The next node in line, however, will wait for the pending bit to
+	 * clear again, so in effect we have merely flipped the order of two
+	 * contenders whose order the race had left undetermined anyway.
+	 */
+
+	return 1;
+}
+
 #define _Q_LOCKED_PENDING_MASK	(_Q_LOCKED_MASK | _Q_PENDING_MASK)
 
 /**
@@ -115,34 +146,10 @@ void queue_spin_lock_slowpath(struct qsp
 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
 
 	/*
-	 * trylock || pending
-	 *
-	 * 0,0,0 -> 0,0,1 ; trylock
 	 * 0,0,1 -> 0,1,1 ; pending
 	 */
-	for (;;) {
-		/*
-		 * If we observe any contention; queue.
-		 */
-		if (val & ~_Q_LOCKED_MASK)
-			goto queue;
-
-		new = _Q_LOCKED_VAL;
-		if (val == new)
-			new |= _Q_PENDING_VAL;
-
-		old = atomic_cmpxchg(&lock->val, val, new);
-		if (old == val)
-			break;
-
-		val = old;
-	}
-
-	/*
-	 * we won the trylock
-	 */
-	if (new == _Q_LOCKED_VAL)
-		return;
+	if (!try_set_pending(lock, val))
+		goto queue;
 
 	/*
 	 * we're pending, wait for the owner to go away.
@@ -186,6 +193,7 @@ void queue_spin_lock_slowpath(struct qsp
 	 * 0,0,0 -> 0,0,1 ; trylock
 	 * p,y,x -> n,y,x ; prev = xchg(lock, node)
 	 */
+	val = atomic_read(&lock->val);
 	for (;;) {
 		new = _Q_LOCKED_VAL;
 		if (val)
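
As an aside, the "0,0,1 -> 0,1,1" style comments above read as a
(tail, pending, locked) triple. A small illustrative decoder follows
(not kernel code; the offsets are simplified assumptions, not the
exact qspinlock_types.h layout):

#include <stdint.h>
#include <stdio.h>

#define Q_LOCKED_MASK	0xffU		/* bits 0-7: locked byte */
#define Q_PENDING_BIT	8		/* bit 8: pending */
#define Q_TAIL_SHIFT	16		/* bits 16-31: tail (idx + cpu) */

static void decode(uint32_t val)
{
	printf("%u,%u,%u\n",
	       val >> Q_TAIL_SHIFT,
	       (val >> Q_PENDING_BIT) & 1U,
	       val & Q_LOCKED_MASK);
}

int main(void)
{
	decode(0x00000001);	/* 0,0,1: held, uncontended	 */
	decode(0x00000101);	/* 0,1,1: held, pending waiter	 */
	decode(0x00010101);	/* 1,1,1: queued waiters as well */
	return 0;
}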

