[RFC] Convert ia64 spinlocks to use "tickets"

Early last year x86 changed their spinlock implementation to
use a fair "ticket" system (just like the one at the deli counter
in the grocery store).  Here is the same conversion for ia64.

Notes:
1) This increases the size of a spinlock from 4 bytes to 8. This
is not because I think that we'll have more than 65535 cpus
waiting for the same lock (if that ever happens we really need
to look at how the callers are using spinlocks!) but because
ia64 doesn't have a version of the fetchadd instruction that
works on anything smaller than 32-bit values.  The sketch after
these notes shows the resulting 64-bit layout.

2) I just did a naive implementation of the routines in C (the
sketch after these notes is a rough user-space analogue of the
same routines). The assembler generated by gcc doesn't look too
hideous ... but if we switch to tickets we should definitely look
to see if we can improve things.

3) This boots on a 4-socket Montecito (8 cores, 16 threads)
and has been building kernels using "make -j12" for a little
while.  But this counts as rather light testing.

4) I haven't done a performance comparison against the old
lock implementation yet.
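
For anyone who wants to poke at the idea outside the kernel, here is a
rough user-space sketch of the layout and routines referenced in notes
1 and 2 above.  It is only an illustration, not the patch: the GCC
__atomic builtins stand in for ia64_fetchadd()/ia64_cmpxchg(), and it
assumes the little-endian layout described in the patch comment, with
now_serving in the low 32 bits and next_ticket in the high 32 bits.

/* Illustrative user-space analogue of the ticket lock in this patch.
 * Not the kernel code: GCC __atomic builtins stand in for
 * ia64_fetchadd()/ia64_cmpxchg(), and plain spinning stands in for
 * cpu_relax().
 */
#include <stdint.h>

typedef struct {
        uint64_t lock;        /* [ next_ticket (63:32) | now_serving (31:0) ] */
} ticket_lock_t;

#define TICKET_SHIFT    32

static void ticket_lock(ticket_lock_t *l)
{
        uint32_t *p = (uint32_t *)&l->lock;
        /* take a ticket: a 32-bit fetch-and-add on the high half */
        uint32_t turn = __atomic_fetch_add(p + 1, 1, __ATOMIC_ACQUIRE);

        /* wait until now_serving reaches our ticket */
        while (__atomic_load_n(p, __ATOMIC_ACQUIRE) != turn)
                ;       /* cpu_relax() / hint @pause would go here */
}

static int ticket_trylock(ticket_lock_t *l)
{
        uint64_t tmp = __atomic_load_n(&l->lock, __ATOMIC_RELAXED);

        /* free iff next_ticket == now_serving */
        if (!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1ULL << TICKET_SHIFT) - 1))) {
                uint64_t want = tmp + (1ULL << TICKET_SHIFT);

                return __atomic_compare_exchange_n(&l->lock, &tmp, want, 0,
                                                   __ATOMIC_ACQUIRE,
                                                   __ATOMIC_RELAXED);
        }
        return 0;
}

static void ticket_unlock(ticket_lock_t *l)
{
        uint32_t *p = (uint32_t *)&l->lock;

        /* advance now_serving; the next waiter in line sees its turn */
        (void)__atomic_fetch_add(p, 1, __ATOMIC_RELEASE);
}

The trylock test is the same trick as in the patch: the xor of the two
halves, masked to 32 bits, is zero exactly when no outstanding ticket
is waiting to be served.  Like the kernel, the 32/64-bit punning wants
-fno-strict-aliasing if your compiler is picky.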
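
And a quick smoke test for the sketch above (again, nothing to do with
the kernel patch itself; compile the two together with something like
"gcc -O2 -pthread -fno-strict-aliasing"):

/* Hammer the sketch lock from several threads and check nothing is lost. */
#include <pthread.h>
#include <stdio.h>

static ticket_lock_t test_lock;         /* zero-initialized: unlocked */
static long counter;

static void *worker(void *arg)
{
        (void)arg;
        for (int i = 0; i < 100000; i++) {
                ticket_lock(&test_lock);
                counter++;
                ticket_unlock(&test_lock);
        }
        return NULL;
}

int main(void)
{
        pthread_t t[8];

        for (int i = 0; i < 8; i++)
                pthread_create(&t[i], NULL, worker, NULL);
        for (int i = 0; i < 8; i++)
                pthread_join(t[i], NULL);

        printf("counter = %ld (expect 800000)\n", counter);
        return 0;
}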


Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>

---

 arch/ia64/Kconfig                      |    4 
 arch/ia64/include/asm/spinlock.h       |  170 ++++++++++++++++-----------------
 arch/ia64/include/asm/spinlock_types.h |    2 
 arch/ia64/kernel/head.S                |   89 -----------------
 arch/ia64/kernel/ia64_ksyms.c          |   20 ---
 arch/ia64/oprofile/backtrace.c         |   20 ---
 6 files changed, 87 insertions(+), 218 deletions(-)

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 170042b..e82a0db 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -60,9 +60,7 @@ config IOMMU_HELPER
        bool
 
 config GENERIC_LOCKBREAK
-	bool
-	default y
-	depends on SMP && PREEMPT
+	def_bool n
 
 config RWSEM_XCHGADD_ALGORITHM
 	bool
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 13ab715..8ba3385 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -19,103 +19,101 @@
 
 #define __raw_spin_lock_init(x)			((x)->lock = 0)
 
-#ifdef ASM_SUPPORTED
 /*
- * Try to get the lock.  If we fail to get the lock, make a non-standard call to
- * ia64_spinlock_contention().  We do not use a normal call because that would force all
- * callers of __raw_spin_lock() to be non-leaf routines.  Instead, ia64_spinlock_contention() is
- * carefully coded to touch only those registers that __raw_spin_lock() marks "clobbered".
+ * Ticket locks are conceptually two parts, one indicating the current head of
+ * the queue, and the other indicating the current tail. The lock is acquired
+ * by atomically noting the tail and incrementing it by one (thus adding
+ * ourselves to the queue and noting our position), then waiting until the head
+ * becomes equal to the initial value of the tail.
+ *
+ *   63                     32  31                      0
+ *  +----------------------------------------------------+
+ *  |  next_ticket_number      |     now_serving         |
+ *  +----------------------------------------------------+
  */
 
-#define IA64_SPINLOCK_CLOBBERS "ar.ccv", "ar.pfs", "p14", "p15", "r27", "r28", "r29", "r30", "b6", "memory"
+#define TICKET_SHIFT	32
 
-static inline void
-__raw_spin_lock_flags (raw_spinlock_t *lock, unsigned long flags)
+static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 {
-	register volatile unsigned int *ptr asm ("r31") = &lock->lock;
-
-#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-# ifdef CONFIG_ITANIUM
-	/* don't use brl on Itanium... */
-	asm volatile ("{\n\t"
-		      "  mov ar.ccv = r0\n\t"
-		      "  mov r28 = ip\n\t"
-		      "  mov r30 = 1;;\n\t"
-		      "}\n\t"
-		      "cmpxchg4.acq r30 = [%1], r30, ar.ccv\n\t"
-		      "movl r29 = ia64_spinlock_contention_pre3_4;;\n\t"
-		      "cmp4.ne p14, p0 = r30, r0\n\t"
-		      "mov b6 = r29;;\n\t"
-		      "mov r27=%2\n\t"
-		      "(p14) br.cond.spnt.many b6"
-		      : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
-# else
-	asm volatile ("{\n\t"
-		      "  mov ar.ccv = r0\n\t"
-		      "  mov r28 = ip\n\t"
-		      "  mov r30 = 1;;\n\t"
-		      "}\n\t"
-		      "cmpxchg4.acq r30 = [%1], r30, ar.ccv;;\n\t"
-		      "cmp4.ne p14, p0 = r30, r0\n\t"
-		      "mov r27=%2\n\t"
-		      "(p14) brl.cond.spnt.many ia64_spinlock_contention_pre3_4;;"
-		      : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
-# endif /* CONFIG_MCKINLEY */
-#else
-# ifdef CONFIG_ITANIUM
-	/* don't use brl on Itanium... */
-	/* mis-declare, so we get the entry-point, not it's function descriptor: */
-	asm volatile ("mov r30 = 1\n\t"
-		      "mov r27=%2\n\t"
-		      "mov ar.ccv = r0;;\n\t"
-		      "cmpxchg4.acq r30 = [%0], r30, ar.ccv\n\t"
-		      "movl r29 = ia64_spinlock_contention;;\n\t"
-		      "cmp4.ne p14, p0 = r30, r0\n\t"
-		      "mov b6 = r29;;\n\t"
-		      "(p14) br.call.spnt.many b6 = b6"
-		      : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
-# else
-	asm volatile ("mov r30 = 1\n\t"
-		      "mov r27=%2\n\t"
-		      "mov ar.ccv = r0;;\n\t"
-		      "cmpxchg4.acq r30 = [%0], r30, ar.ccv;;\n\t"
-		      "cmp4.ne p14, p0 = r30, r0\n\t"
-		      "(p14) brl.call.spnt.many b6=ia64_spinlock_contention;;"
-		      : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
-# endif /* CONFIG_MCKINLEY */
-#endif
+	int	*p = (int *)&lock->lock, turn;
+
+	turn = ia64_fetchadd(1, p+1, acq);
+
+	while (ACCESS_ONCE(*p) != turn)
+		cpu_relax();
 }
 
-#define __raw_spin_lock(lock) __raw_spin_lock_flags(lock, 0)
+static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
+{
+	long tmp = ACCESS_ONCE(lock->lock), try;
 
-/* Unlock by doing an ordered store and releasing the cacheline with nta */
-static inline void __raw_spin_unlock(raw_spinlock_t *x) {
-	barrier();
-	asm volatile ("st4.rel.nta [%0] = r0\n\t" :: "r"(x));
+	if (!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1L << TICKET_SHIFT) - 1))) {
+		try = tmp + (1L << TICKET_SHIFT);
+
+		return ia64_cmpxchg(acq, &lock->lock, tmp, try, sizeof (tmp)) == tmp;
+	}
+	return 0;
 }
 
-#else /* !ASM_SUPPORTED */
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
-# define __raw_spin_lock(x)								\
-do {											\
-	__u32 *ia64_spinlock_ptr = (__u32 *) (x);					\
-	__u64 ia64_spinlock_val;							\
-	ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);			\
-	if (unlikely(ia64_spinlock_val)) {						\
-		do {									\
-			while (*ia64_spinlock_ptr)					\
-				ia64_barrier();						\
-			ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);	\
-		} while (ia64_spinlock_val);						\
-	}										\
-} while (0)
-#define __raw_spin_unlock(x)	do { barrier(); ((raw_spinlock_t *) x)->lock = 0; } while (0)
-#endif /* !ASM_SUPPORTED */
+static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
+{
+	int	*p = (int *)&lock->lock;
+
+	(void)ia64_fetchadd(1, p, rel);
+}
+
+static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
+{
+	long tmp = ACCESS_ONCE(lock->lock);
 
-#define __raw_spin_is_locked(x)		((x)->lock != 0)
-#define __raw_spin_trylock(x)		(cmpxchg_acq(&(x)->lock, 0, 1) == 0)
-#define __raw_spin_unlock_wait(lock) \
-	do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
+	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1L << TICKET_SHIFT) - 1));
+}
+
+static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
+{
+	long tmp = ACCESS_ONCE(lock->lock);
+
+	return (((tmp >> TICKET_SHIFT) - tmp) & ((1L << TICKET_SHIFT) - 1)) > 1;
+}
+
+static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+{
+	return __ticket_spin_is_locked(lock);
+}
+
+static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+{
+	return __ticket_spin_is_contended(lock);
+}
+#define __raw_spin_is_contended	__raw_spin_is_contended
+
+static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+{
+	__ticket_spin_lock(lock);
+}
+
+static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+	return __ticket_spin_trylock(lock);
+}
+
+static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+	__ticket_spin_unlock(lock);
+}
+
+static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+						  unsigned long flags)
+{
+	__raw_spin_lock(lock);
+}
+
+static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+{
+	while (__raw_spin_is_locked(lock))
+		cpu_relax();
+}
 
 #define __raw_read_can_lock(rw)		(*(volatile int *)(rw) >= 0)
 #define __raw_write_can_lock(rw)	(*(volatile int *)(rw) == 0)
diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h
index 474e46f..b61d136 100644
--- a/arch/ia64/include/asm/spinlock_types.h
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -6,7 +6,7 @@
 #endif
 
 typedef struct {
-	volatile unsigned int lock;
+	volatile unsigned long lock;
 } raw_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 23f846d..9f4ea89 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1130,95 +1130,6 @@ SET_REG(b5);
 #endif /* CONFIG_IA64_BRL_EMU */
 
 #ifdef CONFIG_SMP
-	/*
-	 * This routine handles spinlock contention.  It uses a non-standard calling
-	 * convention to avoid converting leaf routines into interior routines.  Because
-	 * of this special convention, there are several restrictions:
-	 *
-	 * - do not use gp relative variables, this code is called from the kernel
-	 *   and from modules, r1 is undefined.
-	 * - do not use stacked registers, the caller owns them.
-	 * - do not use the scratch stack space, the caller owns it.
-	 * - do not use any registers other than the ones listed below
-	 *
-	 * Inputs:
-	 *   ar.pfs - saved CFM of caller
-	 *   ar.ccv - 0 (and available for use)
-	 *   r27    - flags from spin_lock_irqsave or 0.  Must be preserved.
-	 *   r28    - available for use.
-	 *   r29    - available for use.
-	 *   r30    - available for use.
-	 *   r31    - address of lock, available for use.
-	 *   b6     - return address
-	 *   p14    - available for use.
-	 *   p15    - used to track flag status.
-	 *
-	 * If you patch this code to use more registers, do not forget to update
-	 * the clobber lists for spin_lock() in arch/ia64/include/asm/spinlock.h.
-	 */
-
-#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-
-GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
-	.prologue
-	.save ar.pfs, r0	// this code effectively has a zero frame size
-	.save rp, r28
-	.body
-	nop 0
-	tbit.nz p15,p0=r27,IA64_PSR_I_BIT
-	.restore sp		// pop existing prologue after next insn
-	mov b6 = r28
-	.prologue
-	.save ar.pfs, r0
-	.altrp b6
-	.body
-	;;
-(p15)	ssm psr.i		// reenable interrupts if they were on
-				// DavidM says that srlz.d is slow and is not required in this case
-.wait:
-	// exponential backoff, kdb, lockmeter etc. go in here
-	hint @pause
-	ld4 r30=[r31]		// don't use ld4.bias; if it's contended, we won't write the word
-	nop 0
-	;;
-	cmp4.ne p14,p0=r30,r0
-(p14)	br.cond.sptk.few .wait
-(p15)	rsm psr.i		// disable interrupts if we reenabled them
-	br.cond.sptk.few b6	// lock is now free, try to acquire
-	.global ia64_spinlock_contention_pre3_4_end	// for kernprof
-ia64_spinlock_contention_pre3_4_end:
-END(ia64_spinlock_contention_pre3_4)
-
-#else
-
-GLOBAL_ENTRY(ia64_spinlock_contention)
-	.prologue
-	.altrp b6
-	.body
-	tbit.nz p15,p0=r27,IA64_PSR_I_BIT
-	;;
-.wait:
-(p15)	ssm psr.i		// reenable interrupts if they were on
-				// DavidM says that srlz.d is slow and is not required in this case
-.wait2:
-	// exponential backoff, kdb, lockmeter etc. go in here
-	hint @pause
-	ld4 r30=[r31]		// don't use ld4.bias; if it's contended, we won't write the word
-	;;
-	cmp4.ne p14,p0=r30,r0
-	mov r30 = 1
-(p14)	br.cond.sptk.few .wait2
-(p15)	rsm psr.i		// disable interrupts if we reenabled them
-	;;
-	cmpxchg4.acq r30=[r31], r30, ar.ccv
-	;;
-	cmp4.ne p14,p0=r0,r30
-(p14)	br.cond.sptk.few .wait
-
-	br.ret.sptk.many b6	// lock is now taken
-END(ia64_spinlock_contention)
-
-#endif
 
 #ifdef CONFIG_HOTPLUG_CPU
 GLOBAL_ENTRY(ia64_jump_to_sal)
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 8ebccb5..14d39e3 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -84,26 +84,6 @@ EXPORT_SYMBOL(ia64_save_scratch_fpregs);
 #include <asm/unwind.h>
 EXPORT_SYMBOL(unw_init_running);
 
-#ifdef ASM_SUPPORTED
-# ifdef CONFIG_SMP
-#  if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-/*
- * This is not a normal routine and we don't want a function descriptor for it, so we use
- * a fake declaration here.
- */
-extern char ia64_spinlock_contention_pre3_4;
-EXPORT_SYMBOL(ia64_spinlock_contention_pre3_4);
-#  else
-/*
- * This is not a normal routine and we don't want a function descriptor for it, so we use
- * a fake declaration here.
- */
-extern char ia64_spinlock_contention;
-EXPORT_SYMBOL(ia64_spinlock_contention);
-#  endif
-# endif
-#endif
-
 #if defined(CONFIG_IA64_ESI) || defined(CONFIG_IA64_ESI_MODULE)
 extern void esi_call_phys (void);
 EXPORT_SYMBOL_GPL(esi_call_phys);
diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c
index adb0156..5cdd7e4 100644
--- a/arch/ia64/oprofile/backtrace.c
+++ b/arch/ia64/oprofile/backtrace.c
@@ -32,24 +32,6 @@ typedef struct
 	u64 *prev_pfs_loc;	/* state for WAR for old spinlock ool code */
 } ia64_backtrace_t;
 
-#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-/*
- * Returns non-zero if the PC is in the spinlock contention out-of-line code
- * with non-standard calling sequence (on older compilers).
- */
-static __inline__ int in_old_ool_spinlock_code(unsigned long pc)
-{
-	extern const char ia64_spinlock_contention_pre3_4[] __attribute__ ((weak));
-	extern const char ia64_spinlock_contention_pre3_4_end[] __attribute__ ((weak));
-	unsigned long sc_start = (unsigned long)ia64_spinlock_contention_pre3_4;
-	unsigned long sc_end = (unsigned long)ia64_spinlock_contention_pre3_4_end;
-	return (sc_start && sc_end && pc >= sc_start && pc < sc_end);
-}
-#else
-/* Newer spinlock code does a proper br.call and works fine with the unwinder */
-#define in_old_ool_spinlock_code(pc)	0
-#endif
-
 /* Returns non-zero if the PC is in the Interrupt Vector Table */
 static __inline__ int in_ivt_code(unsigned long pc)
 {
@@ -80,7 +62,7 @@ static __inline__ int next_frame(ia64_backtrace_t *bt)
 	 */
 	if (bt->prev_pfs_loc && bt->regs && bt->frame.pfs_loc == bt->prev_pfs_loc)
 		bt->frame.pfs_loc = &bt->regs->ar_pfs;
-	bt->prev_pfs_loc = (in_old_ool_spinlock_code(bt->frame.ip) ? bt->frame.pfs_loc : NULL);
+	bt->prev_pfs_loc = NULL;
 
 	return unw_unwind(&bt->frame) == 0;
 }

