Re: patch: include/asm-mips/system.h __cmpxchg64 bugfix and cleanup

Should I apply my patch on top of this one?

Ralf Baechle wrote:
On Thu, Oct 12, 2006 at 02:03:39PM -0400, bile@xxxxxxxxxxxxxx wrote:

In include/asm-mips/system.h __cmpxchg_u64 is doing

if(cpu_has_llsc) {
} else if(cpu_has_llsc) {

the first should be (cpu_has_llsc && R10000_LLSC_WAR).

While this probably gets cleaned up during optimization, I took
the liberty of cleaning up the code along with the fix, so that it's
no longer doing the double check and only includes the R10000 workaround
at compile time.
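
(For context, the intended structure of __cmpxchg_u64 is roughly the
following sketch; the loop bodies are elided, and the fallback comment
reflects the usual kernel approach rather than verbatim kernel code.)

	if (cpu_has_llsc && R10000_LLSC_WAR) {
		/* ll/sc retry loop using the branch-likely form
		 * (beqzl), i.e. the R10000 errata workaround */
	} else if (cpu_has_llsc) {
		/* plain ll/sc retry loop using beqz */
	} else {
		/* no ll/sc: fall back to disabling interrupts
		 * around the compare-and-exchange */
	}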

Please include a Signed-off-by: line when submitting patches.

diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
index dcb4701..bfae6ff 100644
--- a/include/asm-mips/system.h
+++ b/include/asm-mips/system.h
@@ -206,7 +206,7 @@ static inline unsigned long __xchg_u32(v
 {
 	__u32 retval;
-	if (cpu_has_llsc && R10000_LLSC_WAR) {
+	if (cpu_has_llsc) {

Thanks for spotting this one.

@@ -216,25 +216,11 @@ static inline unsigned long __xchg_u32(v
 		"	move	%2, %z4					\n"
 		"	.set	mips3					\n"
 		"	sc	%2, %1					\n"
+#ifdef R10000_LLSC_WAR

This isn't quite right since R10000_LLSC_WAR is defined to 0 if the
workaround is not enabled, so "#if R10000_LLSC_WAR" would be needed
here.
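
A minimal illustration of the difference, assuming the workaround has
been configured off:

	#define R10000_LLSC_WAR 0

	#ifdef R10000_LLSC_WAR
	/* always compiled in: the macro is defined, even though
	 * its value is 0 */
	#endif

	#if R10000_LLSC_WAR
	/* compiled in only when the value is non-zero, i.e. only
	 * when the workaround is actually enabled */
	#endif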

 		"	beqzl	%2, 1b					\n"
-#ifdef CONFIG_SMP
-		"	sync						\n"
-#endif

Now the real reason why I don't want to apply your patch is the patch
below, which plays fancy tricks with the branch prediction of modern
processors.  The R10000 ll/sc workaround _relies_ on the misprediction
of the branch-likely branch to work correctly, so this optimization
cannot be applied to it; that is, we need two different versions.
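
(For reference, the workaround variants keep the branch-likely retry
loop. A sketch, with placeholder register and operand names rather than
the exact kernel code:

	1:	ll	t0, (addr)	# load-linked
		...			# compute new value in t0
		sc	t0, (addr)	# store-conditional
		beqzl	t0, 1b		# branch-likely retry; hinted
					#  taken, so a successful sc
					#  causes the mispredict the
					#  workaround depends on

Moving the retry out of line, as the patch below does, would remove
exactly that mispredict.)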

  Ralf

commit 23d60625d8a383a7faf632a50c2298c9c65aa0b2
Author: Ralf Baechle <ralf@xxxxxxxxxxxxxx>
Date:   Thu Sep 28 01:45:21 2006 +0100

    [MIPS] Improve branch prediction in ll/sc atomic operations.

    Now that finally all supported versions of binutils have functioning
    support for .subsection, use .subsection to tweak the branch prediction.
    I did not modify the R10000 errata variants because it seems unclear if
    this will invalidate the workaround, which actually relies on the cheesy
    prediction of branch likely to cause a mispredict if the sc was
    successful.

    Signed-off-by: Ralf Baechle <ralf@xxxxxxxxxxxxxx>
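
In outline, the pattern the patch applies everywhere except the R10000
variants is the following (a sketch with placeholder operands and the
actual operation elided):

	1:	ll	t0, (addr)	# load-linked
		...			# compute new value in t0
		sc	t0, (addr)	# store-conditional
		beqz	t0, 2f		# forward branch: with the usual
					#  backward-taken/forward-not-taken
					#  static prediction, the common
					#  success path falls through
		.subsection 2		# retry path assembled out of line
	2:	b	1b		# loop back and try again
		.previous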

diff --git a/include/asm-mips/atomic.h b/include/asm-mips/atomic.h
index e64abc0..91bf6a7 100644
--- a/include/asm-mips/atomic.h
+++ b/include/asm-mips/atomic.h
@@ -9,7 +9,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1996, 97, 99, 2000, 03, 04 by Ralf Baechle
+ * Copyright (C) 1996, 97, 99, 2000, 03, 04, 06 Ralf Baechle
  */
 /*
@@ -76,7 +76,10 @@ static __inline__ void atomic_add(int i,
 		"1:	ll	%0, %1		# atomic_add		\n"
 		"	addu	%0, %2					\n"
 		"	sc	%0, %1					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
@@ -118,7 +121,10 @@ static __inline__ void atomic_sub(int i,
 		"1:	ll	%0, %1		# atomic_sub		\n"
 		"	subu	%0, %2					\n"
 		"	sc	%0, %1					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
@@ -161,9 +167,12 @@ static __inline__ int atomic_add_return(
 		"1:	ll	%1, %2		# atomic_add_return	\n"
 		"	addu	%0, %1, %3				\n"
 		"	sc	%0, %2					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
 		"	addu	%0, %1, %3				\n"
 		"	sync						\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -208,9 +217,12 @@ static __inline__ int atomic_sub_return(
 		"1:	ll	%1, %2		# atomic_sub_return	\n"
 		"	subu	%0, %1, %3				\n"
 		"	sc	%0, %2					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
 		"	subu	%0, %1, %3				\n"
 		"	sync						\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -269,11 +281,14 @@ static __inline__ int atomic_sub_if_posi
 		"	bltz	%0, 1f					\n"
 		"	sc	%0, %2					\n"
 		"	.set	noreorder				\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
 		"	 subu	%0, %1, %3				\n"
 		"	.set	reorder					\n"
 		"	sync						\n"
 		"1:							\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -430,7 +445,10 @@ static __inline__ void atomic64_add(long
 		"1:	lld	%0, %1		# atomic64_add		\n"
 		"	addu	%0, %2					\n"
 		"	scd	%0, %1					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
@@ -472,7 +490,10 @@ static __inline__ void atomic64_sub(long
 		"1:	lld	%0, %1		# atomic64_sub		\n"
 		"	subu	%0, %2					\n"
 		"	scd	%0, %1					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter));
@@ -515,9 +536,12 @@ static __inline__ long atomic64_add_retu
 		"1:	lld	%1, %2		# atomic64_add_return	\n"
 		"	addu	%0, %1, %3				\n"
 		"	scd	%0, %2					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
 		"	addu	%0, %1, %3				\n"
 		"	sync						\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -562,9 +586,12 @@ static __inline__ long atomic64_sub_retu
 		"1:	lld	%1, %2		# atomic64_sub_return	\n"
 		"	subu	%0, %1, %3				\n"
 		"	scd	%0, %2					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
 		"	subu	%0, %1, %3				\n"
 		"	sync						\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
@@ -623,11 +650,14 @@ static __inline__ long atomic64_sub_if_p
 		"	bltz	%0, 1f					\n"
 		"	scd	%0, %2					\n"
 		"	.set	noreorder				\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
 		"	 dsubu	%0, %1, %3				\n"
 		"	.set	reorder					\n"
 		"	sync						\n"
 		"1:							\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
 		: "Ir" (i), "m" (v->counter)
diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index 1bb89c5..d10bd56 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 1994 - 1997, 1999, 2000  Ralf Baechle (ralf@xxxxxxx)
+ * Copyright (c) 1994 - 1997, 1999, 2000, 06 Ralf Baechle (ralf@xxxxxxxxxxxxxx)
  * Copyright (c) 1999, 2000  Silicon Graphics, Inc.
  */
 #ifndef _ASM_BITOPS_H
@@ -86,7 +86,10 @@ static inline void set_bit(unsigned long
 		"1:	" __LL "%0, %1			# set_bit	\n"
 		"	or	%0, %2					\n"
 		"	" __SC	"%0, %1					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (1UL << (nr & SZLONG_MASK)), "m" (*m));
@@ -134,7 +137,10 @@ static inline void clear_bit(unsigned lo
 		"1:	" __LL "%0, %1			# clear_bit	\n"
 		"	and	%0, %2					\n"
 		"	" __SC "%0, %1					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (~(1UL << (nr & SZLONG_MASK))), "m" (*m));
@@ -184,7 +190,10 @@ static inline void change_bit(unsigned l
 		"1:	" __LL "%0, %1		# change_bit	\n"
 		"	xor	%0, %2				\n"
 		"	" __SC	"%0, %1				\n"
-		"	beqz	%0, 1b				\n"
+		"	beqz	%0, 2f				\n"
+		"	.subsection 2				\n"
+		"2:	b	1b				\n"
+		"	.previous				\n"
 		"	.set	mips0				\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (1UL << (nr & SZLONG_MASK)), "m" (*m));
@@ -243,11 +252,14 @@ #endif
 		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
 		"	or	%2, %0, %3				\n"
 		"	" __SC	"%2, %1					\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
 		"	 and	%2, %0, %3				\n"
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
@@ -315,11 +327,14 @@ #endif
 		"	or	%2, %0, %3				\n"
 		"	xor	%2, %3					\n"
 		"	" __SC 	"%2, %1					\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
 		"	 and	%2, %0, %3				\n"
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
@@ -385,11 +400,14 @@ #endif
 		"1:	" __LL	"%0, %1		# test_and_change_bit	\n"
 		"	xor	%2, %0, %3				\n"
 		"	" __SC	"\t%2, %1				\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
 		"	 and	%2, %0, %3				\n"
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
diff --git a/include/asm-mips/spinlock.h b/include/asm-mips/spinlock.h
index 4c1a1b5..3df6103 100644
--- a/include/asm-mips/spinlock.h
+++ b/include/asm-mips/spinlock.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1999, 2000 by Ralf Baechle
+ * Copyright (C) 1999, 2000, 06 Ralf Baechle (ralf@xxxxxxxxxxxxxx)
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  */
 #ifndef _ASM_SPINLOCK_H
@@ -49,11 +49,18 @@ static inline void __raw_spin_lock(raw_s
 		__asm__ __volatile__(
 		"	.set	noreorder	# __raw_spin_lock	\n"
 		"1:	ll	%1, %2					\n"
-		"	bnez	%1, 1b					\n"
+		"	bnez	%1, 2f					\n"
 		"	 li	%1, 1					\n"
 		"	sc	%1, %0					\n"
-		"	beqz	%1, 1b					\n"
+		"	beqz	%1, 2f					\n"
 		"	 sync						\n"
+		"	.subsection 2					\n"
+		"2:	ll	%1, %2					\n"
+		"	bnez	%1, 2b					\n"
+		"	 li	%1, 1					\n"
+		"	b	1b					\n"
+		"	 nop						\n"
+		"	.previous					\n"
 		"	.set	reorder					\n"
 		: "=m" (lock->lock), "=&r" (tmp)
 		: "m" (lock->lock)
@@ -97,9 +104,12 @@ static inline unsigned int __raw_spin_tr
 		"1:	ll	%0, %3					\n"
 		"	ori	%2, %0, 1				\n"
 		"	sc	%2, %1					\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
 		"	 andi	%2, %0, 1				\n"
 		"	sync						\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	reorder"
 		: "=&r" (temp), "=m" (lock->lock), "=&r" (res)
 		: "m" (lock->lock)
@@ -152,11 +162,17 @@ static inline void __raw_read_lock(raw_r
 		__asm__ __volatile__(
 		"	.set	noreorder	# __raw_read_lock	\n"
 		"1:	ll	%1, %2					\n"
-		"	bltz	%1, 1b					\n"
+		"	bltz	%1, 2f					\n"
 		"	 addu	%1, 1					\n"
 		"	sc	%1, %0					\n"
 		"	beqz	%1, 1b					\n"
 		"	 sync						\n"
+		"	.subsection 2					\n"
+		"2:	ll	%1, %2					\n"
+		"	bltz	%1, 2b					\n"
+		"	 addu	%1, 1					\n"
+		"	b	1b					\n"
+		"	.previous					\n"
 		"	.set	reorder					\n"
 		: "=m" (rw->lock), "=&r" (tmp)
 		: "m" (rw->lock)
@@ -187,8 +203,10 @@ static inline void __raw_read_unlock(raw
 		"1:	ll	%1, %2					\n"
 		"	sub	%1, 1					\n"
 		"	sc	%1, %0					\n"
-		"	beqz	%1, 1b					\n"
+		"	beqz	%1, 2f					\n"
 		"	 sync						\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
 		"	.set	reorder					\n"
 		: "=m" (rw->lock), "=&r" (tmp)
 		: "m" (rw->lock)
@@ -217,11 +235,17 @@ static inline void __raw_write_lock(raw_
 		__asm__ __volatile__(
 		"	.set	noreorder	# __raw_write_lock	\n"
 		"1:	ll	%1, %2					\n"
-		"	bnez	%1, 1b					\n"
+		"	bnez	%1, 2f					\n"
 		"	 lui	%1, 0x8000				\n"
 		"	sc	%1, %0					\n"
-		"	beqz	%1, 1b					\n"
+		"	beqz	%1, 2f					\n"
 		"	 sync						\n"
+		"	.subsection 2					\n"
+		"2:	ll	%1, %2					\n"
+		"	bnez	%1, 1b					\n"
+		"	 lui	%1, 0x8000				\n"
+		"	b	1b					\n"
+		"	.previous					\n"
 		"	.set	reorder					\n"
 		: "=m" (rw->lock), "=&r" (tmp)
 		: "m" (rw->lock)
@@ -314,11 +338,15 @@ static inline int __raw_write_trylock(ra
 		"	bnez	%1, 2f					\n"
 		"	lui	%1, 0x8000				\n"
 		"	sc	%1, %0					\n"
-		"	beqz	%1, 1b					\n"
-		"	 sync						\n"
-		"	li	%2, 1					\n"
-		"	.set	reorder					\n"
+		"	beqz	%1, 3f					\n"
+		"	 li	%2, 1					\n"
 		"2:							\n"
+		"	sync						\n"
+		"	.subsection 2					\n"
+		"3:	b	1b					\n"
+		"	li	%2, 0					\n"
+		"	.previous					\n"
+		"	.set	reorder					\n"
 		: "=m" (rw->lock), "=&r" (tmp), "=&r" (ret)
 		: "m" (rw->lock)
 		: "memory");
diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
index dcb4701..fc35526 100644
--- a/include/asm-mips/system.h
+++ b/include/asm-mips/system.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1994, 95, 96, 97, 98, 99, 2003 by Ralf Baechle
+ * Copyright (C) 1994, 95, 96, 97, 98, 99, 2003, 06 by Ralf Baechle
  * Copyright (C) 1996 by Paul M. Antoine
  * Copyright (C) 1999 Silicon Graphics
  * Kevin D. Kissell, kevink@xxxxxxxx and Carsten Langgaard, carstenl@xxxxxxxx
@@ -234,10 +234,13 @@ #endif
 		"	move	%2, %z4					\n"
 		"	.set	mips3					\n"
 		"	sc	%2, %1					\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (retval), "=m" (*m), "=&r" (dummy)
 		: "R" (*m), "Jr" (val)
@@ -283,10 +286,13 @@ #endif
 		"1:	lld	%0, %3			# xchg_u64	\n"
 		"	move	%2, %z4					\n"
 		"	scd	%2, %1					\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (retval), "=m" (*m), "=&r" (dummy)
 		: "R" (*m), "Jr" (val)
@@ -364,11 +370,14 @@ #endif
 		"	move	$1, %z4					\n"
 		"	.set	mips3					\n"
 		"	sc	$1, %1					\n"
-		"	beqz	$1, 1b					\n"
+		"	beqz	$1, 3f					\n"
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
 		"2:							\n"
+		"	.subsection 2					\n"
+		"3:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	pop					\n"
 		: "=&r" (retval), "=R" (*m)
 		: "R" (*m), "Jr" (old), "Jr" (new)
@@ -419,11 +428,14 @@ #endif
 		"	bne	%0, %z3, 2f				\n"
 		"	move	$1, %z4					\n"
 		"	scd	$1, %1					\n"
-		"	beqz	$1, 1b					\n"
+		"	beqz	$1, 3f					\n"
 #ifdef CONFIG_SMP
 		"	sync						\n"
 #endif
 		"2:							\n"
+		"	.subsection 2					\n"
+		"3:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	pop					\n"
 		: "=&r" (retval), "=R" (*m)
 		: "R" (*m), "Jr" (old), "Jr" (new)




