Re: MIPS: Get rid of branches to .subsections.

Hello.

Ralf Baechle wrote:

It was a nice optimization - on paper at least.  In practice it results in
branches that may exceed the maximum legal range for a branch (a MIPS
branch encodes a 16-bit signed word offset, so it can only reach about
+/-128 KiB).  We can fight that problem with -ffunction-sections, but
-ffunction-sections in turn is incompatible with -pg as used by the
function tracer.

By rewriting the loop around all simple LL/SC blocks to C we reduce
the amount of inline assembler and at the same time allow GCC to often
fill the branch delay slots with something sensible or whever else clever
                                                         ^^^^^^
   Whatever?

optimization it may have up in its sleeve.

With this optimization gone we also no longer need -ffunction-sections,
so drop it.
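
   To spell the pattern out once for readers skimming the diff: each
   conversion below moves the retry branch out of the inline asm into a
   C do/while loop.  A condensed sketch, not a verbatim hunk (change_bit's
   xor stands in for the various operations, and the .set mips3/.set mips0
   bracketing is omitted for brevity) -- before:

	__asm__ __volatile__(
	"1:	" __LL "%0, %1				\n"
	"	xor	%0, %2				\n"
	"	" __SC	"%0, %1				\n"
	"	beqz	%0, 2f				\n"
	"	.subsection 2				\n"
	"2:	b	1b				\n"
	"	.previous				\n"
	: "=&r" (temp), "=m" (*m)
	: "ir" (1UL << bit), "m" (*m));

   and after:

	do {
		__asm__ __volatile__(
		"	" __LL "%0, %1			\n"
		"	xor	%0, %2			\n"
		"	" __SC	"%0, %1			\n"
		: "=&r" (temp), "+m" (*m)
		: "ir" (1UL << bit));
	} while (unlikely(!temp));

   With the beqz gone from the asm, GCC emits the loop branch itself, can
   fill its delay slot, and never has to reach a distant .subsection.  (In
   passing, the patch also merges the separate "=m"/"m" operand pair into
   a single "+m".)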

This optimization was originall introduced in 2.6.21, commit
                        ^^^^^^^^^
    Originally.

5999eca25c1fd4b9b9aca7833b04d10fe4bc877d (linux-mips.org) rsp.
f65e4fa8e0c6022ad58dc88d1b11b12589ed7f9f (kernel.org).

The original fix for the issues which caused me to pull this optimization
was provided by Paul Gortmaker <paul.gortmaker@xxxxxxxxxxxxx>.

Signed-off-by: Ralf Baechle <ralf@xxxxxxxxxxxxxx>

Index: linux-queue/arch/mips/include/asm/atomic.h
===================================================================
--- linux-queue.orig/arch/mips/include/asm/atomic.h
+++ linux-queue/arch/mips/include/asm/atomic.h

[...]

@@ -443,18 +436,16 @@ static __inline__ void atomic64_add(long
 	} else if (kernel_uses_llsc) {
 		long temp;
-		__asm__ __volatile__(
-		"	.set	mips3					\n"
-		"1:	lld	%0, %1		# atomic64_add		\n"
-		"	daddu	%0, %2					\n"
-		"	scd	%0, %1					\n"
-		"	beqz	%0, 2f					\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	.previous					\n"
-		"	.set	mips0					\n"
-		: "=&r" (temp), "=m" (v->counter)
-		: "Ir" (i), "m" (v->counter));
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"1:	lld	%0, %1		# atomic64_add	\n"

   You've kept the label here but it seems unused...
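
   Presumably it can simply be dropped, i.e. (an untested sketch -- the
   hunk above with only the "1:" label removed):

		do {
			__asm__ __volatile__(
			"	.set	mips3				\n"
			"	lld	%0, %1		# atomic64_add	\n"
			"	daddu	%0, %2				\n"
			"	scd	%0, %1				\n"
			"	.set	mips0				\n"
			: "=&r" (temp), "=m" (v->counter)
			: "Ir" (i), "m" (v->counter));
		} while (unlikely(!temp));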

+			"	daddu	%0, %2				\n"
+			"	scd	%0, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "=m" (v->counter)
+			: "Ir" (i), "m" (v->counter));
+		} while (unlikely(!temp));
 	} else {
 		unsigned long flags;
@@ -488,18 +479,16 @@ static __inline__ void atomic64_sub(long
 	} else if (kernel_uses_llsc) {
 		long temp;
-		__asm__ __volatile__(
-		"	.set	mips3					\n"
-		"1:	lld	%0, %1		# atomic64_sub		\n"
-		"	dsubu	%0, %2					\n"
-		"	scd	%0, %1					\n"
-		"	beqz	%0, 2f					\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	.previous					\n"
-		"	.set	mips0					\n"
-		: "=&r" (temp), "=m" (v->counter)
-		: "Ir" (i), "m" (v->counter));
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"1:	lld	%0, %1		# atomic64_sub	\n"

   Same here...

+			"	dsubu	%0, %2				\n"
+			"	scd	%0, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "=m" (v->counter)
+			: "Ir" (i), "m" (v->counter));
+		} while (unlikely(!temp));
 	} else {
 		unsigned long flags;
@@ -535,20 +524,19 @@ static __inline__ long atomic64_add_retu
 	} else if (kernel_uses_llsc) {
 		long temp;
-		__asm__ __volatile__(
-		"	.set	mips3					\n"
-		"1:	lld	%1, %2		# atomic64_add_return	\n"
-		"	daddu	%0, %1, %3				\n"
-		"	scd	%0, %2					\n"
-		"	beqz	%0, 2f					\n"
-		"	daddu	%0, %1, %3				\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	.previous					\n"
-		"	.set	mips0					\n"
-		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
-		: "Ir" (i), "m" (v->counter)
-		: "memory");
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"1:	lld	%1, %2	# atomic64_add_return	\n"

   ... and here.

+			"	daddu	%0, %1, %3			\n"
+			"	scd	%0, %2				\n"
+			"	.set	mips0				\n"
+			: "=&r" (result), "=&r" (temp), "=m" (v->counter)
+			: "Ir" (i), "m" (v->counter)
+			: "memory");
+		} while (unlikely(!result));
+
+		result = temp + i;
 	} else {
 		unsigned long flags;
@@ -587,20 +575,19 @@ static __inline__ long atomic64_sub_retu
 	} else if (kernel_uses_llsc) {
 		long temp;
-		__asm__ __volatile__(
-		"	.set	mips3					\n"
-		"1:	lld	%1, %2		# atomic64_sub_return	\n"
-		"	dsubu	%0, %1, %3				\n"
-		"	scd	%0, %2					\n"
-		"	beqz	%0, 2f					\n"
-		"	dsubu	%0, %1, %3				\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	.previous					\n"
-		"	.set	mips0					\n"
-		: "=&r" (result), "=&r" (temp), "=m" (v->counter)
-		: "Ir" (i), "m" (v->counter)
-		: "memory");
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"1:	lld	%1, %2	# atomic64_sub_return	\n"

   ... and here.

+			"	dsubu	%0, %1, %3			\n"
+			"	scd	%0, %2				\n"
+			"	.set	mips0				\n"
+			: "=&r" (result), "=&r" (temp), "=m" (v->counter)
+			: "Ir" (i), "m" (v->counter)
+			: "memory");
+		} while (unlikely(!result));
+
+		result = temp - i;
 	} else {
 		unsigned long flags;

Index: linux-queue/arch/mips/include/asm/bitops.h
===================================================================
--- linux-queue.orig/arch/mips/include/asm/bitops.h
+++ linux-queue/arch/mips/include/asm/bitops.h
@@ -213,24 +205,22 @@ static inline void change_bit(unsigned l
 		"	" __SC	"%0, %1				\n"
 		"	beqzl	%0, 1b				\n"
 		"	.set	mips0				\n"
-		: "=&r" (temp), "=m" (*m)
-		: "ir" (1UL << bit), "m" (*m));
+		: "=&r" (temp), "+m" (*m)
+		: "ir" (1UL << bit));
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
-		__asm__ __volatile__(
-		"	.set	mips3				\n"
-		"1:	" __LL "%0, %1		# change_bit	\n"
-		"	xor	%0, %2				\n"
-		"	" __SC	"%0, %1				\n"
-		"	beqz	%0, 2f				\n"
-		"	.subsection 2				\n"
-		"2:	b	1b				\n"
-		"	.previous				\n"
-		"	.set	mips0				\n"
-		: "=&r" (temp), "=m" (*m)
-		: "ir" (1UL << bit), "m" (*m));
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"1:	" __LL "%0, %1		# change_bit	\n"

   ... and here too.

+			"	xor	%0, %2				\n"
+			"	" __SC	"%0, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+m" (*m)
+			: "ir" (1UL << bit));
+		} while (unlikely(!temp));
 	} else {
 		volatile unsigned long *a = addr;
 		unsigned long mask;
@@ -272,30 +262,26 @@ static inline int test_and_set_bit(unsig
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
 		"	.set	mips0					\n"
-		: "=&r" (temp), "=m" (*m), "=&r" (res)
-		: "r" (1UL << bit), "m" (*m)
+		: "=&r" (temp), "+m" (*m), "=&r" (res)
+		: "r" (1UL << bit)
 		: "memory");
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	noreorder				\n"
-		"	.set	mips3					\n"
-		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
-		"	or	%2, %0, %3				\n"
-		"	" __SC	"%2, %1					\n"
-		"	beqz	%2, 2f					\n"
-		"	 and	%2, %0, %3				\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	 nop						\n"
-		"	.previous					\n"
-		"	.set	pop					\n"
-		: "=&r" (temp), "=m" (*m), "=&r" (res)
-		: "r" (1UL << bit), "m" (*m)
-		: "memory");
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"1:	" __LL "%0, %1	# test_and_set_bit	\n"

   ... and here as well.

+			"	or	%2, %0, %3			\n"
+			"	" __SC	"%2, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+m" (*m), "=&r" (res)
+			: "r" (1UL << bit)
+			: "memory");
+		} while (unlikely(!res));
+
+		res = temp & (1UL << bit);
 	} else {
 		volatile unsigned long *a = addr;
 		unsigned long mask;
@@ -340,30 +326,26 @@ static inline int test_and_set_bit_lock(
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
 		"	.set	mips0					\n"
-		: "=&r" (temp), "=m" (*m), "=&r" (res)
-		: "r" (1UL << bit), "m" (*m)
+		: "=&r" (temp), "+m" (*m), "=&r" (res)
+		: "r" (1UL << bit)
 		: "memory");
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	noreorder				\n"
-		"	.set	mips3					\n"
-		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
-		"	or	%2, %0, %3				\n"
-		"	" __SC	"%2, %1					\n"
-		"	beqz	%2, 2f					\n"
-		"	 and	%2, %0, %3				\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	 nop						\n"
-		"	.previous					\n"
-		"	.set	pop					\n"
-		: "=&r" (temp), "=m" (*m), "=&r" (res)
-		: "r" (1UL << bit), "m" (*m)
-		: "memory");
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"1:	" __LL "%0, %1	# test_and_set_bit	\n"

   ... and here.

+			"	or	%2, %0, %3			\n"
+			"	" __SC	"%2, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+m" (*m), "=&r" (res)
+			: "r" (1UL << bit)
+			: "memory");
+		} while(unlikely(!res));
+
+		res = temp & (1UL << bit);
 	} else {
 		volatile unsigned long *a = addr;
 		unsigned long mask;
@@ -410,49 +392,43 @@ static inline int test_and_clear_bit(uns
[...]
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	noreorder				\n"
-		"	.set	mips3					\n"
-		"1:	" __LL	"%0, %1		# test_and_clear_bit	\n"
-		"	or	%2, %0, %3				\n"
-		"	xor	%2, %3					\n"
-		"	" __SC 	"%2, %1					\n"
-		"	beqz	%2, 2f					\n"
-		"	 and	%2, %0, %3				\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	 nop						\n"
-		"	.previous					\n"
-		"	.set	pop					\n"
-		: "=&r" (temp), "=m" (*m), "=&r" (res)
-		: "r" (1UL << bit), "m" (*m)
-		: "memory");
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"1:	" __LL	"%0, %1	# test_and_clear_bit	\n"

   ... and here.

+			"	or	%2, %0, %3			\n"
+			"	xor	%2, %3				\n"
+			"	" __SC 	"%2, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+m" (*m), "=&r" (res)
+			: "r" (1UL << bit)
+			: "memory");
+		} while (unlikely(!res));
+
+		res = temp & (1UL << bit);
 	} else {
 		volatile unsigned long *a = addr;
 		unsigned long mask;

WBR, Sergei



