Re: [PATCH 2/5] kbuild: allow archs to select build for link dead code/data elimination

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 5 Aug 2016, Nicholas Piggin wrote:

> Introduce LINKER_DCE option for architectures to select if they want
> to build with -ffunction-sections, -fdata-sections, and link with
> --gc-sections. It requires some work (documented) to ensure all
> unreferenced entrypoints are live, and requires toolchain and
> build verification, so it is made a per-arch option for now.
> 
> On a random powerpc64le build, this yields a significant size saving,
> it boots and runs fine, but there is a lot I haven't tested as yet,
> so these savings may be reduced if there are bugs in the link.
> 
>     text      data        bss        dec   filename
> 11169741   1180744    1923176	14273661   vmlinux
> 10445269   1004127    1919707	13369103   vmlinux.dce
> 
> ~700K text, ~170K data, 6% removed from kernel image size.
> 
> Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>

I played with that too. However, this needs distinct sections for
exception tables and the like; otherwise the backward references from the
final exception table to the functions responsible for those exception
entries have the effect of pulling in all those functions even if their
entry point is never referenced, making --gc-sections less effective.
I managed to fix this only with a change to gas (accepted upstream).

But once that is solved, you then have the missing forward reference
problem, i.e. nothing actually references those individual exception
entry sections, and ld happily drops them all. Having a KEEP() on each of
them is unworkable and defeats the purpose anyway.  What is needed
instead is a dummy reloc to trick ld into pulling in those sections when
the parent section is also pulled in.

Please see attached a subset of the slides I presented at ELC and Linaro 
Connect last year to illustrate those issues.

Also attached is a sample patch partially implementing those changes.

In short I'm very glad to see that this might steer interest across 
multiple architectures.  I felt like this was becoming much more 
intrusive than I expected and that maybe LTO was a better bet after all. 
But LTO has its evils too and I'm willing to look at gc-sections again 
if there is interest from others as well.


Nicolas

Attachment: gc_slides.html.gz
Description: application/gzip

commit 1d7ec46257dc546bc7b87439788514fc4650a2b1
Author: Nicolas Pitre <nicolas.pitre@xxxxxxxxxx>
Date:   Mon Oct 26 10:16:14 2015 -0400

    ARM: pushlinkedsection introduction
    
    Signed-off-by: Nicolas Pitre <nico@xxxxxxxxxx>

diff --git a/Makefile b/Makefile
index d5b3739119..75541414cb 100644
--- a/Makefile
+++ b/Makefile
@@ -775,6 +775,10 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
 	KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
 endif
 
+# Named subsections
+KBUILD_AFLAGS	+= -Wa,--sectname-subst
+KBUILD_CFLAGS	+= -Wa,--sectname-subst
+
 include scripts/Makefile.kasan
 include scripts/Makefile.extrawarn
 
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index b2bc8e1147..70161c9bfa 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -88,6 +88,17 @@
 #endif
 
 /*
+ * Special .pushsection wrapper with explicit dependency to prevent
+ * garbage collection of the specified section.  This is needed when no
+ * explicit symbol references are made to this section.
+ */
+	.macro	.pushlinkedsection name:vararg
+	.reloc	. - 1, R_ARM_NONE, 9909f
+	.pushsection \name
+9909:
+	.endm
+
+/*
  * Enable and disable interrupts
  */
 #if __LINUX_ARM_ARCH__ >= 6
@@ -239,7 +250,7 @@
 
 #define USER(x...)				\
 9999:	x;					\
-	.pushsection __ex_table,"a";		\
+	.pushlinkedsection __ex_table.%S,"a";	\
 	.align	3;				\
 	.long	9999b,9001f;			\
 	.popsection
@@ -253,7 +264,7 @@
  * ALT_SMP( W(instr) ... )
  */
 #define ALT_UP(instr...)					\
-	.pushsection ".alt.smp.init", "a"			;\
+	.pushlinkedsection ".alt.smp.init.%S", "a"		;\
 	.long	9998b						;\
 9997:	instr							;\
 	.if . - 9997b == 2					;\
@@ -265,7 +276,7 @@
 	.popsection
 #define ALT_UP_B(label)					\
 	.equ	up_b_offset, label - 9998b			;\
-	.pushsection ".alt.smp.init", "a"			;\
+	.pushlinkedsection ".alt.smp.init.%S", "a"		;\
 	.long	9998b						;\
 	W(b)	. + up_b_offset					;\
 	.popsection
@@ -375,7 +386,7 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	.error	"Unsupported inc macro argument"
 	.endif
 
-	.pushsection __ex_table,"a"
+	.pushlinkedsection __ex_table.%S,"a"
 	.align	3
 	.long	9999b, \abort
 	.popsection
@@ -416,7 +427,7 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	.error	"Unsupported inc macro argument"
 	.endif
 
-	.pushsection __ex_table,"a"
+	.pushlinkedsection __ex_table.%S,"a"
 	.align	3
 	.long	9999b, \abort
 	.popsection
diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h
index e7335a9214..0cbb6ef4b5 100644
--- a/arch/arm/include/asm/bug.h
+++ b/arch/arm/include/asm/bug.h
@@ -3,6 +3,7 @@
 
 #include <linux/linkage.h>
 #include <linux/types.h>
+#include <asm/compiler.h>
 #include <asm/opcodes.h>
 
 #ifdef CONFIG_BUG
@@ -39,9 +40,9 @@ do {								\
 		".pushsection .rodata.str, \"aMS\", %progbits, 1\n" \
 		"2:\t.asciz " #__file "\n" 			\
 		".popsection\n" 				\
-		".pushsection __bug_table,\"a\"\n"		\
+		__pushlinkedsection("__bug_table.%S,\"a\"") "\n"\
 		".align 2\n"					\
-		"3:\t.word 1b, 2b\n"				\
+		"\t.word 1b, 2b\n"				\
 		"\t.hword " #__line ", 0\n"			\
 		".popsection");					\
 	unreachable();						\
diff --git a/arch/arm/include/asm/compiler.h b/arch/arm/include/asm/compiler.h
index 29fe85e594..3bfdd749a3 100644
--- a/arch/arm/include/asm/compiler.h
+++ b/arch/arm/include/asm/compiler.h
@@ -24,5 +24,14 @@
 	  ".endif; "				\
 	".endif\n\t"
 
+/*
+ * Special .pushsection wrapper with explicit dependency to prevent
+ * garbage collection of the specified section.  This is needed when no
+ * explicit symbol references are made to this section.
+ */
+#define __pushlinkedsection(name)		\
+	".reloc	. - 1, R_ARM_NONE, 9909f\n"	\
+	"\t.pushsection " name "\n"		\
+	"9909:	"
 
 #endif /* __ASM_ARM_COMPILER_H */
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 6795368ad0..3540e42084 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -5,11 +5,12 @@
 
 #include <linux/futex.h>
 #include <linux/uaccess.h>
+#include <asm/compiler.h>
 #include <asm/errno.h>
 
 #define __futex_atomic_ex_table(err_reg)			\
 	"3:\n"							\
-	"	.pushsection __ex_table,\"a\"\n"		\
+	"	" __pushlinkedsection("__ex_table.%S,\"a\"") "\n" \
 	"	.align	3\n"					\
 	"	.long	1b, 4f, 2b, 4f\n"			\
 	"	.popsection\n"					\
diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index 34f7b6980d..54e2a5ec11 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -4,6 +4,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
+#include <asm/compiler.h>
 #include <asm/unified.h>
 
 #define JUMP_LABEL_NOP_SIZE 4
@@ -12,7 +13,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran
 {
 	asm_volatile_goto("1:\n\t"
 		 WASM(nop) "\n\t"
-		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 __pushlinkedsection("__jump_table.%S, \"aw\"") "\n\t"
 		 ".word 1b, %l[l_yes], %c0\n\t"
 		 ".popsection\n\t"
 		 : :  "i" (&((char *)key)[branch]) :  : l_yes);
@@ -26,7 +27,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
 {
 	asm_volatile_goto("1:\n\t"
 		 WASM(b) " %l[l_yes]\n\t"
-		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 __pushlinkedsection("__jump_table.%S, \"aw\"") "\n\t"
 		 ".word 1b, %l[l_yes], %c0\n\t"
 		 ".popsection\n\t"
 		 : :  "i" (&((char *)key)[branch]) :  : l_yes);
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 98d58bb04a..d5cc34e9a7 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -18,6 +18,8 @@
 #include <linux/types.h>
 #include <linux/sizes.h>
 
+#include <asm/compiler.h>
+
 #ifdef CONFIG_NEED_MACH_MEMORY_H
 #include <mach/memory.h>
 #endif
@@ -172,7 +174,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define __pv_stub(from,to,instr,type)			\
 	__asm__("@ __pv_stub\n"				\
 	"1:	" instr "	%0, %1, %2\n"		\
-	"	.pushsection .pv_table,\"a\"\n"		\
+	"	" __pushlinkedsection(".pv_table.%S,\"a\"") "\n" \
 	"	.long	1b\n"				\
 	"	.popsection\n"				\
 	: "=r" (to)					\
@@ -181,7 +183,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define __pv_stub_mov_hi(t)				\
 	__asm__ volatile("@ __pv_stub_mov\n"		\
 	"1:	mov	%R0, %1\n"			\
-	"	.pushsection .pv_table,\"a\"\n"		\
+	"	" __pushlinkedsection(".pv_table.%S,\"a\"") "\n" \
 	"	.long	1b\n"				\
 	"	.popsection\n"				\
 	: "=r" (t)					\
@@ -191,7 +193,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
 	__asm__ volatile("@ __pv_add_carry_stub\n"	\
 	"1:	adds	%Q0, %1, %2\n"			\
 	"	adc	%R0, %R0, #0\n"			\
-	"	.pushsection .pv_table,\"a\"\n"		\
+	"	" __pushlinkedsection(".pv_table.%S,\"a\"") "\n" \
 	"	.long	1b\n"				\
 	"	.popsection\n"				\
 	: "+r" (y)					\
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 8a1e8e995d..8c535eacea 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -19,6 +19,7 @@
 
 #ifdef __KERNEL__
 
+#include <asm/compiler.h>
 #include <asm/hw_breakpoint.h>
 #include <asm/ptrace.h>
 #include <asm/types.h>
@@ -93,7 +94,7 @@ unsigned long get_wchan(struct task_struct *p);
 #ifdef CONFIG_SMP
 #define __ALT_SMP_ASM(smp, up)						\
 	"9998:	" smp "\n"						\
-	"	.pushsection \".alt.smp.init\", \"a\"\n"		\
+	"	" __pushlinkedsection("\".alt.smp.init.%S\", \"a\"") "\n" \
 	"	.long	9998b\n"					\
 	"	" up "\n"						\
 	"	.popsection\n"
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 8cc85a4ebe..5e7e404894 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -357,7 +357,7 @@ do {									\
 	"	mov	%1, #0\n"				\
 	"	b	2b\n"					\
 	"	.popsection\n"					\
-	"	.pushsection __ex_table,\"a\"\n"		\
+	"	" __pushlinkedsection("__ex_table.%S,\"a\"") "\n" \
 	"	.align	3\n"					\
 	"	.long	1b, 3b\n"				\
 	"	.popsection"					\
@@ -429,7 +429,7 @@ do {									\
 	"3:	mov	%0, %3\n"				\
 	"	b	2b\n"					\
 	"	.popsection\n"					\
-	"	.pushsection __ex_table,\"a\"\n"		\
+	"	" __pushlinkedsection("__ex_table.%S,\"a\"") "\n" \
 	"	.align	3\n"					\
 	"	.long	1b, 3b\n"				\
 	"	.popsection"					\
@@ -479,7 +479,7 @@ do {									\
 	"4:	mov	%0, %3\n"				\
 	"	b	3b\n"					\
 	"	.popsection\n"					\
-	"	.pushsection __ex_table,\"a\"\n"		\
+	"	" __pushlinkedsection("__ex_table.%S,\"a\"") "\n" \
 	"	.align	3\n"					\
 	"	.long	1b, 4b\n"				\
 	"	.long	2b, 4b\n"				\
diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h
index 5831dce4b5..348a462d3e 100644
--- a/arch/arm/include/asm/word-at-a-time.h
+++ b/arch/arm/include/asm/word-at-a-time.h
@@ -8,6 +8,7 @@
  * Heavily based on the x86 algorithm.
  */
 #include <linux/kernel.h>
+#include <asm/compiler.h>
 
 struct word_at_a_time {
 	const unsigned long one_bits, high_bits;
@@ -84,7 +85,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
 #endif
 	"	b	2b\n"
 	"	.popsection\n"
-	"	.pushsection __ex_table,\"a\"\n"
+	"	" __pushlinkedsection("__ex_table.%S,\"a\"") "\n"
 	"	.align	3\n"
 	"	.long	1b, 3b\n"
 	"	.popsection"
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 3e1c26eb32..5047757c34 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -564,7 +564,7 @@ ENDPROC(__und_usr)
 4:	str     r4, [sp, #S_PC]			@ retry current instruction
 	ret	r9
 	.popsection
-	.pushsection __ex_table,"a"
+	.pushlinkedsection __ex_table.%S,"a"
 	.long	1b, 4b
 #if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7
 	.long	2b, 4b
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 8b60fde5ce..6885382931 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -36,7 +36,7 @@
 #define ARM_CPU_KEEP(x)
 #endif
 
-#if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
+#if 0 // (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
 	defined(CONFIG_GENERIC_BUG)
 #define ARM_EXIT_KEEP(x)	x
 #define ARM_EXIT_DISCARD(x)
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
index fab5a50503..238c7de114 100644
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -104,7 +104,7 @@ for_each_frame:	tst	frame, mask		@ Check for address exceptions
 no_frame:	ldmfd	sp!, {r4 - r8, pc}
 ENDPROC(c_backtrace)
 		
-		.pushsection __ex_table,"a"
+		.pushlinkedsection __ex_table.%S,"a"
 		.align	3
 		.long	1001b, 1006b
 		.long	1002b, 1006b
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 8ecfd15c3a..e2c6a5649f 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -132,7 +132,7 @@ __get_user_bad:
 ENDPROC(__get_user_bad)
 ENDPROC(__get_user_bad8)
 
-.pushsection __ex_table, "a"
+.pushlinkedsection __ex_table.%S, "a"
 	.long	1b, __get_user_bad
 	.long	2b, __get_user_bad
 	.long	3b, __get_user_bad
diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
index 38d660d370..b52f4a264e 100644
--- a/arch/arm/lib/putuser.S
+++ b/arch/arm/lib/putuser.S
@@ -88,7 +88,7 @@ __put_user_bad:
 	ret	lr
 ENDPROC(__put_user_bad)
 
-.pushsection __ex_table, "a"
+.pushlinkedsection __ex_table.%S, "a"
 	.long	1b, __put_user_bad
 	.long	2b, __put_user_bad
 	.long	3b, __put_user_bad
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 00b7f7de28..a2e6f47edb 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -22,6 +22,7 @@
 #include <linux/sched.h>
 #include <linux/uaccess.h>
 
+#include <asm/compiler.h>
 #include <asm/cp15.h>
 #include <asm/system_info.h>
 #include <asm/unaligned.h>
@@ -206,7 +207,7 @@ union offset_union {
 	"3:	mov	%0, #1\n"			\
 	"	b	2b\n"				\
 	"	.popsection\n"				\
-	"	.pushsection __ex_table,\"a\"\n"	\
+	"	" __pushlinkedsection("__ex_table.%S,\"a\"") "\n" \
 	"	.align	3\n"				\
 	"	.long	1b, 3b\n"			\
 	"	.popsection\n"				\
@@ -266,7 +267,7 @@ union offset_union {
 		"4:	mov	%0, #1\n"			\
 		"	b	3b\n"				\
 		"	.popsection\n"				\
-		"	.pushsection __ex_table,\"a\"\n"	\
+		"	" __pushlinkedsection("__ex_table.%S,\"a\"") "\n" \
 		"	.align	3\n"				\
 		"	.long	1b, 4b\n"			\
 		"	.long	2b, 4b\n"			\
@@ -306,7 +307,7 @@ union offset_union {
 		"6:	mov	%0, #1\n"			\
 		"	b	5b\n"				\
 		"	.popsection\n"				\
-		"	.pushsection __ex_table,\"a\"\n"	\
+		"	" __pushlinkedsection("__ex_table.%S,\"a\"") "\n" \
 		"	.align	3\n"				\
 		"	.long	1b, 6b\n"			\
 		"	.long	2b, 6b\n"			\
diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S
index 39c20afad7..8f566c87c2 100644
--- a/arch/arm/nwfpe/entry.S
+++ b/arch/arm/nwfpe/entry.S
@@ -119,7 +119,7 @@ next:
 .Lfix:	ret	r9			@ let the user eat segfaults
 	.popsection
 
-	.pushsection __ex_table,"a"
+	.pushlinkedsection __ex_table.%S,"a"
 	.align	3
 	.long	.Lx1, .Lfix
 	.popsection

[Index of Archives]     [Linux Kernel]     [Kernel Newbies]     [x86 Platform Driver]     [Netdev]     [Linux Wireless]     [Netfilter]     [Bugtraq]     [Linux Filesystems]     [Yosemite Discussion]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]

  Powered by Linux