On Fri, 5 Aug 2016, Nicholas Piggin wrote: > Introduce LINKER_DCE option for architectures to select if they want > to build with -ffunction-sections, -fdata-sections, and link with > --gc-sections. It requires some work (documented) to ensure all > unreferenced entrypoints are live, and requires toolchain and > build verification, so it is made a per-arch option for now. > > On a random powerpc64le build, this yields a significant size saving, > it boots and runs fine, but there is a lot I haven't tested as yet, > so these savings may be reduced if there are bugs in the link. > > text data bss dec filename > 11169741 1180744 1923176 14273661 vmlinux > 10445269 1004127 1919707 13369103 vmlinux.dce > > ~700K text, ~170K data, 6% removed from kernel image size. > > Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx> I played with that too. However this needs distinct sections for exception tables and the like, otherwise the backward references from the final exception table to those functions responsible for those exception entries have the effect of pulling in all those functions even if their entry point is never referenced, making --gc-sections less effective. I managed to fix this only with a change to gas (accepted upstream). But once that is solved, you then have the missing forward reference problem i.e. nothing actually references those individual exception entry sections and ld happily drops them all. Having a KEEP() on each of them is unworkable and defeats the purpose anyway. That requires a dummy reloc to trick ld into pulling in those sections when the parent section is also pulled in. Please see attached a subset of the slides I presented at ELC and Linaro Connect last year to illustrate those issues. Also attached a sample patch partially implementing those changes. In short I'm very glad to see that this might steer interest across multiple architectures. 
I felt like this was becoming much more intrusive than I expected and that maybe LTO was a better bet after all. But LTO has its evils too and I'm willing to look at gc-sections again if there is interest from others as well. Nicolas
Attachment:
gc_slides.html.gz
Description: application/gzip
commit 1d7ec46257dc546bc7b87439788514fc4650a2b1 Author: Nicolas Pitre <nicolas.pitre@xxxxxxxxxx> Date: Mon Oct 26 10:16:14 2015 -0400 ARM: pushlinkedsection introduction Signed-off-by: Nicolas Pitre <nico@xxxxxxxxxx> diff --git a/Makefile b/Makefile index d5b3739119..75541414cb 100644 --- a/Makefile +++ b/Makefile @@ -775,6 +775,10 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO endif +# Named subsections +KBUILD_AFLAGS += -Wa,--sectname-subst +KBUILD_CFLAGS += -Wa,--sectname-subst + include scripts/Makefile.kasan include scripts/Makefile.extrawarn diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index b2bc8e1147..70161c9bfa 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -88,6 +88,17 @@ #endif /* + * Special .pushsection wrapper with explicit dependency to prevent + * garbage collection of the specified section. This is needed when no + * explicit symbol references are made to this section. + */ + .macro .pushlinkedsection name:vararg + .reloc . - 1, R_ARM_NONE, 9909f + .pushsection \name +9909: + .endm + +/* * Enable and disable interrupts */ #if __LINUX_ARM_ARCH__ >= 6 @@ -239,7 +250,7 @@ #define USER(x...) \ 9999: x; \ - .pushsection __ex_table,"a"; \ + .pushlinkedsection __ex_table.%S,"a"; \ .align 3; \ .long 9999b,9001f; \ .popsection @@ -253,7 +264,7 @@ * ALT_SMP( W(instr) ... ) */ #define ALT_UP(instr...) \ - .pushsection ".alt.smp.init", "a" ;\ + .pushlinkedsection ".alt.smp.init.%S", "a" ;\ .long 9998b ;\ 9997: instr ;\ .if . - 9997b == 2 ;\ @@ -265,7 +276,7 @@ .popsection #define ALT_UP_B(label) \ .equ up_b_offset, label - 9998b ;\ - .pushsection ".alt.smp.init", "a" ;\ + .pushlinkedsection ".alt.smp.init.%S", "a" ;\ .long 9998b ;\ W(b) . 
+ up_b_offset ;\ .popsection @@ -375,7 +386,7 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .error "Unsupported inc macro argument" .endif - .pushsection __ex_table,"a" + .pushlinkedsection __ex_table.%S,"a" .align 3 .long 9999b, \abort .popsection @@ -416,7 +427,7 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .error "Unsupported inc macro argument" .endif - .pushsection __ex_table,"a" + .pushlinkedsection __ex_table.%S,"a" .align 3 .long 9999b, \abort .popsection diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index e7335a9214..0cbb6ef4b5 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -3,6 +3,7 @@ #include <linux/linkage.h> #include <linux/types.h> +#include <asm/compiler.h> #include <asm/opcodes.h> #ifdef CONFIG_BUG @@ -39,9 +40,9 @@ do { \ ".pushsection .rodata.str, \"aMS\", %progbits, 1\n" \ "2:\t.asciz " #__file "\n" \ ".popsection\n" \ - ".pushsection __bug_table,\"a\"\n" \ + __pushlinkedsection("__bug_table.%S,\"a\"") "\n"\ ".align 2\n" \ - "3:\t.word 1b, 2b\n" \ + "\t.word 1b, 2b\n" \ "\t.hword " #__line ", 0\n" \ ".popsection"); \ unreachable(); \ diff --git a/arch/arm/include/asm/compiler.h b/arch/arm/include/asm/compiler.h index 29fe85e594..3bfdd749a3 100644 --- a/arch/arm/include/asm/compiler.h +++ b/arch/arm/include/asm/compiler.h @@ -24,5 +24,14 @@ ".endif; " \ ".endif\n\t" +/* + * Special .pushsection wrapper with explicit dependency to prevent + * garbage collection of the specified section. This is needed when no + * explicit symbol references are made to this section. + */ +#define __pushlinkedsection(name) \ + ".reloc . 
- 1, R_ARM_NONE, 9909f\n" \ + "\t.pushsection " name "\n" \ + "9909: " #endif /* __ASM_ARM_COMPILER_H */ diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 6795368ad0..3540e42084 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -5,11 +5,12 @@ #include <linux/futex.h> #include <linux/uaccess.h> +#include <asm/compiler.h> #include <asm/errno.h> #define __futex_atomic_ex_table(err_reg) \ "3:\n" \ - " .pushsection __ex_table,\"a\"\n" \ + " " __pushlinkedsection("__ex_table.%S,\"a\"") "\n" \ " .align 3\n" \ " .long 1b, 4f, 2b, 4f\n" \ " .popsection\n" \ diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index 34f7b6980d..54e2a5ec11 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <asm/compiler.h> #include <asm/unified.h> #define JUMP_LABEL_NOP_SIZE 4 @@ -12,7 +13,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran { asm_volatile_goto("1:\n\t" WASM(nop) "\n\t" - ".pushsection __jump_table, \"aw\"\n\t" + __pushlinkedsection("__jump_table.%S, \"aw\") "\n\t" ".word 1b, %l[l_yes], %c0\n\t" ".popsection\n\t" : : "i" (&((char *)key)[branch]) : : l_yes); @@ -26,7 +27,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool { asm_volatile_goto("1:\n\t" WASM(b) " %l[l_yes]\n\t" - ".pushsection __jump_table, \"aw\"\n\t" + __pushlinkedsection("__jump_table.%S, \"aw\"") "\n\t" ".word 1b, %l[l_yes], %c0\n\t" ".popsection\n\t" : : "i" (&((char *)key)[branch]) : : l_yes); diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 98d58bb04a..d5cc34e9a7 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -18,6 +18,8 @@ #include <linux/types.h> #include <linux/sizes.h> +#include <asm/compiler.h> + #ifdef CONFIG_NEED_MACH_MEMORY_H #include <mach/memory.h> #endif @@ -172,7 +174,7 
@@ extern const void *__pv_table_begin, *__pv_table_end; #define __pv_stub(from,to,instr,type) \ __asm__("@ __pv_stub\n" \ "1: " instr " %0, %1, %2\n" \ - " .pushsection .pv_table,\"a\"\n" \ + " " __pushlinkedsection(".pv_table.%S,\"a\"") "\n" \ " .long 1b\n" \ " .popsection\n" \ : "=r" (to) \ @@ -181,7 +183,7 @@ extern const void *__pv_table_begin, *__pv_table_end; #define __pv_stub_mov_hi(t) \ __asm__ volatile("@ __pv_stub_mov\n" \ "1: mov %R0, %1\n" \ - " .pushsection .pv_table,\"a\"\n" \ + " " __pushlinkedsection(".pv_table.%S,\"a\"") "\n" \ " .long 1b\n" \ " .popsection\n" \ : "=r" (t) \ @@ -191,7 +193,7 @@ extern const void *__pv_table_begin, *__pv_table_end; __asm__ volatile("@ __pv_add_carry_stub\n" \ "1: adds %Q0, %1, %2\n" \ " adc %R0, %R0, #0\n" \ - " .pushsection .pv_table,\"a\"\n" \ + " " __pushlinkedsection(".pv_table.%S,\"a\"") "\n" \ " .long 1b\n" \ " .popsection\n" \ : "+r" (y) \ diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 8a1e8e995d..8c535eacea 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -19,6 +19,7 @@ #ifdef __KERNEL__ +#include <asm/compiler.h> #include <asm/hw_breakpoint.h> #include <asm/ptrace.h> #include <asm/types.h> @@ -93,7 +94,7 @@ unsigned long get_wchan(struct task_struct *p); #ifdef CONFIG_SMP #define __ALT_SMP_ASM(smp, up) \ "9998: " smp "\n" \ - " .pushsection \".alt.smp.init\", \"a\"\n" \ + " " __pushlinkedsection("\".alt.smp.init.%S\", \"a\"") "\n" \ " .long 9998b\n" \ " " up "\n" \ " .popsection\n" diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 8cc85a4ebe..5e7e404894 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -357,7 +357,7 @@ do { \ " mov %1, #0\n" \ " b 2b\n" \ " .popsection\n" \ - " .pushsection __ex_table,\"a\"\n" \ + " " __pushlinkedsection("__ex_table.%S,\"a\"") "\n" \ " .align 3\n" \ " .long 1b, 3b\n" \ " .popsection" \ @@ -429,7 +429,7 @@ do { \ "3: mov %0, %3\n" 
\ " b 2b\n" \ " .popsection\n" \ - " .pushsection __ex_table,\"a\"\n" \ + " " __pushlinkedsection("__ex_table.%S,\"a\"") "\n" \ " .align 3\n" \ " .long 1b, 3b\n" \ " .popsection" \ @@ -479,7 +479,7 @@ do { \ "4: mov %0, %3\n" \ " b 3b\n" \ " .popsection\n" \ - " .pushsection __ex_table,\"a\"\n" \ + " " __pushlinkedsection("__ex_table.%S,\"a\"") "\n" \ " .align 3\n" \ " .long 1b, 4b\n" \ " .long 2b, 4b\n" \ diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h index 5831dce4b5..348a462d3e 100644 --- a/arch/arm/include/asm/word-at-a-time.h +++ b/arch/arm/include/asm/word-at-a-time.h @@ -8,6 +8,7 @@ * Heavily based on the x86 algorithm. */ #include <linux/kernel.h> +#include <asm/compiler.h> struct word_at_a_time { const unsigned long one_bits, high_bits; @@ -84,7 +85,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) #endif " b 2b\n" " .popsection\n" - " .pushsection __ex_table,\"a\"\n" + " " __pushlinkedsection("__ex_table.%S,\"a\"") "\n" " .align 3\n" " .long 1b, 3b\n" " .popsection" diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 3e1c26eb32..5047757c34 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -564,7 +564,7 @@ ENDPROC(__und_usr) 4: str r4, [sp, #S_PC] @ retry current instruction ret r9 .popsection - .pushsection __ex_table,"a" + .pushlinkedsection __ex_table.%S,"a" .long 1b, 4b #if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7 .long 2b, 4b diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 8b60fde5ce..6885382931 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -36,7 +36,7 @@ #define ARM_CPU_KEEP(x) #endif -#if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \ +#if 0 // (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \ defined(CONFIG_GENERIC_BUG) #define ARM_EXIT_KEEP(x) x #define ARM_EXIT_DISCARD(x) diff --git 
a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S index fab5a50503..238c7de114 100644 --- a/arch/arm/lib/backtrace.S +++ b/arch/arm/lib/backtrace.S @@ -104,7 +104,7 @@ for_each_frame: tst frame, mask @ Check for address exceptions no_frame: ldmfd sp!, {r4 - r8, pc} ENDPROC(c_backtrace) - .pushsection __ex_table,"a" + .pushlinkedsection __ex_table.%S,"a" .align 3 .long 1001b, 1006b .long 1002b, 1006b diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 8ecfd15c3a..e2c6a5649f 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -132,7 +132,7 @@ __get_user_bad: ENDPROC(__get_user_bad) ENDPROC(__get_user_bad8) -.pushsection __ex_table, "a" +.pushlinkedsection __ex_table.%S, "a" .long 1b, __get_user_bad .long 2b, __get_user_bad .long 3b, __get_user_bad diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index 38d660d370..b52f4a264e 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -88,7 +88,7 @@ __put_user_bad: ret lr ENDPROC(__put_user_bad) -.pushsection __ex_table, "a" +.pushlinkedsection __ex_table.%S, "a" .long 1b, __put_user_bad .long 2b, __put_user_bad .long 3b, __put_user_bad diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 00b7f7de28..a2e6f47edb 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -22,6 +22,7 @@ #include <linux/sched.h> #include <linux/uaccess.h> +#include <asm/compiler.h> #include <asm/cp15.h> #include <asm/system_info.h> #include <asm/unaligned.h> @@ -206,7 +207,7 @@ union offset_union { "3: mov %0, #1\n" \ " b 2b\n" \ " .popsection\n" \ - " .pushsection __ex_table,\"a\"\n" \ + " " __pushlinkedsection("__ex_table.%S,\"a\"") "\n" \ " .align 3\n" \ " .long 1b, 3b\n" \ " .popsection\n" \ @@ -266,7 +267,7 @@ union offset_union { "4: mov %0, #1\n" \ " b 3b\n" \ " .popsection\n" \ - " .pushsection __ex_table,\"a\"\n" \ + " " __pushlinkedsection("__ex_table.%S,\"a\"") "\n" \ " .align 3\n" \ " .long 1b, 4b\n" \ " .long 2b, 4b\n" \ @@ -306,7 +307,7 @@ union 
offset_union { "6: mov %0, #1\n" \ " b 5b\n" \ " .popsection\n" \ - " .pushsection __ex_table,\"a\"\n" \ + " " __pushlinkedsection("__ex_table.%S,\"a\"") "\n" \ " .align 3\n" \ " .long 1b, 6b\n" \ " .long 2b, 6b\n" \ diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S index 39c20afad7..8f566c87c2 100644 --- a/arch/arm/nwfpe/entry.S +++ b/arch/arm/nwfpe/entry.S @@ -119,7 +119,7 @@ next: .Lfix: ret r9 @ let the user eat segfaults .popsection - .pushsection __ex_table,"a" + .pushlinkedsection __ex_table.%S,"a" .align 3 .long .Lx1, .Lfix .popsection