Re: [RFC PATCH 27/28] x86/kernel: Switch to PIE linking for the core kernel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 25/09/2024 17:01, Ard Biesheuvel wrote:
From: Ard Biesheuvel <ardb@xxxxxxxxxx>

Build the kernel as a Position Independent Executable (PIE). This
results in more efficient relocation processing for the virtual
displacement of the kernel (for KASLR). More importantly, it instructs
the linker to generate what is actually needed (a program that can be
moved around in memory before execution), which is better than having to
rely on the linker to create a position dependent binary that happens to
tolerate being moved around after poking it in exactly the right manner.

Note that this means that all codegen should be compatible with PIE,
including Rust objects, so this needs to switch to the small code model
with the PIE relocation model as well.

Signed-off-by: Ard Biesheuvel <ardb@xxxxxxxxxx>
---
  arch/x86/Kconfig                        |  2 +-
  arch/x86/Makefile                       | 11 +++++++----
  arch/x86/boot/compressed/misc.c         |  2 ++
  arch/x86/kernel/vmlinux.lds.S           |  5 +++++
  drivers/firmware/efi/libstub/x86-stub.c |  2 ++
  5 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 54cb1f14218b..dbb4d284b0e1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2187,7 +2187,7 @@ config RANDOMIZE_BASE
  # Relocation on x86 needs some additional build support
  config X86_NEED_RELOCS
  	def_bool y
-	depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE)
+	depends on X86_32 && RELOCATABLE
config PHYSICAL_ALIGN
  	hex "Alignment value to which kernel should be aligned"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 83d20f402535..c1dcff444bc8 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -206,9 +206,8 @@ else
                  PIE_CFLAGS-$(CONFIG_SMP) += -mstack-protector-guard-reg=gs
          endif
- # Don't emit relaxable GOTPCREL relocations
-        KBUILD_AFLAGS_KERNEL += -Wa,-mrelax-relocations=no
-        KBUILD_CFLAGS_KERNEL += -Wa,-mrelax-relocations=no $(PIE_CFLAGS-y)
+        KBUILD_CFLAGS_KERNEL	+= $(PIE_CFLAGS-y)
+        KBUILD_RUSTFLAGS_KERNEL	+= -Ccode-model=small -Crelocation-model=pie
  endif
#
@@ -264,12 +263,16 @@ else
  LDFLAGS_vmlinux :=
  endif
+ifdef CONFIG_X86_64
+ldflags-pie-$(CONFIG_LD_IS_LLD)	:= --apply-dynamic-relocs
+ldflags-pie-$(CONFIG_LD_IS_BFD)	:= -z call-nop=suffix-nop
+LDFLAGS_vmlinux			+= --pie -z text $(ldflags-pie-y)
+
  #
  # The 64-bit kernel must be aligned to 2MB.  Pass -z max-page-size=0x200000 to
  # the linker to force 2MB page size regardless of the default page size used
  # by the linker.
  #
-ifdef CONFIG_X86_64
  LDFLAGS_vmlinux += -z max-page-size=0x200000
  endif
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 89f01375cdb7..79e3ffe16f61 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -495,6 +495,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
  		error("Destination virtual address changed when not relocatable");
  #endif
+ boot_params_ptr->kaslr_va_shift = virt_addr - LOAD_PHYSICAL_ADDR;
+
  	debug_putstr("\nDecompressing Linux... ");
if (init_unaccepted_memory()) {
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index f7e832c2ac61..d172e6e8eaaf 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -459,6 +459,11 @@ xen_elfnote_phys32_entry_offset =
DISCARDS + /DISCARD/ : {
+		*(.dynsym .gnu.hash .hash .dynamic .dynstr)
+		*(.interp .dynbss .eh_frame .sframe)
+	}
+
  	/*
  	 * Make sure that the .got.plt is either completely empty or it
  	 * contains only the lazy dispatch entries.
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index f8e465da344d..5c03954924fe 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -912,6 +912,8 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
  	if (status != EFI_SUCCESS)
  		return status;
+ boot_params_ptr->kaslr_va_shift = virt_addr - LOAD_PHYSICAL_ADDR;
+
  	entry = decompress_kernel((void *)addr, virt_addr, error);
  	if (entry == ULONG_MAX) {
  		efi_free(alloc_size, addr);

This patch causes a build failure here (on 64-bit):

  LD      .tmp_vmlinux2
  NM      .tmp_vmlinux2.syms
  KSYMS   .tmp_vmlinux2.kallsyms.S
  AS      .tmp_vmlinux2.kallsyms.o
  LD      vmlinux
  BTFIDS  vmlinux
WARN: resolve_btfids: unresolved symbol bpf_lsm_key_free
FAILED elf_update(WRITE): invalid section entry size
make[5]: *** [scripts/Makefile.vmlinux:34: vmlinux] Error 255
make[5]: *** Deleting file 'vmlinux'
make[4]: *** [Makefile:1153: vmlinux] Error 2
make[3]: *** [debian/rules:74: build-arch] Error 2
dpkg-buildpackage: error: make -f debian/rules binary subprocess returned exit status 2
make[2]: *** [scripts/Makefile.package:121: bindeb-pkg] Error 2
make[1]: *** [/home/opc/linux-mainline-worktree2/Makefile:1544: bindeb-pkg] Error 2
make: *** [Makefile:224: __sub-make] Error 2

The parent commit builds fine. With V=1:

+ ldflags='-m elf_x86_64 -z noexecstack --pie -z text -z call-nop=suffix-nop -z max-page-size=0x200000 --build-id=sha1 --orphan-handling=warn --script=./arch/x86/kernel/vmlinux.lds -Map=vmlinux.map' + ld -m elf_x86_64 -z noexecstack --pie -z text -z call-nop=suffix-nop -z max-page-size=0x200000 --build-id=sha1 --orphan-handling=warn --script=./arch/x86/kernel/vmlinux.lds -Map=vmlinux.map -o vmlinux --whole-archive vmlinux.a .vmlinux.export.o init/version-timestamp.o --no-whole-archive --start-group --end-group .tmp_vmlinux2.kallsyms.o .tmp_vmlinux1.btf.o
+ is_enabled CONFIG_DEBUG_INFO_BTF
+ grep -q '^CONFIG_DEBUG_INFO_BTF=y' include/config/auto.conf
+ info BTFIDS vmlinux
+ printf '  %-7s %s\n' BTFIDS vmlinux
  BTFIDS  vmlinux
+ ./tools/bpf/resolve_btfids/resolve_btfids vmlinux
WARN: resolve_btfids: unresolved symbol bpf_lsm_key_free
FAILED elf_update(WRITE): invalid section entry size

I can send the full config off-list if necessary, but looks like it
might be enough to set CONFIG_DEBUG_INFO_BTF=y.


Vegard




[Index of Archives]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux