On Tue, 6 Sept 2022 at 12:42, Evgeniy Baskov <baskov@xxxxxxxxx> wrote: > > Doing it that way allows setting up stricter memory attributes, > simplifies boot code path and removes potential relocation > of kernel image. > > Wire up required interfaces and minimally initialize zero page > fields needed for it to function correctly. > > Signed-off-by: Evgeniy Baskov <baskov@xxxxxxxxx> > > create mode 100644 arch/x86/include/asm/shared/extract.h > create mode 100644 drivers/firmware/efi/libstub/x86-extract-direct.c > --- > arch/x86/boot/compressed/head_32.S | 6 +- > arch/x86/boot/compressed/head_64.S | 45 ++++ > arch/x86/include/asm/shared/extract.h | 25 ++ > drivers/firmware/efi/Kconfig | 14 ++ > drivers/firmware/efi/libstub/Makefile | 1 + > drivers/firmware/efi/libstub/efistub.h | 5 + > .../firmware/efi/libstub/x86-extract-direct.c | 220 ++++++++++++++++++ > drivers/firmware/efi/libstub/x86-stub.c | 45 ++-- > 8 files changed, 343 insertions(+), 18 deletions(-) > create mode 100644 arch/x86/include/asm/shared/extract.h > create mode 100644 drivers/firmware/efi/libstub/x86-extract-direct.c > > diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S > index b46a1c4109cf..d2866f06bc9f 100644 > --- a/arch/x86/boot/compressed/head_32.S > +++ b/arch/x86/boot/compressed/head_32.S > @@ -155,7 +155,11 @@ SYM_FUNC_START(efi32_stub_entry) > add $0x4, %esp > movl 8(%esp), %esi /* save boot_params pointer */ > call efi_main > - /* efi_main returns the possibly relocated address of startup_32 */ > + > + /* > + * efi_main returns the possibly > + * relocated address of exteracted kernel entry point. extracted > + */ > jmp *%eax > SYM_FUNC_END(efi32_stub_entry) > SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) > diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S > index 37ce094571b5..b6bae8e7ee71 100644 > --- a/arch/x86/boot/compressed/head_64.S > +++ b/arch/x86/boot/compressed/head_64.S > @@ -555,9 +555,54 @@ SYM_FUNC_START(efi64_stub_entry) > and $~0xf, %rsp /* realign the stack */ > movq %rdx, %rbx /* save boot_params pointer */ > call efi_main > + > +#ifdef CONFIG_EFI_STUB_EXTRACT_DIRECT > + cld > + cli > + > + movq %rbx, %rdi /* boot_params */ > + movq %rax, %rsi /* decompressed kernel address */ > + > + /* Make sure we have GDT with 32-bit code segment */ > + leaq gdt64(%rip), %rax > + addq %rax, 2(%rax) > + lgdt (%rax) > + > + /* Setup data segments. */ > + xorl %eax, %eax > + movl %eax, %ds > + movl %eax, %es > + movl %eax, %ss > + movl %eax, %fs > + movl %eax, %gs > + > + pushq %rsi > + pushq %rdi > + > + call startup32_enable_nx_if_supported > + > + call trampoline_pgtable_init > + movq %rax, %rdx > + > + > + /* Swap %rsi and %rsi */ > + popq %rsi > + popq %rdi > + > + /* Save the trampoline address in RCX */ > + movq trampoline_32bit(%rip), %rcx > + > + /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ > + pushq $__KERNEL32_CS > + leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rcx), %rax > + pushq %rax > + lretq > +#else > movq %rbx,%rsi > leaq rva(startup_64)(%rax), %rax > jmp *%rax > +#endif > + > SYM_FUNC_END(efi64_stub_entry) > SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) > #endif > diff --git a/arch/x86/include/asm/shared/extract.h b/arch/x86/include/asm/shared/extract.h > new file mode 100644 > index 000000000000..163678145884 > --- /dev/null > +++ b/arch/x86/include/asm/shared/extract.h > @@ -0,0 +1,25 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef ASM_SHARED_EXTRACT_H > +#define ASM_SHARED_EXTRACT_H > + > +#define MAP_WRITE 0x02 /* Writable memory */ > +#define MAP_EXEC 0x04 /* Executable memory */ > +#define MAP_ALLOC 0x10 /* Range needs to be allocated */ > +#define MAP_PROTECT 0x20 /* Set exact memory attributes for memory range */ > + > +struct efi_iofunc { > + void (*putstr)(const char *msg); > + void (*puthex)(unsigned long x); > + unsigned long (*map_range)(unsigned long start, > + unsigned long end, > + unsigned int flags); This looks a bit random - having a map_range() routine as a member of the console I/O struct. Can we make this abstraction a bit more natural? > +}; > + > +void *efi_extract_kernel(struct boot_params *rmode, > + struct efi_iofunc *iofunc, > + unsigned char *input_data, > + unsigned long input_len, > + unsigned char *output, > + unsigned long output_len); > + > +#endif /* ASM_SHARED_EXTRACT_H */ > diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig > index 6cb7384ad2ac..2418402a0bda 100644 > --- a/drivers/firmware/efi/Kconfig > +++ b/drivers/firmware/efi/Kconfig > @@ -91,6 +91,20 @@ config EFI_DXE_MEM_ATTRIBUTES > Use DXE services to check and alter memory protection > attributes during boot via EFISTUB to ensure that memory > ranges used by the kernel are writable and executable. > + This option also enables stricter memory attributes > + on compressed kernel PE image. > + > +config EFI_STUB_EXTRACT_DIRECT > + bool "Extract kernel directly from UEFI environment" > + depends on EFI && EFI_STUB && X86_64 > + default y What is the reason for making this configurable? Couldn't we just enable it unconditionally? > + help > + Extract kernel before exiting EFI boot services > + This allows maintaining W^X for kernel image for > + the whole execution of compressed kernel code. > + This also slightly improves efficiency of extraction > + code by removing the need to copy the kernel around > + and rebuild page tables. > > config EFI_PARAMS_FROM_FDT > bool > diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile > index d0537573501e..1cea7d913356 100644 > --- a/drivers/firmware/efi/libstub/Makefile > +++ b/drivers/firmware/efi/libstub/Makefile > @@ -69,6 +69,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o fdt.o string.o \ > lib-$(CONFIG_ARM) += arm32-stub.o > lib-$(CONFIG_ARM64) += arm64-stub.o > lib-$(CONFIG_X86) += x86-stub.o > +lib-$(CONFIG_EFI_STUB_EXTRACT_DIRECT) += x86-extract-direct.o > lib-$(CONFIG_RISCV) += riscv-stub.o > CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) > > diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h > index 22fe28385db7..cdd1bb50c786 100644 > --- a/drivers/firmware/efi/libstub/efistub.h > +++ b/drivers/firmware/efi/libstub/efistub.h > @@ -968,6 +968,11 @@ static inline void > efi_enable_reset_attack_mitigation(void) { } > #endif > > +#ifdef CONFIG_X86 > +unsigned long extract_kernel_direct(struct boot_params *boot_params); > +void startup_32(struct boot_params *boot_params); > +#endif > + Please put this somewhere else > void efi_retrieve_tpm2_eventlog(void); > > #endif > diff --git a/drivers/firmware/efi/libstub/x86-extract-direct.c b/drivers/firmware/efi/libstub/x86-extract-direct.c > new file mode 100644 > index 000000000000..6076bd75cfd6 > --- /dev/null > +++ b/drivers/firmware/efi/libstub/x86-extract-direct.c > @@ -0,0 +1,220 @@ > +// SPDX-License-Identifier: GPL-2.0-only > + > +#include <linux/acpi.h> > +#include <linux/efi.h> > +#include <linux/elf.h> > +#include <linux/stddef.h> > + > +#include <asm/efi.h> > +#include <asm/e820/types.h> > +#include <asm/desc.h> > +#include <asm/boot.h> > +#include <asm/bootparam_utils.h> > +#include <asm/shared/extract.h> > +#include <asm/shared/pgtable.h> > + > +#include "efistub.h" > + > +static void do_puthex(unsigned long value); > +static void do_putstr(const char *msg); > + Can we get rid of these forward declarations? > +static unsigned long do_map_range(unsigned long start, > + unsigned long end, > + unsigned int flags) > +{ > + efi_status_t status; > + > + unsigned long size = end - start; > + > + if (flags & MAP_ALLOC) { > + if (start == (unsigned long)startup_32) > + start = LOAD_PHYSICAL_ADDR; > + > + unsigned long addr; > + > + status = efi_low_alloc_above(size, CONFIG_PHYSICAL_ALIGN, > + &addr, start); > + if (status != EFI_SUCCESS) > + efi_err("Unable to allocate memory for uncompressed kernel"); > + > + if (start != addr) { > + efi_debug("Unable to allocate at given address" > + " (desired=0x%lx, actual=0x%lx)", > + (unsigned long)start, addr); > + start = addr; > + } > + } > + > + if (flags & (MAP_PROTECT | MAP_ALLOC)) { > + unsigned long attr = 0; > + > + if (!(flags & MAP_EXEC)) > + attr |= EFI_MEMORY_XP; > + > + if (!(flags & MAP_WRITE)) > + attr |= EFI_MEMORY_RO; > + > + status = efi_adjust_memory_range_protection(start, > + end - start, > + attr); > + if (status != EFI_SUCCESS) > + efi_err("Unable to protect memory range"); > + } > + > + return start; > +} > + > +/* > + * Trampoline takes 3 pages and can be loaded in first megabyte of memory > + * with its end placed between 0 and 640k where BIOS might start. > + * (see arch/x86/boot/compressed/pgtable_64.c) > + */ > + > +#ifdef CONFIG_64BIT > +static efi_status_t prepare_trampoline(void) > +{ > + efi_status_t status; > + > + status = efi_allocate_pages(TRAMPOLINE_32BIT_SIZE, > + (unsigned long *)&trampoline_32bit, > + TRAMPOLINE_32BIT_PLACEMENT_MAX); > + > + if (status != EFI_SUCCESS) > + return status; > + > + unsigned long trampoline_start = (unsigned long)trampoline_32bit; > + > + memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); > + > + /* First page of trampoline is a top level page table */ > + efi_adjust_memory_range_protection(trampoline_start, > + PAGE_SIZE, > + EFI_MEMORY_XP); > + > + /* Second page of trampoline is the code (with a padding) */ > + > + void *caddr = (void *)trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET; > + > + memcpy(caddr, trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); > + > + efi_adjust_memory_range_protection((unsigned long)caddr, > + PAGE_SIZE, > + EFI_MEMORY_RO); > + > + /* And the last page of trampoline is the stack */ > + > + efi_adjust_memory_range_protection(trampoline_start + 2 * PAGE_SIZE, > + PAGE_SIZE, > + EFI_MEMORY_XP); > + > + return EFI_SUCCESS; > +} > +#else > +static inline efi_status_t prepare_trampoline(void) > +{ > + return EFI_SUCCESS; > +} > +#endif > + > +static efi_status_t init_loader_data(struct boot_params *params) > +{ > + struct efi_info *efi = (void *)¶ms->efi_info; > + efi_status_t status; > + > + unsigned long map_size, desc_size, buff_size; > + u32 desc_ver; > + efi_memory_desc_t *mmap; > + > + struct efi_boot_memmap map = { > + .map = &mmap, > + .map_size = &map_size, > + .desc_size = &desc_size, > + .desc_ver = &desc_ver, > + .key_ptr = NULL, > + .buff_size = &buff_size, > + }; > + > + status = efi_get_memory_map(&map); efi_get_memory_map() has been updated in the mean time, so this needs a rewrite. > + > + if (status != EFI_SUCCESS) { > + efi_err("Unable to get EFI memory map...\n"); > + return status; > + } > + > + const char *signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE > + : EFI32_LOADER_SIGNATURE; > + > + memcpy(&efi->efi_loader_signature, signature, sizeof(__u32)); > + > + efi->efi_memdesc_size = desc_size; > + efi->efi_memdesc_version = desc_ver; > + efi->efi_memmap_size = map_size; > + > + efi_set_u64_split((unsigned long)mmap, > + &efi->efi_memmap, &efi->efi_memmap_hi); > + > + efi_set_u64_split((unsigned long)efi_system_table, > + &efi->efi_systab, &efi->efi_systab_hi); > + > + return EFI_SUCCESS; > +} > + > +static void free_loader_data(struct boot_params *params) > +{ > + struct efi_info *efi = (void *)¶ms->efi_info; > + unsigned long mmap = efi->efi_memmap; > + > +#ifdef CONFIG_64BIT > + mmap |= ((unsigned long)efi->efi_memmap_hi << 32); > +#endif > + > + efi_bs_call(free_pool, (void *)mmap); > + > + efi->efi_memdesc_size = 0; > + efi->efi_memdesc_version = 0; > + efi->efi_memmap_size = 0; > + efi_set_u64_split(0, &efi->efi_memmap, &efi->efi_memmap_hi); > +} > + > +unsigned long extract_kernel_direct(struct boot_params *params) > +{ > + > + extern unsigned char input_data[]; > + extern unsigned int output_len, input_len; > + > + void *res; > + efi_status_t status; > + struct efi_iofunc iof = { 0 }; > + > + status = prepare_trampoline(); > + > + if (status != EFI_SUCCESS) > + return 0; > + > + /* Prepare environment for do_extract_kernel() call */ > + status = init_loader_data(params); > + > + if (status != EFI_SUCCESS) > + return 0; > + > + iof.puthex = do_puthex; > + iof.putstr = do_putstr; > + iof.map_range = do_map_range; > + > + res = efi_extract_kernel(params, &iof, input_data, input_len, > + (unsigned char *)startup_32, output_len); > + > + free_loader_data(params); > + > + return (unsigned long)res; > +} > + > +static void do_puthex(unsigned long value) > +{ > + efi_printk("%08lx", value); > +} > + > +static void do_putstr(const char *msg) > +{ > + efi_printk("%s", msg); > +} > diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c > index 678f9c2ccafc..680184034cb7 100644 > --- a/drivers/firmware/efi/libstub/x86-stub.c > +++ b/drivers/firmware/efi/libstub/x86-stub.c > @@ -230,26 +230,25 @@ static void > setup_memory_protection(unsigned long image_base, unsigned long image_size) > { > /* > - * Allow execution of possible trampoline used > - * for switching between 4- and 5-level page tables > - * and relocated kernel image. > - */ > + * Allow execution of possible trampoline used > + * for switching between 4- and 5-level page tables > + * and relocated kernel image. > + */ > Drop this hunk please > efi_adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE, > TRAMPOLINE_PLACEMENT_SIZE, 0); > > #ifdef CONFIG_64BIT > - if (image_base != (unsigned long)startup_32) > - efi_adjust_memory_range_protection(image_base, image_size, 0); > + efi_adjust_memory_range_protection(image_base, image_size, 0); > #else > /* > - * Clear protection flags on a whole range of possible > - * addresses used for KASLR. We don't need to do that > - * on x86_64, since KASLR/extraction is performed after > - * dedicated identity page tables are built and we only > - * need to remove possible protection on relocated image > - * itself disregarding further relocations. > - */ > + * Clear protection flags on a whole range of possible > + * addresses used for KASLR. We don't need to do that > + * on x86_64, since KASLR/extraction is performed after > + * dedicated identity page tables are built and we only > + * need to remove possible protection on relocated image > + * itself disregarding further relocations. > + */ Drop this hunk please > efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR, > KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR, > 0); > @@ -270,8 +269,10 @@ static void setup_quirks(struct boot_params *boot_params, > retrieve_apple_device_properties(boot_params); > } > > - if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) > + if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES) && > + !IS_ENABLED(CONFIG_EFI_STUB_EXTRACT_DIRECT)) { > setup_memory_protection(image_base, image_size); > + } > } > > /* > @@ -710,8 +711,10 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) > } > > /* > - * On success, we return the address of startup_32, which has potentially been > - * relocated by efi_relocate_kernel. > + * On success, we return: > + * - the address of startup_32, which has potentially been > + * relocated by efi_relocate_kernel, if libstub direct extraction is disabled. > + * - extracted kernel entry point if libstub direct extraction is enabled. > * On failure, we exit to the firmware via efi_exit instead of returning. > */ > unsigned long efi_main(efi_handle_t handle, > @@ -736,6 +739,7 @@ unsigned long efi_main(efi_handle_t handle, > efi_dxe_table = NULL; > } > > +#ifndef CONFIG_EFI_STUB_EXTRACT_DIRECT > /* > * If the kernel isn't already loaded at a suitable address, > * relocate it. > @@ -789,6 +793,7 @@ unsigned long efi_main(efi_handle_t handle, > */ > image_offset = 0; > } > +#endif > > #ifdef CONFIG_CMDLINE_BOOL > status = efi_parse_options(CONFIG_CMDLINE); > @@ -845,7 +850,13 @@ unsigned long efi_main(efi_handle_t handle, > > setup_efi_pci(boot_params); > > - setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start); > + setup_quirks(boot_params, buffer_start, buffer_end - buffer_start); > + > +#ifdef CONFIG_EFI_STUB_EXTRACT_DIRECT > + bzimage_addr = extract_kernel_direct(boot_params); > + if (!bzimage_addr) > + goto fail; > +#endif > > status = exit_boot(boot_params, handle); > if (status != EFI_SUCCESS) { > -- > 2.35.1 >