This is a note to let you know that I've just added the patch titled "x86/decompressor: Call trampoline as a normal function" to the 6.1-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: x86-decompressor-call-trampoline-as-a-normal-function.patch and it can be found in the queue-6.1 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. From e8972a76aa90c05a0078043413f806c02fcb3487 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel <ardb@xxxxxxxxxx> Date: Mon, 7 Aug 2023 18:27:06 +0200 Subject: x86/decompressor: Call trampoline as a normal function From: Ard Biesheuvel <ardb@xxxxxxxxxx> commit e8972a76aa90c05a0078043413f806c02fcb3487 upstream. Move the long return to switch to 32-bit mode into the trampoline code so it can be called as an ordinary function. This will allow it to be called directly from C code in a subsequent patch. While at it, reorganize the code somewhat to keep the prologue and epilogue of the function together, making the code a bit easier to follow. Also, given that the trampoline is now entered in 64-bit mode, a simple RIP-relative reference can be used to take the address of the exit point. Signed-off-by: Ard Biesheuvel <ardb@xxxxxxxxxx> Signed-off-by: Borislav Petkov (AMD) <bp@xxxxxxxxx> Acked-by: Kirill A. 
Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> Link: https://lore.kernel.org/r/20230807162720.545787-10-ardb@xxxxxxxxxx Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- arch/x86/boot/compressed/head_64.S | 79 ++++++++++++++++--------------------- arch/x86/boot/compressed/pgtable.h | 2 2 files changed, 36 insertions(+), 45 deletions(-) --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -450,39 +450,8 @@ SYM_CODE_START(startup_64) /* Save the trampoline address in RCX */ movq %rax, %rcx - /* Set up 32-bit addressable stack */ - leaq TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp - - /* - * Preserve live 64-bit registers on the stack: this is necessary - * because the architecture does not guarantee that GPRs will retain - * their full 64-bit values across a 32-bit mode switch. - */ - pushq %r15 - pushq %rbp - pushq %rbx - - /* - * Push the 64-bit address of trampoline_return() onto the new stack. - * It will be used by the trampoline to return to the main code. Due to - * the 32-bit mode switch, it cannot be kept it in a register either. - */ - leaq trampoline_return(%rip), %rdi - pushq %rdi - - /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ - pushq $__KERNEL32_CS leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax - pushq %rax - lretq -trampoline_return: - /* Restore live 64-bit registers */ - popq %rbx - popq %rbp - popq %r15 - - /* Restore the stack, the 32-bit trampoline uses its own stack */ - leaq rva(boot_stack_end)(%rbx), %rsp + call *%rax /* * cleanup_trampoline() would restore trampoline memory. @@ -579,7 +548,6 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated jmp *%rax SYM_FUNC_END(.Lrelocated) - .code32 /* * This is the 32-bit trampoline that will be copied over to low memory. * @@ -588,6 +556,39 @@ SYM_FUNC_END(.Lrelocated) * Non zero RDX means trampoline needs to enable 5-level paging. 
*/ SYM_CODE_START(trampoline_32bit_src) + /* + * Preserve live 64-bit registers on the stack: this is necessary + * because the architecture does not guarantee that GPRs will retain + * their full 64-bit values across a 32-bit mode switch. + */ + pushq %r15 + pushq %rbp + pushq %rbx + + /* Set up 32-bit addressable stack and push the old RSP value */ + leaq (TRAMPOLINE_32BIT_STACK_END - 8)(%rcx), %rbx + movq %rsp, (%rbx) + movq %rbx, %rsp + + /* Take the address of the trampoline exit code */ + leaq .Lret(%rip), %rbx + + /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ + pushq $__KERNEL32_CS + leaq 0f(%rip), %rax + pushq %rax + lretq + +.Lret: + /* Restore the preserved 64-bit registers */ + movq (%rsp), %rsp + popq %rbx + popq %rbp + popq %r15 + retq + + .code32 +0: /* Set up data and stack segments */ movl $__KERNEL_DS, %eax movl %eax, %ds @@ -651,12 +652,9 @@ SYM_CODE_START(trampoline_32bit_src) 1: movl %eax, %cr4 - /* Calculate address of paging_enabled() once we are executing in the trampoline */ - leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax - /* Prepare the stack for far return to Long Mode */ pushl $__KERNEL_CS - pushl %eax + pushl %ebx /* Enable paging again. */ movl %cr0, %eax @@ -666,12 +664,6 @@ SYM_CODE_START(trampoline_32bit_src) lret SYM_CODE_END(trampoline_32bit_src) - .code64 -SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled) - /* Return from the trampoline */ - retq -SYM_FUNC_END(.Lpaging_enabled) - /* * The trampoline code has a size limit. 
* Make sure we fail to compile if the trampoline code grows @@ -679,7 +671,6 @@ SYM_FUNC_END(.Lpaging_enabled) */ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE - .code32 SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 1: --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -6,7 +6,7 @@ #define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE -#define TRAMPOLINE_32BIT_CODE_SIZE 0x80 +#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 #define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE Patches currently in stable-queue which might be from ardb@xxxxxxxxxx are queue-6.1/x86-boot-compressed-avoid-touching-ecx-in-startup32_set_idt_entry.patch queue-6.1/x86-boot-compressed-only-build-mem_encrypt.s-if-amd_mem_encrypt-y.patch queue-6.1/efi-capsule-loader-fix-incorrect-allocation-size.patch queue-6.1/x86-decompressor-move-global-symbol-references-to-c-code.patch queue-6.1/x86-boot-compressed-move-startup32_load_idt-into-.text-section.patch queue-6.1/x86-decompressor-only-call-the-trampoline-when-changing-paging-levels.patch queue-6.1/x86-boot-compressed-rename-efi_thunk_64.s-to-efi-mixed.s.patch queue-6.1/x86-decompressor-merge-trampoline-cleanup-with-switching-code.patch queue-6.1/x86-boot-compressed-move-efi32_pe_entry-out-of-head_64.s.patch queue-6.1/efi-verify-that-variable-services-are-supported.patch queue-6.1/x86-decompressor-call-trampoline-directly-from-c-code.patch queue-6.1/x86-boot-compressed-pull-global-variable-reference-into-startup32_load_idt.patch queue-6.1/x86-boot-compressed-simplify-idt-gdt-preserve-restore-in-the-efi-thunk.patch queue-6.1/x86-decompressor-assign-paging-related-global-variables-earlier.patch queue-6.1/x86-boot-compressed-move-32-bit-entrypoint-code-into-.text-section.patch queue-6.1/x86-boot-compressed-move-efi32_pe_entry-into-.text-section.patch 
queue-6.1/x86-decompressor-avoid-the-need-for-a-stack-in-the-32-bit-trampoline.patch queue-6.1/x86-boot-compressed-move-startup32_check_sev_cbit-out-of-head_64.s.patch queue-6.1/x86-boot-compressed-efi-merge-multiple-definitions-of-image_offset-into-one.patch queue-6.1/x86-boot-compressed-move-startup32_load_idt-out-of-head_64.s.patch queue-6.1/decompress-use-8-byte-alignment.patch queue-6.1/x86-boot-compressed-move-efi32_entry-out-of-head_64.s.patch queue-6.1/efi-libstub-use-efi_loader_code-region-when-moving-the-kernel-in-memory.patch queue-6.1/crypto-arm64-neonbs-fix-out-of-bounds-access-on-short-input.patch queue-6.1/x86-boot-compressed-move-bootargs-parsing-out-of-32-bit-startup-code.patch queue-6.1/efivarfs-request-at-most-512-bytes-for-variable-names.patch queue-6.1/x86-boot-compressed-adhere-to-calling-convention-in-get_sev_encryption_bit.patch queue-6.1/x86-boot-compressed-move-startup32_check_sev_cbit-into-.text.patch queue-6.1/x86-decompressor-store-boot_params-pointer-in-callee-save-register.patch queue-6.1/x86-efistub-branch-straight-to-kernel-entry-point-from-c-code.patch queue-6.1/x86-decompressor-call-trampoline-as-a-normal-function.patch queue-6.1/x86-decompressor-pass-pgtable-address-to-trampoline-directly.patch queue-6.1/x86-decompressor-use-standard-calling-convention-for-trampoline.patch queue-6.1/x86-efi-make-the-deprecated-efi-handover-protocol-optional.patch