The following commit has been merged into the x86/boot branch of tip: Commit-ID: 8dbec5c77bc32f04583d3973c8178a74e72fdf18 Gitweb: https://git.kernel.org/tip/8dbec5c77bc32f04583d3973c8178a74e72fdf18 Author: David Woodhouse <dwmw@xxxxxxxxxxxx> AuthorDate: Thu, 05 Dec 2024 15:05:15 Committer: Ingo Molnar <mingo@xxxxxxxxxx> CommitterDate: Fri, 06 Dec 2024 10:42:00 +01:00 x86/kexec: Add data section to relocate_kernel Now that the relocate_kernel page is handled sanely by a linker script we can have actual data, and just use %rip-relative addressing to access it. Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx> Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Baoquan He <bhe@xxxxxxxxxx> Cc: Vivek Goyal <vgoyal@xxxxxxxxxx> Cc: Dave Young <dyoung@xxxxxxxxxx> Cc: Eric Biederman <ebiederm@xxxxxxxxxxxx> Cc: Ard Biesheuvel <ardb@xxxxxxxxxx> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx> Link: https://lore.kernel.org/r/20241205153343.3275139-10-dwmw2@xxxxxxxxxxxxx --- arch/x86/kernel/machine_kexec_64.c | 8 ++- arch/x86/kernel/relocate_kernel_64.S | 62 +++++++++++++-------------- arch/x86/kernel/vmlinux.lds.S | 1 +- 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 23dffdc..63dca5c 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -343,6 +343,7 @@ void machine_kexec(struct kimage *image) unsigned long start_address, unsigned int preserve_context, unsigned int host_mem_enc_active); + unsigned long reloc_start = (unsigned long)__relocate_kernel_start; unsigned long page_list[PAGES_NR]; unsigned int host_mem_enc_active; int save_ftrace_enabled; @@ -389,7 +390,12 @@ void machine_kexec(struct kimage *image) page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); - relocate_kernel_ptr = control_page; + /* + * Allow for the possibility that relocate_kernel might not be at + * the very start of the page. + */ + relocate_kernel_ptr = control_page + (unsigned long)relocate_kernel - + reloc_start; /* * The segment registers are funny things, they have both a diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 2670044..f13866a 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -23,23 +23,21 @@ #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) /* - * control_page + KEXEC_CONTROL_CODE_MAX_SIZE - * ~ control_page + PAGE_SIZE are used as data storage and stack for - * jumping back + * The .text.relocate_kernel and .data.relocate_kernel sections are copied + * into the control page, and the remainder of the page is used as the stack. */ -#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) + .section .data.relocate_kernel,"a"; /* Minimal CPU state */ -#define RSP DATA(0x0) -#define CR0 DATA(0x8) -#define CR3 DATA(0x10) -#define CR4 DATA(0x18) - -/* other data */ -#define CP_PA_TABLE_PAGE DATA(0x20) -#define CP_PA_SWAP_PAGE DATA(0x28) -#define CP_PA_BACKUP_PAGES_MAP DATA(0x30) -#define CP_VA_CONTROL_PAGE DATA(0x38) +SYM_DATA_LOCAL(saved_rsp, .quad 0) +SYM_DATA_LOCAL(saved_cr0, .quad 0) +SYM_DATA_LOCAL(saved_cr3, .quad 0) +SYM_DATA_LOCAL(saved_cr4, .quad 0) + /* other data */ +SYM_DATA_LOCAL(va_control_page, .quad 0) +SYM_DATA_LOCAL(pa_table_page, .quad 0) +SYM_DATA_LOCAL(pa_swap_page, .quad 0) +SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0) .section .text.relocate_kernel,"ax"; .code64 @@ -63,14 +61,13 @@ SYM_CODE_START_NOALIGN(relocate_kernel) pushq %r15 pushf - movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 - movq %rsp, RSP(%r11) + movq %rsp, saved_rsp(%rip) movq %cr0, %rax - movq %rax, CR0(%r11) + movq %rax, saved_cr0(%rip) movq %cr3, %rax - movq %rax, CR3(%r11) + movq %rax, saved_cr3(%rip) movq %cr4, %rax - movq %rax, CR4(%r11) + movq %rax, saved_cr4(%rip) /* Save CR4. Required to enable the right paging mode later. */ movq %rax, %r13 @@ -83,10 +80,11 @@ SYM_CODE_START_NOALIGN(relocate_kernel) movq %r8, %r12 /* - * get physical address of control page now + * get physical and virtual address of control page now * this is impossible after page table switch */ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 + movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* get physical address of page table now too */ movq PTR(PA_TABLE_PAGE)(%rsi), %r9 @@ -95,10 +93,10 @@ SYM_CODE_START_NOALIGN(relocate_kernel) movq PTR(PA_SWAP_PAGE)(%rsi), %r10 /* save some information for jumping back */ - movq %r9, CP_PA_TABLE_PAGE(%r11) - movq %r10, CP_PA_SWAP_PAGE(%r11) - movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11) - movq %r11, CP_VA_CONTROL_PAGE(%r11) + movq %r9, pa_table_page(%rip) + movq %r10, pa_swap_page(%rip) + movq %rdi, pa_backup_pages_map(%rip) + movq %r11, va_control_page(%rip) /* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */ movq %rcx, %r11 @@ -229,13 +227,13 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) /* get the re-entry point of the peer system */ movq 0(%rsp), %rbp leaq relocate_kernel(%rip), %r8 - movq CP_PA_SWAP_PAGE(%r8), %r10 - movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi - movq CP_PA_TABLE_PAGE(%r8), %rax + movq pa_swap_page(%rip), %r10 + movq pa_backup_pages_map(%rip), %rdi + movq pa_table_page(%rip), %rax movq %rax, %cr3 lea PAGE_SIZE(%r8), %rsp call swap_pages - movq CP_VA_CONTROL_PAGE(%r8), %rax + movq va_control_page(%rip), %rax addq $(virtual_mapped - relocate_kernel), %rax pushq %rax ANNOTATE_UNRET_SAFE @@ -246,11 +244,11 @@ SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR // RET target, above - movq RSP(%r8), %rsp - movq CR4(%r8), %rax + movq saved_rsp(%rip), %rsp + movq saved_cr4(%rip), %rax movq %rax, %cr4 - movq CR3(%r8), %rax - movq CR0(%r8), %r8 + movq saved_cr3(%rip), %rax + movq saved_cr0(%rip), %r8 movq %rax, %cr3 movq %r8, %cr0 diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 78ce1a0..0c89399 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -101,6 +101,7 @@ const_pcpu_hot = pcpu_hot; . = ALIGN(0x100); \ __relocate_kernel_start = .; \ *(.text.relocate_kernel); \ + *(.data.relocate_kernel); \ __relocate_kernel_end = .; ASSERT(__relocate_kernel_end - __relocate_kernel_start <= KEXEC_CONTROL_CODE_MAX_SIZE,