From: Varad Gautam <varad.gautam@xxxxxxxx> Sending INIT/SIPI to APs from ap_init() resets them into 16-bit mode to loop into sipi_entry(). To drive the APs into 32-bit mode, the SIPI vector needs: 1. A GDT descriptor reachable from 16-bit code (gdt32_descr). 2. A 32-bit entrypoint reachable from 16-bit code (ap_start32). 3. The locations of GDT and the 32-bit entrypoint. Setting these up at compile time (like on non-EFI builds) is not possible since EFI builds with -shared -fPIC and efistart64.S cannot reference any absolute addresses. Relative addressing is unavailable on 16-bit mode. Moreover, EFI may not load the 32-bit entrypoint to be reachable from 16-bit mode. To overcome these problems, 1. Fill the GDT descriptor at runtime after relocating [sipi_entry-sipi_end] to lowmem. Since sipi_entry does not know the address of this descriptor, use the last two bytes of SIPI page to communicate it. 2. Place a call gate in the GDT to point to ap_start32. 3. Popluate sipi_entry() to lcall to ap_start32. With this, the APs can transition to 32-bit mode and loop at a known location. Signed-off-by: Varad Gautam <varad.gautam@xxxxxxxx> Co-developed-by: Sean Christopherson <seanjc@xxxxxxxxxx> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx> --- lib/x86/setup.c | 2 +- lib/x86/smp.c | 81 +++++++++++++++++++++++++++++++++++++++++++- lib/x86/smp.h | 3 ++ x86/efi/efistart64.S | 35 ++++++++++++++++++- x86/trampolines.S | 38 +++++++++++++++++++-- 5 files changed, 153 insertions(+), 6 deletions(-) diff --git a/lib/x86/setup.c b/lib/x86/setup.c index c7c0983..2d199b4 100644 --- a/lib/x86/setup.c +++ b/lib/x86/setup.c @@ -347,11 +347,11 @@ efi_status_t setup_efi(efi_bootinfo_t *efi_bootinfo) setup_idt(); load_idt(); mask_pic_interrupts(); + setup_page_table(); enable_apic(); ap_init(); enable_x2apic(); smp_init(); - setup_page_table(); return EFI_SUCCESS; } diff --git a/lib/x86/smp.c b/lib/x86/smp.c index 2f554ce..022d627 100644 --- a/lib/x86/smp.c +++ b/lib/x86/smp.c @@ -24,10 +24,17 @@ static volatile bool ipi_wait; static int _cpu_count; static atomic_t active_cpus; extern u8 rm_trampoline, rm_trampoline_end; -#ifdef __i386__ +#if defined(__i386__) || defined(CONFIG_EFI) extern u8 ap_rm_gdt_descr; #endif +#ifdef CONFIG_EFI +extern u8 ap_rm_gdt, ap_rm_gdt_end; +extern u8 ap_start32; +extern u32 smp_stacktop; +extern u8 stacktop; +#endif + /* The BSP is online from time zero. */ atomic_t cpu_online_count = { .counter = 1 }; @@ -163,6 +170,74 @@ static void setup_rm_gdt(void) * the vector code. */ sgdt(rm_gdt); +#elif defined(CONFIG_EFI) + idt_entry_t *gate_descr; + + /* + * The realmode trampoline on EFI has the following layout: + * + * |rm_trampoline: + * |sipi_entry: + * | <AP bootstrapping code called from SIPI> + * |ap_rm_gdt: + * | <GDT used for 16-bit -> 32-bit trasition> + * |ap_rm_gdt_descr: + * | <GDT descriptor for ap_rm_gdt> + * |sipi_end: + * | <End of trampoline> + * |rm_trampoline_end: + * + * After relocating to the lowmem address pointed to by realmode_trampoline, + * the realmode GDT descriptor needs to contain the relocated address of + * ap_rm_gdt. + */ + volatile struct descriptor_table_ptr *rm_gdt_descr = + (struct descriptor_table_ptr *) (&ap_rm_gdt_descr - &rm_trampoline); + rm_gdt_descr->base = (ulong) ((u32) (&ap_rm_gdt - &rm_trampoline)); + rm_gdt_descr->limit = (u16) (&ap_rm_gdt_end - &ap_rm_gdt - 1); + + /* + * Since 1. compile time calculation of offsets is not allowed when + * building with -shared, and 2. rip-relative addressing is not supported in + * 16-bit mode, the relocated address of ap_rm_gdt_descr needs to be stored at + * a location known to / accessible from the trampoline. + * + * Use the last two bytes of the trampoline page (REALMODE_GDT_LOWMEM) to store + * a pointer to relocated ap_rm_gdt_descr addr. This way, the trampoline code can + * find the relocated descriptor using the lowmem address at pa=REALMODE_GDT_LOWMEM, + * and this relocated descriptor points to the relocated GDT. + */ + *((u16 *)(REALMODE_GDT_LOWMEM)) = (u16) (u64) rm_gdt_descr; + + /* + * Set up a call gate to the 32-bit entrypoint (ap_start32) within GDT, since + * EFI may not load the 32-bit AP entrypoint (ap_start32) low enough + * to be reachable from the SIPI vector. + * + * Since kvm-unit-tests builds with -shared, this location needs to be fetched + * at runtime, and rip-relative addressing is not supported in 16-bit mode. This + * prevents using a long jump to ap_start32 (`ljmpl $cs, $ap_start32`). + * + * As an alternative, a far return via `push $cs; push $label; lret` would require + * an intermediate trampoline since $label must still be within 0 - 0xFFFF for + * 16-bit far return to work. + * + * Using a call gate allows for an easier 16-bit -> 32-bit transition via `lcall`. + * + * GDT layout: + * + * Entry | Segment + * 0 | NULL descr + * 1 | Code segment descr + * 2 | Data segment descr + * 3 | Call gate descr + * + * This layout is only used for reaching 32-bit mode. APs load a 64-bit GDT + * later during boot, which does not need to follow this layout. + */ + gate_descr = ((void *)(&ap_rm_gdt - &rm_trampoline) + 3 * sizeof(gdt_entry_t)); + set_desc_entry(gate_descr, sizeof(gdt_entry_t), (void *) &ap_start32, + 0x8 /* sel */, 0xc /* type */, 0 /* dpl */); #endif } @@ -184,6 +259,10 @@ void ap_init(void) setup_rm_gdt(); +#ifdef CONFIG_EFI + smp_stacktop = ((u64) (&stacktop)) - PAGE_SIZE; +#endif + /* INIT */ apic_icr_write(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT, 0); diff --git a/lib/x86/smp.h b/lib/x86/smp.h index 77f8a19..b805be5 100644 --- a/lib/x86/smp.h +++ b/lib/x86/smp.h @@ -6,6 +6,9 @@ #include "libcflat.h" #include "atomic.h" +/* Address where to store the address of realmode GDT descriptor. */ +#define REALMODE_GDT_LOWMEM (PAGE_SIZE - 2) + /* Offsets into the per-cpu page. */ struct percpu_data { uint32_t smp_id; diff --git a/x86/efi/efistart64.S b/x86/efi/efistart64.S index 3a4135e..a514612 100644 --- a/x86/efi/efistart64.S +++ b/x86/efi/efistart64.S @@ -12,6 +12,9 @@ .globl stacktop stacktop: +.globl smp_stacktop +smp_stacktop: .long 0 + .align PAGE_SIZE .globl ptl2 ptl2: @@ -33,16 +36,46 @@ ptl4: .text .code16 +REALMODE_GDT_LOWMEM = PAGE_SIZE - 2 .globl rm_trampoline rm_trampoline: .globl sipi_entry sipi_entry: - jmp sipi_entry + mov %cr0, %eax + or $1, %eax + mov %eax, %cr0 + + /* Retrieve relocated ap_rm_gdt_descr address at REALMODE_GDT_LOWMEM. */ + mov (REALMODE_GDT_LOWMEM), %ebx + lgdtl (%ebx) + + lcall $0x18, $0x0 + +.globl ap_rm_gdt +ap_rm_gdt: + .quad 0 + .quad 0x00cf9b000000ffff // flat 32-bit code segment + .quad 0x00cf93000000ffff // flat 32-bit data segment + .quad 0 // call gate to 32-bit AP entrypoint +.globl ap_rm_gdt_end +ap_rm_gdt_end: + +.globl ap_rm_gdt_descr +ap_rm_gdt_descr: + .word 0 + .long 0 .globl sipi_end sipi_end: .globl rm_trampoline_end rm_trampoline_end: + +#include "../trampolines.S" + +.code64: + +ap_start64: + jmp ap_start64 diff --git a/x86/trampolines.S b/x86/trampolines.S index deb5057..6a3df9c 100644 --- a/x86/trampolines.S +++ b/x86/trampolines.S @@ -36,6 +36,23 @@ rm_trampoline_end: #ifdef __x86_64__ .code32 +/* + * EFI builds with "-shared -fPIC" and so cannot directly reference any absolute + * address. In 64-bit mode, RIP-relative addressing neatly solves the problem, + * but 32-bit code doesn't have that luxury. Make a dummy CALL to get RIP into + * a GPR in order to emulate RIP-relative for 32-bit transition code. + */ +.macro load_absolute_addr, addr, reg +#ifdef CONFIG_EFI + call 1f +1: + pop \reg + add \addr - 1b, \reg +#else + mov \addr, \reg +#endif +.endm + MSR_GS_BASE = 0xc0000101 .macro setup_percpu_area @@ -61,7 +78,9 @@ MSR_GS_BASE = 0xc0000101 .endm prepare_64: - lgdt gdt_descr + load_absolute_addr $gdt_descr, %edx + lgdtl (%edx) + setup_segments xor %eax, %eax @@ -72,7 +91,12 @@ enter_long_mode: bts $5, %eax // pae mov %eax, %cr4 + /* Note, EFI doesn't yet support 5-level paging. */ +#ifdef CONFIG_EFI + load_absolute_addr $ptl4, %eax +#else mov pt_root, %eax +#endif mov %eax, %cr3 efer = 0xc0000080 @@ -87,11 +111,19 @@ efer = 0xc0000080 mov %eax, %cr0 ret +.globl ap_start32 ap_start32: setup_segments + + load_absolute_addr $smp_stacktop, %edx mov $-4096, %esp - lock xaddl %esp, smp_stacktop + lock xaddl %esp, (%edx) + setup_percpu_area call prepare_64 - ljmpl $8, $ap_start64 + + load_absolute_addr $ap_start64, %edx + pushl $0x08 + pushl %edx + lretl #endif -- 2.36.1.476.g0c4daa206d-goog