Add per-cpu selectors to the GDT, and set GS_BASE by loading a "real" segment. Using MSR_GS_BASE is wrong and broken, it's a 64-bit only MSR and does not exist on 32-bit CPUs. The current code works only because 32-bit KVM VMX incorrectly disables interception of MSR_GS_BASE, and no one runs KVM on an actual 32-bit physical CPU, i.e. the MSR exists in hardware and so everything "works". 32-bit KVM SVM is not buggy and correctly injects #GP on the WRMSR, i.e. the tests have never worked on 32-bit SVM. While at it, tweak the TSS setup to look like the percpu setup; both are setting up the address field of the descriptor. Fixes: dfe6cb6 ("Add 32 bit smp initialization code") Reported-by: Sean Christopherson <seanjc@xxxxxxxxxx> Message-Id: <20210422030504.3488253-2-seanjc@xxxxxxxxxx> [Patch rewritten, keeping Sean's commit message. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> --- lib/x86/desc.c | 3 ++- lib/x86/desc.h | 5 +++-- x86/cstart.S | 35 ++++++++++++++++++++++------------- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/lib/x86/desc.c b/lib/x86/desc.c index 983d4d8..2672e02 100644 --- a/lib/x86/desc.c +++ b/lib/x86/desc.c @@ -333,7 +333,8 @@ void setup_tss32(void) tss_intr.esp = tss_intr.esp0 = tss_intr.esp1 = tss_intr.esp2 = (u32)intr_alt_stack + 4096; tss_intr.cs = 0x08; - tss_intr.ds = tss_intr.es = tss_intr.fs = tss_intr.gs = tss_intr.ss = 0x10; + tss_intr.ds = tss_intr.es = tss_intr.fs = tss_intr.ss = 0x10; + tss_intr.gs = read_gs(); tss_intr.iomap_base = (u16)desc_size; set_gdt_entry(TSS_INTR, (u32)&tss_intr, desc_size - 1, 0x89, 0x0f); } diff --git a/lib/x86/desc.h b/lib/x86/desc.h index 0fe5cbf..77b2c59 100644 --- a/lib/x86/desc.h +++ b/lib/x86/desc.h @@ -103,8 +103,9 @@ typedef struct __attribute__((packed)) { * 0x38 (0x3b) ring-3 code segment (32-bit) same * 0x40 (0x43) ring-3 data segment (32-bit) ring-3 data segment (32/64-bit) * 0x48 (0x4b) **unused** ring-3 code segment (64-bit) - * 0x50--0x78 free to use for test cases same - * 0x80 primary TSS (CPU 0) same + * 0x50-0x78 free to use for test cases same + * 0x80-0x870 primary TSS (CPU 0..254) same + * 0x878-0x1068 percpu area (CPU 0..254) not used * * Note that the same segment can be used for 32-bit and 64-bit data segments * (the L bit is only defined for code segments) diff --git a/x86/cstart.S b/x86/cstart.S index 489c561..bcf7218 100644 --- a/x86/cstart.S +++ b/x86/cstart.S @@ -58,6 +58,10 @@ tss_descr: .rept max_cpus .quad 0x000089000000ffff // 32-bit avail tss .endr +percpu_descr: + .rept max_cpus + .quad 0x00cf93000000ffff // 32-bit data segment for perCPU area + .endr gdt32_end: i = 0 @@ -89,13 +93,21 @@ mb_flags = 0x0 .long mb_magic, mb_flags, 0 - (mb_magic + mb_flags) mb_cmdline = 16 -MSR_GS_BASE = 0xc0000101 - .macro setup_percpu_area lea -4096(%esp), %eax - mov $0, %edx - mov $MSR_GS_BASE, %ecx - wrmsr + + /* fill GS_BASE in the GDT, do not clobber %ebx (multiboot info) */ + mov (APIC_DEFAULT_PHYS_BASE + APIC_ID), %ecx + shr $24, %ecx + mov %ax, percpu_descr+2(,%ecx,8) + + shr $16, %eax + mov %al, percpu_descr+4(,%ecx,8) + mov %ah, percpu_descr+7(,%ecx,8) + + lea percpu_descr-gdt32(,%ecx,8), %eax + mov %ax, %gs + .endm .macro setup_segments @@ -184,20 +196,17 @@ load_tss: lidt idt_descr mov $16, %eax mov %ax, %ss - mov $(APIC_DEFAULT_PHYS_BASE + APIC_ID), %eax - mov (%eax), %eax + mov (APIC_DEFAULT_PHYS_BASE + APIC_ID), %eax shr $24, %eax mov %eax, %ebx - shl $3, %ebx mov $((tss_end - tss) / max_cpus), %edx imul %edx add $tss, %eax - mov %ax, tss_descr+2(%ebx) + mov %ax, tss_descr+2(,%ebx,8) shr $16, %eax - mov %al, tss_descr+4(%ebx) - shr $8, %eax - mov %al, tss_descr+7(%ebx) - lea tss_descr-gdt32(%ebx), %eax + mov %al, tss_descr+4(,%ebx,8) + mov %ah, tss_descr+7(,%ebx,8) + lea tss_descr-gdt32(,%ebx,8), %eax ltr %ax ret -- 2.26.2