On 22/04/21 05:04, Sean Christopherson wrote:
> Load the per-cpu GS.base for 32-bit builds by building a temporary GDT and loading a "real" segment. Using MSR_GS_BASE is wrong and broken: it is a 64-bit-only MSR and does not exist on 32-bit CPUs. The current code works only because 32-bit KVM VMX incorrectly disables interception of MSR_GS_BASE, and no one runs KVM on an actual 32-bit physical CPU, i.e. the MSR exists in hardware and so everything "works". 32-bit KVM SVM is not buggy and correctly injects #GP on the WRMSR, i.e. the tests have never worked on 32-bit SVM.
Hmm, this breaks task switching. But setting up separate descriptors is not hard:

diff --git a/x86/cstart.S b/x86/cstart.S
index 489c561..7d9ed96 100644
--- a/x86/cstart.S
+++ b/x86/cstart.S
@@ -58,6 +58,10 @@ tss_descr:
 	.rept max_cpus
 	.quad 0x000089000000ffff // 32-bit avail tss
 	.endr
+percpu_descr:
+	.rept max_cpus
+	.quad 0x00cf93000000ffff // 32-bit data segment for perCPU area
+	.endr
 gdt32_end:
 
 i = 0
@@ -89,13 +93,23 @@ mb_flags = 0x0
 	.long mb_magic, mb_flags, 0 - (mb_magic + mb_flags)
 mb_cmdline = 16
 
-MSR_GS_BASE = 0xc0000101
-
 .macro setup_percpu_area
 	lea -4096(%esp), %eax
-	mov $0, %edx
-	mov $MSR_GS_BASE, %ecx
-	wrmsr
+
+	/* fill GS_BASE in the GDT */
+	mov $(APIC_DEFAULT_PHYS_BASE + APIC_ID), %ebx
+	mov (%ebx), %ebx
+	shr $24, %ebx
+	or %ax, percpu_descr+2(,%ebx,8)
+
+	shr $16, %eax
+	or %al, percpu_descr+4(,%ebx,8)
+	or %ah, percpu_descr+7(,%ebx,8)
+
+	lgdtl gdt32_descr
+	lea percpu_descr-gdt32(,%ebx,8), %eax
+	mov %ax, %gs
+
 .endm
 
 .macro setup_segments
@@ -188,16 +202,14 @@ load_tss:
 	mov (%eax), %eax
 	shr $24, %eax
 	mov %eax, %ebx
-	shl $3, %ebx
 	mov $((tss_end - tss) / max_cpus), %edx
 	imul %edx
 	add $tss, %eax
-	mov %ax, tss_descr+2(%ebx)
+	mov %ax, tss_descr+2(,%ebx,8)
 	shr $16, %eax
-	mov %al, tss_descr+4(%ebx)
-	shr $8, %eax
-	mov %al, tss_descr+7(%ebx)
-	lea tss_descr-gdt32(%ebx), %eax
+	mov %al, tss_descr+4(,%ebx,8)
+	mov %ah, tss_descr+7(,%ebx,8)
+	lea tss_descr-gdt32(,%ebx,8), %eax
 	ltr %ax
 	ret
 
Paolo