On Thu, Apr 22, 2021, Paolo Bonzini wrote: > On 22/04/21 05:04, Sean Christopherson wrote: > > Load the per-cpu GS.base for 32-bit build by building a temporary GDT > > and loading a "real" segment. Using MSR_GS_BASE is wrong and broken, > > it's a 64-bit only MSR and does not exist on 32-bit CPUs. The current > > code works only because 32-bit KVM VMX incorrectly disables interception > > of MSR_GS_BASE, and no one runs KVM on an actual 32-bit physical CPU, > > i.e. the MSR exists in hardware and so everything "works". > > > > 32-bit KVM SVM is not buggy and correctly injects #GP on the WRMSR, i.e. > > the tests have never worked on 32-bit SVM. > > Hmm, this breaks task switch. But setting up separate descriptors is > not hard: Much better. > diff --git a/x86/cstart.S b/x86/cstart.S > index 489c561..7d9ed96 100644 > --- a/x86/cstart.S > +++ b/x86/cstart.S > @@ -58,6 +58,10 @@ tss_descr: > .rept max_cpus > .quad 0x000089000000ffff // 32-bit avail tss > .endr > +percpu_descr: > + .rept max_cpus > + .quad 0x00cf93000000ffff // 32-bit data segment for perCPU area > + .endr > gdt32_end: > > i = 0 > @@ -89,13 +93,23 @@ mb_flags = 0x0 > .long mb_magic, mb_flags, 0 - (mb_magic + mb_flags) > mb_cmdline = 16 > > -MSR_GS_BASE = 0xc0000101 > - > .macro setup_percpu_area > lea -4096(%esp), %eax > - mov $0, %edx > - mov $MSR_GS_BASE, %ecx > - wrmsr > + > + /* fill GS_BASE in the GDT */ > + mov $(APIC_DEFAULT_PHYS_BASE + APIC_ID), %ebx Using %ebx clobbers the mbi_bootinfo pointer. The easiest fix is to use %edx or %ecx. > + mov (%ebx), %ebx No need to load the address into a reg, just drop the "$" above and encode "mov [imm32], <reg>". Want to fold this into your patch? 
diff --git a/x86/cstart.S b/x86/cstart.S index 7d9ed96..fb6eda5 100644 --- a/x86/cstart.S +++ b/x86/cstart.S @@ -97,17 +97,16 @@ mb_cmdline = 16 lea -4096(%esp), %eax /* fill GS_BASE in the GDT */ - mov $(APIC_DEFAULT_PHYS_BASE + APIC_ID), %ebx - mov (%ebx), %ebx - shr $24, %ebx - or %ax, percpu_descr+2(,%ebx,8) + mov (APIC_DEFAULT_PHYS_BASE + APIC_ID), %edx + shr $24, %edx + or %ax, percpu_descr+2(,%edx,8) shr $16, %eax - or %al, percpu_descr+4(,%ebx,8) - or %ah, percpu_descr+7(,%ebx,8) + or %al, percpu_descr+4(,%edx,8) + or %ah, percpu_descr+7(,%edx,8) lgdtl gdt32_descr - lea percpu_descr-gdt32(,%ebx,8), %eax + lea percpu_descr-gdt32(,%edx,8), %eax mov %ax, %gs .endm > + shr $24, %ebx > + or %ax, percpu_descr+2(,%ebx,8) > + > + shr $16, %eax > + or %al, percpu_descr+4(,%ebx,8) > + or %ah, percpu_descr+7(,%ebx,8) > + > + lgdtl gdt32_descr > + lea percpu_descr-gdt32(,%ebx,8), %eax > + mov %ax, %gs > + > .endm > > .macro setup_segments > @@ -188,16 +202,14 @@ load_tss: > mov (%eax), %eax > shr $24, %eax > mov %eax, %ebx > - shl $3, %ebx > mov $((tss_end - tss) / max_cpus), %edx > imul %edx > add $tss, %eax > - mov %ax, tss_descr+2(%ebx) > + mov %ax, tss_descr+2(,%ebx,8) > shr $16, %eax > - mov %al, tss_descr+4(%ebx) > - shr $8, %eax > - mov %al, tss_descr+7(%ebx) > - lea tss_descr-gdt32(%ebx), %eax > + mov %al, tss_descr+4(,%ebx,8) > + mov %ah, tss_descr+7(,%ebx,8) Is there a functional change here? If not, can you throw this into a separate patch? Thanks! > + lea tss_descr-gdt32(,%ebx,8), %eax > ltr %ax > ret > > > Paolo >