On 18/02/2017 04:29, Andy Lutomirski wrote:
> There's no code here because the patch is trivial, but I want to run
> the idea by you all first to see if there are any issues.
>
> VMX is silly and forces the TSS limit to the minimum on VM exits.  KVM
> wastes lots of cycles bumping it back up to accommodate the io bitmap.

Actually looked at the code now...  reload_tss is only invoked for
userspace exits, so removing it is a nice-to-have, but it wouldn't show
on most workloads.

Still, it does save 150-200 clock cycles to remove it (I just commented
out reload_tss() from __vmx_load_host_state to test).  Another 100-150
could be saved if we could just use rdgsbase/wrgsbase, instead of
rdmsr/wrmsr, to read and write the kernel GS base.  Super hacky patch
after sig.

vmx_save_host_state is really slow...  It would be nice if Intel
defined an XSAVES state to do it (just like AMD's vmload/vmsave).

Paolo

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9856b73a21ad..e76bfec463bc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2138,9 +2138,12 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 #endif
 
 #ifdef CONFIG_X86_64
-	rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+	local_irq_disable();
+	asm volatile("swapgs; rdgsbase %0" : "=r" (vmx->msr_host_kernel_gs_base));
 	if (is_long_mode(&vmx->vcpu))
-		wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+		asm volatile("wrgsbase %0" : : "r" (vmx->msr_guest_kernel_gs_base));
+	asm volatile("swapgs");
+	local_irq_enable();
 #endif
 	if (boot_cpu_has(X86_FEATURE_MPX))
 		rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
@@ -2152,14 +2155,20 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 
 static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 {
+	unsigned long flags;
+
 	if (!vmx->host_state.loaded)
 		return;
 
 	++vmx->vcpu.stat.host_state_reload;
 	vmx->host_state.loaded = 0;
 #ifdef CONFIG_X86_64
+	local_irq_save(flags);
+	asm("swapgs");
 	if (is_long_mode(&vmx->vcpu))
-		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+		asm volatile("rdgsbase %0" : "=r" (vmx->msr_guest_kernel_gs_base));
+	asm volatile("wrgsbase %0; swapgs" : : "r" (vmx->msr_host_kernel_gs_base));
+	local_irq_restore(flags);
 #endif
 	if (vmx->host_state.gs_ldt_reload_needed) {
 		kvm_load_ldt(vmx->host_state.ldt_sel);
@@ -2177,10 +2186,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 		loadsegment(es, vmx->host_state.es_sel);
 	}
 #endif
-	reload_tss();
-#ifdef CONFIG_X86_64
-	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
-#endif
+	//reload_tss();
 	if (vmx->host_state.msr_host_bndcfgs)
 		wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
 	load_gdt(this_cpu_ptr(&host_gdt));
@@ -3469,6 +3475,7 @@ static int hardware_enable(void)
 		wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
 	}
 	cr4_set_bits(X86_CR4_VMXE);
+	cr4_set_bits(X86_CR4_FSGSBASE);
 
 	if (vmm_exclusive) {
 		kvm_cpu_vmxon(phys_addr);

> I propose that we rework this.  Add a percpu variable that indicates
> whether the TSS limit needs to be refreshed.  On task switch, if the
> new task has TIF_IO_BITMAP set, then check that flag and, if set,
> refresh TR and clear the flag.  On VMX exit, set the flag.
>
> The TSS limit is (phew!) invisible to userspace, so we don't have ABI
> issues to worry about here.  We also shouldn't have security issues
> because a too-low TSS limit just results in unprivileged IO operations
> generating #GP, which is exactly what we want.
>
> What do you all think?  I expect a speedup of a couple hundred cycles
> on each VM exit.
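
The proposal above is prose-only, so here is a minimal sketch of what
the percpu flag could look like.  All names here (the flag, the two
wrappers, and the force_reload_TR() helper) are invented for
illustration; this is not an actual patch:

	#include <linux/percpu.h>

	/*
	 * Set on every VMX exit: the CPU has clamped the TSS limit to
	 * the minimum, so the io bitmap is effectively ignored until
	 * TR is reloaded.
	 */
	static DEFINE_PER_CPU(bool, tss_limit_stale);

	/* Called from the VMX exit path instead of reload_tss(). */
	static inline void vmx_mark_tss_limit_stale(void)
	{
		this_cpu_write(tss_limit_stale, true);
	}

	/*
	 * Called on task switch, but only when the incoming task has
	 * TIF_IO_BITMAP set; every other task can live with a too-low
	 * limit, since its port accesses #GP anyway.
	 */
	static inline void refresh_tss_limit(void)
	{
		if (this_cpu_read(tss_limit_stale)) {
			/*
			 * Invented helper: mark the TSS descriptor in
			 * the GDT as available (clear the busy bit) and
			 * execute ltr again, which restores the full
			 * TSS limit.
			 */
			force_reload_TR();
			this_cpu_write(tss_limit_stale, false);
		}
	}

That way the cost of the TR reload is only paid by tasks that actually
use an io bitmap, rather than on every userspace exit.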