Make host_state a property of a loaded_vmcs so that it can be used as a cache of the VMCS fields, e.g. to lazily VMWRITE the corresponding VMCS field. Treating host_state as a cache does not work if it's not VMCS specific as the cache would become incoherent when switching between vmcs01 and vmcs02. Move vmcs_host_cr3 and vmcs_host_cr4 into host_state. Explicitly zero out host_state when allocating a new VMCS for a loaded_vmcs. Unlike the pre-existing vmcs_host_cr{3,4} usage, the segment information is not guaranteed to be (re)initialized when running a new nested VMCS, e.g. HOST_FS_BASE is not written in vmx_set_constant_host_state(). Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx> --- arch/x86/kvm/vmx.c | 72 ++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2f070f192906..c66fcb01f355 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -204,6 +204,21 @@ struct vmcs { char data[0]; }; +/* + * vmcs_host_state tracks registers that are loaded from the VMCS on VMEXIT + * and whose values change infrequently, but are not consant. I.e. this is + * used as a write-through cache of the corresponding VMCS fields. + */ +struct vmcs_host_state { + unsigned long cr3; /* May not match real cr3 */ + unsigned long cr4; /* May not match real cr4 */ + + u16 fs_sel, gs_sel, ldt_sel; +#ifdef CONFIG_X86_64 + u16 ds_sel, es_sel; +#endif +}; + /* * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs @@ -215,14 +230,13 @@ struct loaded_vmcs { int cpu; bool launched; bool nmi_known_unmasked; - unsigned long vmcs_host_cr3; /* May not match real cr3 */ - unsigned long vmcs_host_cr4; /* May not match real cr4 */ /* Support for vnmi-less CPUs */ int soft_vnmi_blocked; ktime_t entry_time; s64 vnmi_blocked_time; unsigned long *msr_bitmap; struct list_head loaded_vmcss_on_cpu_link; + struct vmcs_host_state host_state; }; struct shared_msr_entry { @@ -799,12 +813,6 @@ struct vcpu_vmx { struct vmx_msr_entry host[NR_AUTOLOAD_MSRS]; } msr_autoload; - struct { - u16 fs_sel, gs_sel, ldt_sel; -#ifdef CONFIG_X86_64 - u16 ds_sel, es_sel; -#endif - } host_state; struct { int vm86_active; ulong save_rflags; @@ -2571,6 +2579,7 @@ static unsigned long segment_base(u16 selector) static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs_host_state *host_state; #ifdef CONFIG_X86_64 int cpu = raw_smp_processor_id(); #endif @@ -2582,16 +2591,17 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) return; vmx->loaded_cpu_state = vmx->loaded_vmcs; + host_state = &vmx->loaded_cpu_state->host_state; /* * Set host fs and gs selectors. Unfortunately, 22.2.3 does not * allow segment selectors with cpl > 0 or ti == 1. */ - vmx->host_state.ldt_sel = kvm_read_ldt(); + host_state->ldt_sel = kvm_read_ldt(); #ifdef CONFIG_X86_64 - savesegment(ds, vmx->host_state.ds_sel); - savesegment(es, vmx->host_state.es_sel); + savesegment(ds, host_state->ds_sel); + savesegment(es, host_state->es_sel); save_fsgs_for_kvm(); fs_sel = current->thread.fsindex; @@ -2609,12 +2619,12 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) gs_base = segment_base(gs_sel); #endif - vmx->host_state.fs_sel = fs_sel; + host_state->fs_sel = fs_sel; if (!(fs_sel & 7)) vmcs_write16(HOST_FS_SELECTOR, fs_sel); else vmcs_write16(HOST_FS_SELECTOR, 0); - vmx->host_state.gs_sel = gs_sel; + host_state->gs_sel = gs_sel; if (!(gs_sel & 7)) vmcs_write16(HOST_GS_SELECTOR, gs_sel); else @@ -2633,10 +2643,13 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) { + struct vmcs_host_state *host_state; + if (!vmx->loaded_cpu_state) return; WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs); + host_state = &vmx->loaded_cpu_state->host_state; ++vmx->vcpu.stat.host_state_reload; vmx->loaded_cpu_state = NULL; @@ -2645,20 +2658,20 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) if (is_long_mode(&vmx->vcpu)) rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); #endif - if (vmx->host_state.ldt_sel || (vmx->host_state.gs_sel & 7)) { - kvm_load_ldt(vmx->host_state.ldt_sel); + if (host_state->ldt_sel || (host_state->gs_sel & 7)) { + kvm_load_ldt(host_state->ldt_sel); #ifdef CONFIG_X86_64 - load_gs_index(vmx->host_state.gs_sel); + load_gs_index(host_state->gs_sel); #else - loadsegment(gs, vmx->host_state.gs_sel); + loadsegment(gs, host_state->gs_sel); #endif } - if (vmx->host_state.fs_sel & 7) - loadsegment(fs, vmx->host_state.fs_sel); + if (host_state->fs_sel & 7) + loadsegment(fs, host_state->fs_sel); #ifdef CONFIG_X86_64 - if (unlikely(vmx->host_state.ds_sel | vmx->host_state.es_sel)) { - loadsegment(ds, vmx->host_state.ds_sel); - loadsegment(es, vmx->host_state.es_sel); + if (unlikely(host_state->ds_sel | host_state->es_sel)) { + loadsegment(ds, host_state->ds_sel); + loadsegment(es, host_state->es_sel); } #endif invalidate_tss_limit(); @@ -4470,6 +4483,9 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) evmcs->hv_enlightenments_control.msr_bitmap = 1; } } + + memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state)); + return 0; out_vmcs: @@ -5940,12 +5956,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) */ cr3 = __read_cr3(); vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ - vmx->loaded_vmcs->vmcs_host_cr3 = cr3; + vmx->loaded_vmcs->host_state.cr3 = cr3; /* Save the most likely value for this task's CR4 in the VMCS. */ cr4 = cr4_read_shadow(); vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ - vmx->loaded_vmcs->vmcs_host_cr4 = cr4; + vmx->loaded_vmcs->host_state.cr4 = cr4; vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ #ifdef CONFIG_X86_64 @@ -10007,15 +10023,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); cr3 = __get_current_cr3_fast(); - if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) { + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { vmcs_writel(HOST_CR3, cr3); - vmx->loaded_vmcs->vmcs_host_cr3 = cr3; + vmx->loaded_vmcs->host_state.cr3 = cr3; } cr4 = cr4_read_shadow(); - if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) { + if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { vmcs_writel(HOST_CR4, cr4); - vmx->loaded_vmcs->vmcs_host_cr4 = cr4; + vmx->loaded_vmcs->host_state.cr4 = cr4; } /* When single-stepping over STI and MOV SS, we must clear the -- 2.18.0