On 28/03/19 10:42, Vitaly Kuznetsov wrote:
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index fc96101e39ac..617443c1c6d7 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -7354,6 +7354,16 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
>>  	return 1;
>>  }
>>  
>> +static void vmx_pre_smi_save_state(struct kvm_vcpu *vcpu)
>> +{
>> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +
>> +	if (kvm_read_cr4(vcpu) & X86_CR4_VMXE) {
>> +		vmx->nested.smm.cr4_vmxe = true;
>> +		WARN_ON(vmx_set_cr4(vcpu, kvm_read_cr4(vcpu) & ~X86_CR4_VMXE));
>
> This WARN_ON fires: vmx_set_cr4() has the following check:
>
> 	if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
> 		return 1;
>
> X86_CR4_VMXE can't be unset while nested.vmxon is on....

Could be fixed like this:

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 47fbf91fe902..31ef4973e90a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2874,15 +2874,29 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	vmcs_writel(GUEST_CR3, guest_cr3);
 }
 
+static void __vmx_set_cr4_shadow(struct vcpu_vmx *vmx, unsigned long cr4,
+				 unsigned long hw_cr4)
+{
+	WARN_ON((hw_cr4 ^ cr4) & ~vmx->vcpu.arch.cr4_guest_owned_bits);
+	vmcs_writel(CR4_READ_SHADOW, cr4);
+}
+
+static void vmx_set_cr4_shadow(struct vcpu_vmx *vmx, unsigned long cr4)
+{
+	unsigned long hw_cr4 = vmcs_readl(GUEST_CR4);
+	__vmx_set_cr4_shadow(vmx, cr4, hw_cr4);
+}
+
 int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long hw_cr4;
+
 	/*
 	 * Pass through host's Machine Check Enable value to hw_cr4, which
 	 * is in force while we are in guest mode.  Do not let guests control
 	 * this bit, even if host CR4.MCE == 0.
 	 */
-	unsigned long hw_cr4;
-
 	hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
 	if (enable_unrestricted_guest)
 		hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
@@ -2944,8 +2958,8 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 			hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
 	}
 
-	vmcs_writel(CR4_READ_SHADOW, cr4);
 	vmcs_writel(GUEST_CR4, hw_cr4);
+	__vmx_set_cr4_shadow(vmx, cr4, hw_cr4);
 	return 0;
 }
 
@@ -7361,7 +7375,13 @@ static void vmx_pre_smi_save_state(struct kvm_vcpu *vcpu)
 
 	if (kvm_read_cr4(vcpu) & X86_CR4_VMXE) {
 		vmx->nested.smm.cr4_vmxe = true;
-		WARN_ON(vmx_set_cr4(vcpu, kvm_read_cr4(vcpu) & ~X86_CR4_VMXE));
+		/*
+		 * We cannot move nested.vmxon to nested.smm.vmxon until after
+		 * the state has been saved, on the other hand we need to
+		 * clear CR4.VMXE *before* it is saved to the SMRAM state save
+		 * area.  Fudge it directly in the VMCS.
+		 */
+		vmx_set_cr4_shadow(vmx, kvm_read_cr4(vcpu) & ~X86_CR4_VMXE);
 	}
 }
 
>
>> +	}
>> +}
>> +
>>  static int vmx_post_smi_save_state(struct kvm_vcpu *vcpu, char *smstate)
>>  {
>>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>> @@ -7390,6 +7400,16 @@ static int vmx_pre_rsm_load_state(struct kvm_vcpu *vcpu, u64 smbase)
>>  	return 0;
>>  }
>>  
>> +static void vmx_post_rsm_load_state(struct kvm_vcpu *vcpu)
>> +{
>> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +
>> +	if (vmx->nested.smm.cr4_vmxe) {
>> +		WARN_ON(vmx_set_cr4(vcpu, kvm_read_cr4(vcpu) | X86_CR4_VMXE));
>> +		vmx->nested.smm.cr4_vmxe = false;
>
> If we manage to pass the previous problem this will likely fail:
>
> post_rsm_load_state() is called with HF_SMM_MASK still set.
And this is hopefully just a matter of

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 706accf524c3..a508d07d7483 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2625,13 +2625,14 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
 		return X86EMUL_UNHANDLEABLE;
 	}
 
-	ctxt->ops->post_rsm_load_state(ctxt);
-
 	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
 		ctxt->ops->set_nmi_mask(ctxt, false);
 
 	ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
 		~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));
+
+	ctxt->ops->post_rsm_load_state(ctxt);
+
 	return X86EMUL_CONTINUE;
 }

but it's probably a good idea to also use vmx_set_cr4_shadow in
vmx_post_rsm_load_state.

At the same time, we should really try to unify the callbacks so that at
least Intel only uses pre_rsm_save and post_rsm_load.  After I have a test
case, I'll refresh Sean's patches and send them out.

Paolo

>> +	}
>> +}
>> +
>>  static int enable_smi_window(struct kvm_vcpu *vcpu)
>>  {
>>  	return 0;
>> @@ -7693,8 +7713,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
>>  	.setup_mce = vmx_setup_mce,
>>  
>>  	.smi_allowed = vmx_smi_allowed,
>> +	.pre_smi_save_state = vmx_pre_smi_save_state,
>>  	.post_smi_save_state = vmx_post_smi_save_state,
>>  	.pre_rsm_load_state = vmx_pre_rsm_load_state,
>> +	.post_rsm_load_state = vmx_post_rsm_load_state,
>>  	.enable_smi_window = enable_smi_window,
>>  
>>  	.check_nested_events = NULL,
>> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
>> index a1e00d0a2482..a3444625ca7f 100644
>> --- a/arch/x86/kvm/vmx/vmx.h
>> +++ b/arch/x86/kvm/vmx/vmx.h
>> @@ -162,6 +162,8 @@ struct nested_vmx {
>>  
>>  	/* SMM related state */
>>  	struct {
>> +		/* CR4.VMXE=1 on SMM entry? */
>> +		bool cr4_vmxe;
>>  		/* in VMX operation on SMM entry? */
>>  		bool vmxon;
>>  		/* in guest mode on SMM entry? */
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index abfac99fb7c5..1b07706df840 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -5966,6 +5966,11 @@ static int emulator_pre_rsm_load_state(struct x86_emulate_ctxt *ctxt, u64 smbase
>>  	return kvm_x86_ops->pre_rsm_load_state(emul_to_vcpu(ctxt), smbase);
>>  }
>>  
>> +static void emulator_post_rsm_load_state(struct x86_emulate_ctxt *ctxt)
>> +{
>> +	kvm_x86_ops->post_rsm_load_state(emul_to_vcpu(ctxt));
>> +}
>> +
>>  static const struct x86_emulate_ops emulate_ops = {
>>  	.read_gpr = emulator_read_gpr,
>>  	.write_gpr = emulator_write_gpr,
>> @@ -6006,6 +6011,7 @@ static const struct x86_emulate_ops emulate_ops = {
>>  	.get_hflags = emulator_get_hflags,
>>  	.set_hflags = emulator_set_hflags,
>>  	.pre_rsm_load_state = emulator_pre_rsm_load_state,
>> +	.post_rsm_load_state = emulator_post_rsm_load_state,
>>  };
>>  
>>  static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
>> @@ -7516,6 +7522,9 @@ static void enter_smm(struct kvm_vcpu *vcpu)
>>  	u32 cr0;
>>  
>>  	trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
>> +
>> +	kvm_x86_ops->pre_smi_save_state(vcpu);
>> +
>>  	memset(buf, 0, 512);
>>  	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
>>  		enter_smm_save_state_64(vcpu, buf);
>
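
As a side note on the ordering above, here is a minimal user-space sketch
(not kernel code) of the constraint that motivates the em_rsm() reordering:
restoring CR4.VMXE can only succeed once the SMM flag has been dropped.  The
toy_* names, the flag values and the simplified CR4 check are illustrative
stand-ins, not the real KVM definitions.

#include <stdbool.h>
#include <stdio.h>

#define X86_CR4_VMXE	(1UL << 13)
#define HF_SMM_MASK	(1UL << 6)

struct toy_vcpu {
	unsigned long cr4;
	unsigned long hflags;
	bool smm_cr4_vmxe;		/* CR4.VMXE was set on SMM entry */
};

/* Stand-in for the guest-visible rule: CR4.VMXE cannot be set inside SMM. */
static int toy_set_cr4(struct toy_vcpu *v, unsigned long cr4)
{
	if ((cr4 & X86_CR4_VMXE) && (v->hflags & HF_SMM_MASK))
		return 1;			/* rejected */
	v->cr4 = cr4;
	return 0;
}

static void toy_post_rsm_load_state(struct toy_vcpu *v)
{
	if (v->smm_cr4_vmxe) {
		if (toy_set_cr4(v, v->cr4 | X86_CR4_VMXE))
			printf("  restore of CR4.VMXE rejected (still in SMM)\n");
		v->smm_cr4_vmxe = false;
	}
}

/* Model of em_rsm(): run the callback before or after hflags are cleared. */
static void toy_rsm(struct toy_vcpu *v, bool callback_after_hflags)
{
	if (!callback_after_hflags)
		toy_post_rsm_load_state(v);	/* old ordering: fails */

	v->hflags &= ~HF_SMM_MASK;		/* leave SMM */

	if (callback_after_hflags)
		toy_post_rsm_load_state(v);	/* reordered as in the hunk above */
}

int main(void)
{
	struct toy_vcpu v;
	int i;

	for (i = 0; i < 2; i++) {
		v = (struct toy_vcpu){ .hflags = HF_SMM_MASK,
				       .smm_cr4_vmxe = true };
		toy_rsm(&v, i);
		printf("callback %s hflags update: CR4.VMXE=%d\n",
		       i ? "after" : "before", !!(v.cr4 & X86_CR4_VMXE));
	}
	return 0;
}

Running it shows the restore failing with the old ordering and succeeding
once the callback is moved after the hflags update.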