Sean Christopherson <sean.j.christopherson@xxxxxxxxx> writes:

> Intel CPUs manually save and load CR4.VMXE internally instead of having
> it automatically switched via the SMM State Save.  Specifically, the
> SDM uses the following pseudocode to describe SMI and RSM:
>
> SMI:
>    enter SMM;
>    save the following internal to the processor:
>       CR4.VMXE
>       an indication of whether the logical processor was in VMX operation (root or non-root)
>    IF the logical processor is in VMX operation THEN
>       save current VMCS pointer internal to the processor;
>       leave VMX operation;
>       save VMX-critical state defined below
>    FI;
>    CR4.VMXE <- 0;
>    perform ordinary SMI delivery:
>       save processor state in SMRAM;
>       set processor state to standard SMM values;
>
> RSM:
>    IF VMXE = 1 in CR4 image in SMRAM THEN
>       fail and enter shutdown state;
>    ELSE
>       restore state normally from SMRAM;
>       <unrelated stuff omitted for brevity>
>    FI;
>    CR4.VMXE <- value stored internally;
>    <more stuff omitted for brevity>
>    leave SMM;
>
> Presumably, the manual handling of CR4.VMXE is done to avoid having
> to special case CR4.VMXE in the legacy state save/load flows.  As luck
> would have it, KVM has a similar conundrum in its SMM state save/load
> flows in that vmx_set_cr4() forbids CR4.VMXE from being set while in
> SMM.  The check in vmx_set_cr4() can cause RSM to fail when loading
> CR4 from the SMM save state area.
>
> Take a page out of the SDM and manually handle CR4.VMXE during SMI and
> RSM.  Alternatively, HF_SMM_MASK could be temporarily disabled when
> setting CR4 as part of RSM, but doing so is rather ugly due to the need
> to plumb the "from_rsm" information through to emulator_set_cr(), and
> also opens up a virtualization hole, e.g. KVM would not properly handle
> the (extremely unlikely) scenario where the guest toggles CR4.VMXE in
> the SMM save state area from 0->1.
>
> Reported-by: Jon Doron <arilou@xxxxxxxxx>
> Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx>
> Cc: Liran Alon <liran.alon@xxxxxxxxxx>
> Cc: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
> Fixes: 5bea5123cbf0 ("KVM: VMX: check nested state and CR4.VMXE against SMM")
> Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_emulate.h |  1 +
>  arch/x86/include/asm/kvm_host.h    |  2 ++
>  arch/x86/kvm/emulate.c             |  2 ++
>  arch/x86/kvm/svm.c                 | 12 ++++++++++++
>  arch/x86/kvm/vmx/vmx.c             | 22 ++++++++++++++++++++++
>  arch/x86/kvm/vmx/vmx.h             |  2 ++
>  arch/x86/kvm/x86.c                 |  9 +++++++++
>  7 files changed, 50 insertions(+)
>
> diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
> index b2a65d08d1f8..02fa7ec89315 100644
> --- a/arch/x86/include/asm/kvm_emulate.h
> +++ b/arch/x86/include/asm/kvm_emulate.h
> @@ -227,6 +227,7 @@ struct x86_emulate_ops {
>  	unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
>  	void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags);
>  	int (*pre_rsm_load_state)(struct x86_emulate_ctxt *ctxt, u64 smbase);
> +	void (*post_rsm_load_state)(struct x86_emulate_ctxt *ctxt);
>
>  };
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 29ce45f41ee5..0ceb8b353dc0 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1181,8 +1181,10 @@ struct kvm_x86_ops {
>  	void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
>
>  	int (*smi_allowed)(struct kvm_vcpu *vcpu);
> +	void (*pre_smi_save_state)(struct kvm_vcpu *vcpu);
>  	int (*post_smi_save_state)(struct kvm_vcpu *vcpu, char *smstate);
>  	int (*pre_rsm_load_state)(struct kvm_vcpu *vcpu, u64 smbase);
> +	void (*post_rsm_load_state)(struct kvm_vcpu *vcpu);
>  	int (*enable_smi_window)(struct kvm_vcpu *vcpu);
>
>  	int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index ca60eb3358d9..706accf524c3 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -2625,6 +2625,8 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
>  		return X86EMUL_UNHANDLEABLE;
>  	}
>
> +	ctxt->ops->post_rsm_load_state(ctxt);
> +
>  	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
>  		ctxt->ops->set_nmi_mask(ctxt, false);
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index c0e511c3b298..edc12fb39b32 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -6193,6 +6193,11 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu)
>  	return 1;
>  }
>
> +static void svm_pre_smi_save_state(struct kvm_vcpu *vcpu)
> +{
> +
> +}
> +
>  static int svm_post_smi_save_state(struct kvm_vcpu *vcpu, char *smstate)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> @@ -6243,6 +6248,11 @@ static int svm_pre_rsm_load_state(struct kvm_vcpu *vcpu, u64 smbase)
>  	return ret;
>  }
>
> +static void svm_post_rsm_load_state(struct kvm_vcpu *vcpu)
> +{
> +
> +}
> +
>  static int enable_smi_window(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> @@ -7251,8 +7261,10 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>  	.setup_mce = svm_setup_mce,
>
>  	.smi_allowed = svm_smi_allowed,
> +	.pre_smi_save_state = svm_pre_smi_save_state,
>  	.post_smi_save_state = svm_post_smi_save_state,
>  	.pre_rsm_load_state = svm_pre_rsm_load_state,
> +	.post_rsm_load_state = svm_post_rsm_load_state,
>  	.enable_smi_window = enable_smi_window,
>
>  	.mem_enc_op = svm_mem_enc_op,
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index fc96101e39ac..617443c1c6d7 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7354,6 +7354,16 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
>  	return 1;
>  }
>
> +static void vmx_pre_smi_save_state(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +	if (kvm_read_cr4(vcpu) & X86_CR4_VMXE) {
> +		vmx->nested.smm.cr4_vmxe = true;
> +		WARN_ON(vmx_set_cr4(vcpu, kvm_read_cr4(vcpu) & ~X86_CR4_VMXE));

This WARN_ON fires: vmx_set_cr4() has the following check:

	if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
		return 1;

so X86_CR4_VMXE can't be unset while nested.vmxon is set.

> +	}
> +}
> +
>  static int vmx_post_smi_save_state(struct kvm_vcpu *vcpu, char *smstate)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -7390,6 +7400,16 @@ static int vmx_pre_rsm_load_state(struct kvm_vcpu *vcpu, u64 smbase)
>  	return 0;
>  }
>
> +static void vmx_post_rsm_load_state(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +	if (vmx->nested.smm.cr4_vmxe) {
> +		WARN_ON(vmx_set_cr4(vcpu, kvm_read_cr4(vcpu) | X86_CR4_VMXE));
> +		vmx->nested.smm.cr4_vmxe = false;

Even if we get past the previous problem, this will likely fail as
well: post_rsm_load_state() is called with HF_SMM_MASK still set.
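The check that 5bea5123cbf0 extended in vmx_set_cr4() is the blocker;
if I remember correctly it looks roughly like this (paraphrasing, the
exact comment and surrounding context may differ):

	if (cr4 & X86_CR4_VMXE) {
		/* VMX cannot be enabled while in SMM */
		if (!nested_vmx_allowed(vcpu) || is_smm(vcpu))
			return 1;
	}

With HF_SMM_MASK still set, is_smm(vcpu) is true, so the attempt to
restore CR4.VMXE here is rejected and this WARN_ON fires too.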
> +	}
> +}
> +
>  static int enable_smi_window(struct kvm_vcpu *vcpu)
>  {
>  	return 0;
> @@ -7693,8 +7713,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
>  	.setup_mce = vmx_setup_mce,
>
>  	.smi_allowed = vmx_smi_allowed,
> +	.pre_smi_save_state = vmx_pre_smi_save_state,
>  	.post_smi_save_state = vmx_post_smi_save_state,
>  	.pre_rsm_load_state = vmx_pre_rsm_load_state,
> +	.post_rsm_load_state = vmx_post_rsm_load_state,
>  	.enable_smi_window = enable_smi_window,
>
>  	.check_nested_events = NULL,
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index a1e00d0a2482..a3444625ca7f 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -162,6 +162,8 @@ struct nested_vmx {
>
>  	/* SMM related state */
>  	struct {
> +		/* CR4.VMXE=1 on SMM entry? */
> +		bool cr4_vmxe;
>  		/* in VMX operation on SMM entry? */
>  		bool vmxon;
>  		/* in guest mode on SMM entry? */
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index abfac99fb7c5..1b07706df840 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5966,6 +5966,11 @@ static int emulator_pre_rsm_load_state(struct x86_emulate_ctxt *ctxt, u64 smbase
>  	return kvm_x86_ops->pre_rsm_load_state(emul_to_vcpu(ctxt), smbase);
>  }
>
> +static void emulator_post_rsm_load_state(struct x86_emulate_ctxt *ctxt)
> +{
> +	kvm_x86_ops->post_rsm_load_state(emul_to_vcpu(ctxt));
> +}
> +
>  static const struct x86_emulate_ops emulate_ops = {
>  	.read_gpr = emulator_read_gpr,
>  	.write_gpr = emulator_write_gpr,
> @@ -6006,6 +6011,7 @@ static const struct x86_emulate_ops emulate_ops = {
>  	.get_hflags = emulator_get_hflags,
>  	.set_hflags = emulator_set_hflags,
>  	.pre_rsm_load_state = emulator_pre_rsm_load_state,
> +	.post_rsm_load_state = emulator_post_rsm_load_state,
>  };
>
>  static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
> @@ -7516,6 +7522,9 @@ static void enter_smm(struct kvm_vcpu *vcpu)
>  	u32 cr0;
>
>  	trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
> +
> +	kvm_x86_ops->pre_smi_save_state(vcpu);
> +
>  	memset(buf, 0, 512);
>  	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
>  		enter_smm_save_state_64(vcpu, buf);

--
Vitaly