Intel CPUs manually save and load CR4.VMXE internally instead of having
it automatically switched via the SMM State Save.  Specifically, the
SDM uses the following pseudocode to describe SMI and RSM:

  SMI:
    enter SMM;
    save the following internal to the processor:
      CR4.VMXE
      an indication of whether the logical processor was in VMX
      operation (root or non-root)
    IF the logical processor is in VMX operation
    THEN
      save current VMCS pointer internal to the processor;
      leave VMX operation;
      save VMX-critical state defined below
    FI;
    CR4.VMXE <- 0;
    perform ordinary SMI delivery:
      save processor state in SMRAM;
      set processor state to standard SMM values;

  RSM:
    IF VMXE = 1 in CR4 image in SMRAM
    THEN fail and enter shutdown state;
    ELSE
      restore state normally from SMRAM;
      <unrelated stuff omitted for brevity>
    FI;
    CR4.VMXE <- value stored internally;
    <more stuff omitted for brevity>
    leave SMM;

Presumably, the manual handling of CR4.VMXE is done to avoid having to
special-case CR4.VMXE in the legacy state save/load flows.  As luck
would have it, KVM has a similar conundrum in its SMM state save/load
flows in that vmx_set_cr4() forbids CR4.VMXE from being set while in
SMM.  The check in vmx_set_cr4() can cause RSM to fail when loading CR4
from the SMM save state area.

Take a page out of the SDM and manually handle CR4.VMXE during SMI and
RSM.  Alternatively, HF_SMM_MASK could be temporarily disabled when
setting CR4 as part of RSM, but doing so is rather ugly due to the need
to plumb the "from_rsm" information through to emulator_set_cr(), and
also opens up a virtualization hole, e.g. KVM would not properly handle
the (extremely unlikely) scenario where the guest toggles CR4.VMXE in
the SMM save state area from 0->1.

Reported-by: Jon Doron <arilou@xxxxxxxxx>
Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx>
Cc: Liran Alon <liran.alon@xxxxxxxxxx>
Cc: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
Fixes: 5bea5123cbf0 ("KVM: VMX: check nested state and CR4.VMXE against SMM")
Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
---
 arch/x86/include/asm/kvm_emulate.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  2 ++
 arch/x86/kvm/emulate.c             |  2 ++
 arch/x86/kvm/svm.c                 | 12 ++++++++++++
 arch/x86/kvm/vmx/vmx.c             | 22 ++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.h             |  2 ++
 arch/x86/kvm/x86.c                 |  9 +++++++++
 7 files changed, 50 insertions(+)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index b2a65d08d1f8..02fa7ec89315 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -227,6 +227,7 @@ struct x86_emulate_ops {
 	unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
 	void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags);
 	int (*pre_rsm_load_state)(struct x86_emulate_ctxt *ctxt, u64 smbase);
+	void (*post_rsm_load_state)(struct x86_emulate_ctxt *ctxt);
 
 };
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 29ce45f41ee5..0ceb8b353dc0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1181,8 +1181,10 @@ struct kvm_x86_ops {
 	void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
 
 	int (*smi_allowed)(struct kvm_vcpu *vcpu);
+	void (*pre_smi_save_state)(struct kvm_vcpu *vcpu);
 	int (*post_smi_save_state)(struct kvm_vcpu *vcpu, char *smstate);
 	int (*pre_rsm_load_state)(struct kvm_vcpu *vcpu, u64 smbase);
+	void (*post_rsm_load_state)(struct kvm_vcpu *vcpu);
 	int (*enable_smi_window)(struct kvm_vcpu *vcpu);
 
 	int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index ca60eb3358d9..706accf524c3 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2625,6 +2625,8 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
 		return X86EMUL_UNHANDLEABLE;
 	}
 
+	ctxt->ops->post_rsm_load_state(ctxt);
+
 	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
 		ctxt->ops->set_nmi_mask(ctxt, false);
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c0e511c3b298..edc12fb39b32 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -6193,6 +6193,11 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static void svm_pre_smi_save_state(struct kvm_vcpu *vcpu)
+{
+
+}
+
 static int svm_post_smi_save_state(struct kvm_vcpu *vcpu, char *smstate)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -6243,6 +6248,11 @@ static int svm_pre_rsm_load_state(struct kvm_vcpu *vcpu, u64 smbase)
 	return ret;
 }
 
+static void svm_post_rsm_load_state(struct kvm_vcpu *vcpu)
+{
+
+}
+
 static int enable_smi_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -7251,8 +7261,10 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.setup_mce = svm_setup_mce,
 
 	.smi_allowed = svm_smi_allowed,
+	.pre_smi_save_state = svm_pre_smi_save_state,
 	.post_smi_save_state = svm_post_smi_save_state,
 	.pre_rsm_load_state = svm_pre_rsm_load_state,
+	.post_rsm_load_state = svm_post_rsm_load_state,
 	.enable_smi_window = enable_smi_window,
 
 	.mem_enc_op = svm_mem_enc_op,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index fc96101e39ac..617443c1c6d7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7354,6 +7354,16 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static void vmx_pre_smi_save_state(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (kvm_read_cr4(vcpu) & X86_CR4_VMXE) {
+		vmx->nested.smm.cr4_vmxe = true;
+		WARN_ON(vmx_set_cr4(vcpu, kvm_read_cr4(vcpu) & ~X86_CR4_VMXE));
+	}
+}
+
 static int vmx_post_smi_save_state(struct kvm_vcpu *vcpu, char *smstate)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7390,6 +7400,16 @@ static int vmx_pre_rsm_load_state(struct kvm_vcpu *vcpu, u64 smbase)
 	return 0;
 }
 
+static void vmx_post_rsm_load_state(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (vmx->nested.smm.cr4_vmxe) {
+		WARN_ON(vmx_set_cr4(vcpu, kvm_read_cr4(vcpu) | X86_CR4_VMXE));
+		vmx->nested.smm.cr4_vmxe = false;
+	}
+}
+
 static int enable_smi_window(struct kvm_vcpu *vcpu)
 {
 	return 0;
@@ -7693,8 +7713,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.setup_mce = vmx_setup_mce,
 
 	.smi_allowed = vmx_smi_allowed,
+	.pre_smi_save_state = vmx_pre_smi_save_state,
 	.post_smi_save_state = vmx_post_smi_save_state,
 	.pre_rsm_load_state = vmx_pre_rsm_load_state,
+	.post_rsm_load_state = vmx_post_rsm_load_state,
 	.enable_smi_window = enable_smi_window,
 
 	.check_nested_events = NULL,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index a1e00d0a2482..a3444625ca7f 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -162,6 +162,8 @@ struct nested_vmx {
 
 	/* SMM related state */
 	struct {
+		/* CR4.VMXE=1 on SMM entry? */
+		bool cr4_vmxe;
 		/* in VMX operation on SMM entry? */
 		bool vmxon;
 		/* in guest mode on SMM entry? */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index abfac99fb7c5..1b07706df840 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5966,6 +5966,11 @@ static int emulator_pre_rsm_load_state(struct x86_emulate_ctxt *ctxt, u64 smbase
 	return kvm_x86_ops->pre_rsm_load_state(emul_to_vcpu(ctxt), smbase);
 }
 
+static void emulator_post_rsm_load_state(struct x86_emulate_ctxt *ctxt)
+{
+	kvm_x86_ops->post_rsm_load_state(emul_to_vcpu(ctxt));
+}
+
 static const struct x86_emulate_ops emulate_ops = {
 	.read_gpr = emulator_read_gpr,
 	.write_gpr = emulator_write_gpr,
@@ -6006,6 +6011,7 @@ static const struct x86_emulate_ops emulate_ops = {
 	.get_hflags = emulator_get_hflags,
 	.set_hflags = emulator_set_hflags,
 	.pre_rsm_load_state = emulator_pre_rsm_load_state,
+	.post_rsm_load_state = emulator_post_rsm_load_state,
 };
 
 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -7516,6 +7522,9 @@ static void enter_smm(struct kvm_vcpu *vcpu)
 	u32 cr0;
 
 	trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
+
+	kvm_x86_ops->pre_smi_save_state(vcpu);
+
 	memset(buf, 0, 512);
 	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
 		enter_smm_save_state_64(vcpu, buf);
-- 
2.21.0