On Tue, Jun 13, 2023 at 01:30:35PM -0700, Sean Christopherson wrote: > Reject KVM_SET_SREGS{2} with -EINVAL if the incoming CR0 is invalid, > e.g. due to setting bits 63:32, illegal combinations, or to a value that > isn't allowed in VMX (non-)root mode. The VMX checks in particular are > "fun" as failure to disallow Real Mode for an L2 that is configured with > unrestricted guest disabled, when KVM itself has unrestricted guest > enabled, will result in KVM forcing VM86 mode to virtual Real Mode for > L2, but then fail to unwind the related metadata when synthesizing a > nested VM-Exit back to L1 (which has unrestricted guest enabled). > > Opportunistically fix a benign typo in the prototype for is_valid_cr4(). > > Cc: stable@xxxxxxxxxxxxxxx > Reported-by: syzbot+5feef0b9ee9c8e9e5689@xxxxxxxxxxxxxxxxxxxxxxxxx > Closes: https://lore.kernel.org/all/000000000000f316b705fdf6e2b4@xxxxxxxxxx > Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx> > --- > arch/x86/include/asm/kvm-x86-ops.h | 1 + > arch/x86/include/asm/kvm_host.h | 3 ++- > arch/x86/kvm/svm/svm.c | 6 ++++++ > arch/x86/kvm/vmx/vmx.c | 28 ++++++++++++++++++------ > arch/x86/kvm/x86.c | 34 +++++++++++++++++++----------- > 5 files changed, 52 insertions(+), 20 deletions(-) > > diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h > index 13bc212cd4bc..e3054e3e46d5 100644 > --- a/arch/x86/include/asm/kvm-x86-ops.h > +++ b/arch/x86/include/asm/kvm-x86-ops.h > @@ -37,6 +37,7 @@ KVM_X86_OP(get_segment) > KVM_X86_OP(get_cpl) > KVM_X86_OP(set_segment) > KVM_X86_OP(get_cs_db_l_bits) > +KVM_X86_OP(is_valid_cr0) > KVM_X86_OP(set_cr0) > KVM_X86_OP_OPTIONAL(post_set_cr3) > KVM_X86_OP(is_valid_cr4) > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 28bd38303d70..3bc146dfd38d 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1566,9 +1566,10 @@ struct kvm_x86_ops { > void (*set_segment)(struct kvm_vcpu *vcpu, > 
struct kvm_segment *var, int seg); > void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); > + bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); > void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); > void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); > - bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0); > + bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); > void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); > int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); > void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c > index e265834fe859..b29d0650582e 100644 > --- a/arch/x86/kvm/svm/svm.c > +++ b/arch/x86/kvm/svm/svm.c > @@ -1786,6 +1786,11 @@ static void sev_post_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) > } > } > > +static bool svm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) > +{ > + return true; > +} > + > void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) > { > struct vcpu_svm *svm = to_svm(vcpu); > @@ -4815,6 +4820,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { > .set_segment = svm_set_segment, > .get_cpl = svm_get_cpl, > .get_cs_db_l_bits = svm_get_cs_db_l_bits, > + .is_valid_cr0 = svm_is_valid_cr0, > .set_cr0 = svm_set_cr0, > .post_set_cr3 = sev_post_set_cr3, > .is_valid_cr4 = svm_is_valid_cr4, > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index 0ecf4be2c6af..355b0e8c9b00 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -3037,6 +3037,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu) > struct vcpu_vmx *vmx = to_vmx(vcpu); > struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm); > > + /* > + * KVM should never use VM86 to virtualize Real Mode when L2 is active, > + * as using VM86 is unnecessary if unrestricted guest is enabled, and > + * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0 > + * should VM-Fail and KVM 
should reject userspace attempts to stuff VM Entry shall fail (with CR0.PG=0), because SECONDARY_EXEC_UNRESTRICTED_GUEST will be cleared in L1's secondary_ctls_high MSR, and hence in its VMCS12? When will an unrestricted L1 run L2 as a restricted one? Shadow on EPT (L0 uses EPT for L1, and L1 uses shadow paging for L2)? > + * CR0.PG=0 when L2 is active. > + */ > + WARN_ON_ONCE(is_guest_mode(vcpu)); > + B.R. Yu