On 1/21/21 8:07 AM, Maxim Levitsky wrote:
> On Thu, 2021-01-21 at 01:55 -0500, Wei Huang wrote:
>> From: Bandan Das <bsd@xxxxxxxxxx>
>>
>> While running SVM related instructions (VMRUN/VMSAVE/VMLOAD), some AMD
>> CPUs check EAX against reserved memory regions (e.g. SMM memory on host)
>> before checking VMCB's instruction intercept. If EAX falls into such
>> memory areas, #GP is triggered before VMEXIT. This causes problems under
>> nested virtualization. To solve this problem, KVM needs to trap #GP and
>> check the instructions triggering #GP. For VM execution instructions,
>> KVM emulates these instructions.
>>
>> Co-developed-by: Wei Huang <wei.huang2@xxxxxxx>
>> Signed-off-by: Wei Huang <wei.huang2@xxxxxxx>
>> Signed-off-by: Bandan Das <bsd@xxxxxxxxxx>
>> ---
>>  arch/x86/kvm/svm/svm.c | 99 ++++++++++++++++++++++++++++++++++--------
>>  1 file changed, 81 insertions(+), 18 deletions(-)
>>
>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>> index 7ef171790d02..6ed523cab068 100644
>> --- a/arch/x86/kvm/svm/svm.c
>> +++ b/arch/x86/kvm/svm/svm.c
>> @@ -288,6 +288,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
>>  	if (!(efer & EFER_SVME)) {
>>  		svm_leave_nested(svm);
>>  		svm_set_gif(svm, true);
>> +		/* #GP intercept is still needed in vmware_backdoor */
>> +		if (!enable_vmware_backdoor)
>> +			clr_exception_intercept(svm, GP_VECTOR);
>
> Again I would prefer a flag for the errata workaround, but this is still
> better.

Instead of using !enable_vmware_backdoor, would the following be better?
Or is the existing form acceptable?

	if (!kvm_cpu_cap_has(X86_FEATURE_SVME_ADDR_CHK))
		clr_exception_intercept(svm, GP_VECTOR);

>
>>
>>  		/*
>>  		 * Free the nested guest state, unless we are in SMM.
>> @@ -309,6 +312,9 @@
>>
>>  	svm->vmcb->save.efer = efer | EFER_SVME;
>>  	vmcb_mark_dirty(svm->vmcb, VMCB_CR);
>> +	/* Enable #GP interception for SVM instructions */
>> +	set_exception_intercept(svm, GP_VECTOR);
>> +
>>  	return 0;
>>  }
>>
>> @@ -1957,24 +1963,6 @@ static int ac_interception(struct vcpu_svm *svm)
>>  	return 1;
>>  }
>>
>> -static int gp_interception(struct vcpu_svm *svm)
>> -{
>> -	struct kvm_vcpu *vcpu = &svm->vcpu;
>> -	u32 error_code = svm->vmcb->control.exit_info_1;
>> -
>> -	WARN_ON_ONCE(!enable_vmware_backdoor);
>> -
>> -	/*
>> -	 * VMware backdoor emulation on #GP interception only handles IN{S},
>> -	 * OUT{S}, and RDPMC, none of which generate a non-zero error code.
>> -	 */
>> -	if (error_code) {
>> -		kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
>> -		return 1;
>> -	}
>> -	return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
>> -}
>> -
>>  static bool is_erratum_383(void)
>>  {
>>  	int err, i;
>> @@ -2173,6 +2161,81 @@ static int vmrun_interception(struct vcpu_svm *svm)
>>  	return nested_svm_vmrun(svm);
>>  }
>>
>> +enum {
>> +	NOT_SVM_INSTR,
>> +	SVM_INSTR_VMRUN,
>> +	SVM_INSTR_VMLOAD,
>> +	SVM_INSTR_VMSAVE,
>> +};
>> +
>> +/* Return NOT_SVM_INSTR if not SVM instrs, otherwise return decode result */
>> +static int svm_instr_opcode(struct kvm_vcpu *vcpu)
>> +{
>> +	struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
>> +
>> +	if (ctxt->b != 0x1 || ctxt->opcode_len != 2)
>> +		return NOT_SVM_INSTR;
>> +
>> +	switch (ctxt->modrm) {
>> +	case 0xd8: /* VMRUN */
>> +		return SVM_INSTR_VMRUN;
>> +	case 0xda: /* VMLOAD */
>> +		return SVM_INSTR_VMLOAD;
>> +	case 0xdb: /* VMSAVE */
>> +		return SVM_INSTR_VMSAVE;
>> +	default:
>> +		break;
>> +	}
>> +
>> +	return NOT_SVM_INSTR;
>> +}
>> +
>> +static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
>> +{
>> +	int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
>> +		[SVM_INSTR_VMRUN] = vmrun_interception,
>> +		[SVM_INSTR_VMLOAD] = vmload_interception,
>> +		[SVM_INSTR_VMSAVE] = vmsave_interception,
>> +	};
>> +	struct vcpu_svm *svm = to_svm(vcpu);
>> +
>> +	return svm_instr_handlers[opcode](svm);
>> +}
>> +
>> +/*
>> + * #GP handling code. Note that #GP can be triggered under the following two
>> + * cases:
>> + * 1) SVM VM-related instructions (VMRUN/VMSAVE/VMLOAD) that trigger #GP on
>> + *    some AMD CPUs when EAX of these instructions are in the reserved memory
>> + *    regions (e.g. SMM memory on host).
>> + * 2) VMware backdoor
>> + */
>> +static int gp_interception(struct vcpu_svm *svm)
>> +{
>> +	struct kvm_vcpu *vcpu = &svm->vcpu;
>> +	u32 error_code = svm->vmcb->control.exit_info_1;
>> +	int opcode;
>> +
>> +	/* Both #GP cases have zero error_code */
>
> I would have kept the original description of possible #GP reasons
> for the VMWARE backdoor and that WARN_ON_ONCE that was removed.

Will do

>
>> +	if (error_code)
>> +		goto reinject;
>> +
>> +	/* Decode the instruction for usage later */
>> +	if (x86_emulate_decoded_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
>> +		goto reinject;
>> +
>> +	opcode = svm_instr_opcode(vcpu);
>> +	if (opcode)
>
> I prefer opcode != NOT_SVM_INSTR.
>
>> +		return emulate_svm_instr(vcpu, opcode);
>> +	else
>
> 'WARN_ON_ONCE(!enable_vmware_backdoor)' I think can be placed here.
>
>> +		return kvm_emulate_instruction(vcpu,
>> +				EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
>
> I tested the vmware backdoor a bit (using the kvm unit tests) and I found
> out a tiny pre-existing bug there:
>
> We shouldn't emulate the vmware backdoor for a nested guest, but rather
> let the nested guest handle it.
>
> The below patch (on top of your patches) works for me and allows the
> vmware backdoor test to pass when kvm unit tests run in a guest.

Can this fix be a separate patch? This problem exists even before this
patchset.
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index fe97b0e41824a..4557fdc9c3e1b 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -2243,7 +2243,7 @@ static int gp_interception(struct vcpu_svm *svm)
>  	opcode = svm_instr_opcode(vcpu);
>  	if (opcode)
>  		return emulate_svm_instr(vcpu, opcode);
> -	else
> +	else if (!is_guest_mode(vcpu))
>  		return kvm_emulate_instruction(vcpu,
>  				EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
>
>
> Best regards,
> 	Maxim Levitsky
>
>> +
>> +reinject:
>> +	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
>> +	return 1;
>> +}
>> +
>>  void svm_set_gif(struct vcpu_svm *svm, bool value)
>>  {
>>  	if (value) {
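As a side note for anyone following the decode path above: the check on
ctxt->b and ctxt->opcode_len in svm_instr_opcode() matches the two-byte
opcode 0F 01 that all three instructions share, and the ModRM byte then
selects VMRUN (0F 01 D8), VMLOAD (0F 01 DA) or VMSAVE (0F 01 DB). Below is
a minimal standalone sketch of that mapping. It is illustration only, not
kernel code; decode_svm_instr() is a made-up name, and the program simply
decodes raw instruction bytes instead of going through the KVM emulator:

	#include <stdio.h>

	enum { NOT_SVM_INSTR, SVM_INSTR_VMRUN, SVM_INSTR_VMLOAD,
	       SVM_INSTR_VMSAVE };

	static int decode_svm_instr(const unsigned char *insn)
	{
		/* All three instructions begin with the opcode 0F 01. */
		if (insn[0] != 0x0f || insn[1] != 0x01)
			return NOT_SVM_INSTR;

		switch (insn[2]) {	/* ModRM selects the instruction */
		case 0xd8:
			return SVM_INSTR_VMRUN;		/* 0F 01 D8 */
		case 0xda:
			return SVM_INSTR_VMLOAD;	/* 0F 01 DA */
		case 0xdb:
			return SVM_INSTR_VMSAVE;	/* 0F 01 DB */
		default:
			return NOT_SVM_INSTR;
		}
	}

	int main(void)
	{
		const unsigned char vmrun[] = { 0x0f, 0x01, 0xd8 };

		/* Prints 1, i.e. SVM_INSTR_VMRUN. */
		printf("%d\n", decode_svm_instr(vmrun));
		return 0;
	}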