From: Bandan Das <bsd@xxxxxxxxxx> While running SVM related instructions (VMRUN/VMSAVE/VMLOAD), some AMD CPUs check EAX against reserved memory regions (e.g. SMM memory on host) before checking VMCB's instruction intercept. If EAX falls into such memory areas, #GP is triggered before VMEXIT. This causes problem under nested virtualization. To solve this problem, KVM needs to trap #GP and check the instructions triggering #GP. For VM execution instructions, KVM emulates these instructions. Co-developed-by: Wei Huang <wei.huang2@xxxxxxx> Signed-off-by: Wei Huang <wei.huang2@xxxxxxx> Signed-off-by: Bandan Das <bsd@xxxxxxxxxx> --- arch/x86/kvm/svm/svm.c | 99 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 81 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7ef171790d02..6ed523cab068 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -288,6 +288,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) if (!(efer & EFER_SVME)) { svm_leave_nested(svm); svm_set_gif(svm, true); + /* #GP intercept is still needed in vmware_backdoor */ + if (!enable_vmware_backdoor) + clr_exception_intercept(svm, GP_VECTOR); /* * Free the nested guest state, unless we are in SMM. @@ -309,6 +312,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) svm->vmcb->save.efer = efer | EFER_SVME; vmcb_mark_dirty(svm->vmcb, VMCB_CR); + /* Enable #GP interception for SVM instructions */ + set_exception_intercept(svm, GP_VECTOR); + return 0; } @@ -1957,24 +1963,6 @@ static int ac_interception(struct vcpu_svm *svm) return 1; } -static int gp_interception(struct vcpu_svm *svm) -{ - struct kvm_vcpu *vcpu = &svm->vcpu; - u32 error_code = svm->vmcb->control.exit_info_1; - - WARN_ON_ONCE(!enable_vmware_backdoor); - - /* - * VMware backdoor emulation on #GP interception only handles IN{S}, - * OUT{S}, and RDPMC, none of which generate a non-zero error code. - */ - if (error_code) { - kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); - return 1; - } - return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); -} - static bool is_erratum_383(void) { int err, i; @@ -2173,6 +2161,81 @@ static int vmrun_interception(struct vcpu_svm *svm) return nested_svm_vmrun(svm); } +enum { + NOT_SVM_INSTR, + SVM_INSTR_VMRUN, + SVM_INSTR_VMLOAD, + SVM_INSTR_VMSAVE, +}; + +/* Return NOT_SVM_INSTR if not SVM instrs, otherwise return decode result */ +static int svm_instr_opcode(struct kvm_vcpu *vcpu) +{ + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; + + if (ctxt->b != 0x1 || ctxt->opcode_len != 2) + return NOT_SVM_INSTR; + + switch (ctxt->modrm) { + case 0xd8: /* VMRUN */ + return SVM_INSTR_VMRUN; + case 0xda: /* VMLOAD */ + return SVM_INSTR_VMLOAD; + case 0xdb: /* VMSAVE */ + return SVM_INSTR_VMSAVE; + default: + break; + } + + return NOT_SVM_INSTR; +} + +static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode) +{ + int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = { + [SVM_INSTR_VMRUN] = vmrun_interception, + [SVM_INSTR_VMLOAD] = vmload_interception, + [SVM_INSTR_VMSAVE] = vmsave_interception, + }; + struct vcpu_svm *svm = to_svm(vcpu); + + return svm_instr_handlers[opcode](svm); +} + +/* + * #GP handling code. Note that #GP can be triggered under the following two + * cases: + * 1) SVM VM-related instructions (VMRUN/VMSAVE/VMLOAD) that trigger #GP on + * some AMD CPUs when EAX of these instructions are in the reserved memory + * regions (e.g. SMM memory on host). + * 2) VMware backdoor + */ +static int gp_interception(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + u32 error_code = svm->vmcb->control.exit_info_1; + int opcode; + + /* Both #GP cases have zero error_code */ + if (error_code) + goto reinject; + + /* Decode the instruction for usage later */ + if (x86_emulate_decoded_instruction(vcpu, 0, NULL, 0) != EMULATION_OK) + goto reinject; + + opcode = svm_instr_opcode(vcpu); + if (opcode) + return emulate_svm_instr(vcpu, opcode); + else + return kvm_emulate_instruction(vcpu, + EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE); + +reinject: + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + return 1; +} + void svm_set_gif(struct vcpu_svm *svm, bool value) { if (value) { -- 2.27.0