From: Bandan Das <bsd@xxxxxxxxxx> While running SVM related instructions (VMRUN/VMSAVE/VMLOAD), some AMD CPUs check EAX against reserved memory regions (e.g. SMM memory on host) before checking VMCB's instruction intercept. If EAX falls into such memory areas, #GP is triggered before VMEXIT. This causes problem under nested virtualization. To solve this problem, KVM needs to trap #GP and check the instructions triggering #GP. For VM execution instructions, KVM emulates these instructions. Co-developed-by: Wei Huang <wei.huang2@xxxxxxx> Signed-off-by: Wei Huang <wei.huang2@xxxxxxx> Signed-off-by: Bandan Das <bsd@xxxxxxxxxx> --- arch/x86/kvm/svm/svm.c | 109 ++++++++++++++++++++++++++++++++++------- 1 file changed, 91 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7ef171790d02..e5ca01e25e89 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -200,6 +200,8 @@ module_param(sev_es, int, 0444); bool __read_mostly dump_invalid_vmcb; module_param(dump_invalid_vmcb, bool, 0644); +bool svm_gp_erratum_intercept = true; + static u8 rsm_ins_bytes[] = "\x0f\xaa"; static void svm_complete_interrupts(struct vcpu_svm *svm); @@ -288,6 +290,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) if (!(efer & EFER_SVME)) { svm_leave_nested(svm); svm_set_gif(svm, true); + /* #GP intercept is still needed in vmware_backdoor */ + if (!enable_vmware_backdoor) + clr_exception_intercept(svm, GP_VECTOR); /* * Free the nested guest state, unless we are in SMM. @@ -309,6 +314,10 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) svm->vmcb->save.efer = efer | EFER_SVME; vmcb_mark_dirty(svm->vmcb, VMCB_CR); + /* Enable #GP interception for SVM instructions */ + if (svm_gp_erratum_intercept) + set_exception_intercept(svm, GP_VECTOR); + return 0; } @@ -1957,24 +1966,6 @@ static int ac_interception(struct vcpu_svm *svm) return 1; } -static int gp_interception(struct vcpu_svm *svm) -{ - struct kvm_vcpu *vcpu = &svm->vcpu; - u32 error_code = svm->vmcb->control.exit_info_1; - - WARN_ON_ONCE(!enable_vmware_backdoor); - - /* - * VMware backdoor emulation on #GP interception only handles IN{S}, - * OUT{S}, and RDPMC, none of which generate a non-zero error code. - */ - if (error_code) { - kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); - return 1; - } - return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); -} - static bool is_erratum_383(void) { int err, i; @@ -2173,6 +2164,88 @@ static int vmrun_interception(struct vcpu_svm *svm) return nested_svm_vmrun(svm); } +enum { + NONE_SVM_INSTR, + SVM_INSTR_VMRUN, + SVM_INSTR_VMLOAD, + SVM_INSTR_VMSAVE, +}; + +/* Return NONE_SVM_INSTR if not SVM instrs, otherwise return decode result */ +static int svm_instr_opcode(struct kvm_vcpu *vcpu) +{ + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; + + if (ctxt->b != 0x1 || ctxt->opcode_len != 2) + return NONE_SVM_INSTR; + + switch (ctxt->modrm) { + case 0xd8: /* VMRUN */ + return SVM_INSTR_VMRUN; + case 0xda: /* VMLOAD */ + return SVM_INSTR_VMLOAD; + case 0xdb: /* VMSAVE */ + return SVM_INSTR_VMSAVE; + default: + break; + } + + return NONE_SVM_INSTR; +} + +static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode) +{ + int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = { + [SVM_INSTR_VMRUN] = vmrun_interception, + [SVM_INSTR_VMLOAD] = vmload_interception, + [SVM_INSTR_VMSAVE] = vmsave_interception, + }; + struct vcpu_svm *svm = to_svm(vcpu); + + return svm_instr_handlers[opcode](svm); +} + +/* + * #GP handling code. Note that #GP can be triggered under the following two + * cases: + * 1) SVM VM-related instructions (VMRUN/VMSAVE/VMLOAD) that trigger #GP on + * some AMD CPUs when EAX of these instructions are in the reserved memory + * regions (e.g. SMM memory on host). + * 2) VMware backdoor + */ +static int gp_interception(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + u32 error_code = svm->vmcb->control.exit_info_1; + int opcode; + + /* Both #GP cases have zero error_code */ + if (error_code) + goto reinject; + + /* Decode the instruction for usage later */ + if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK) + goto reinject; + + opcode = svm_instr_opcode(vcpu); + + if (opcode == NONE_SVM_INSTR) { + WARN_ON_ONCE(!enable_vmware_backdoor); + + /* + * VMware backdoor emulation on #GP interception only handles + * IN{S}, OUT{S}, and RDPMC. + */ + return kvm_emulate_instruction(vcpu, + EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE); + } else + return emulate_svm_instr(vcpu, opcode); + +reinject: + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + return 1; +} + void svm_set_gif(struct vcpu_svm *svm, bool value) { if (value) { -- 2.27.0