On Tue, 2021-01-26 at 03:18 -0500, Wei Huang wrote:
> From: Bandan Das <bsd@xxxxxxxxxx>
> 
> While running SVM related instructions (VMRUN/VMSAVE/VMLOAD), some AMD
> CPUs check EAX against reserved memory regions (e.g. SMM memory on host)
> before checking VMCB's instruction intercept. If EAX falls into such
> memory areas, #GP is triggered before VMEXIT. This causes problem under
> nested virtualization. To solve this problem, KVM needs to trap #GP and
> check the instructions triggering #GP. For VM execution instructions,
> KVM emulates these instructions.
> 
> Co-developed-by: Wei Huang <wei.huang2@xxxxxxx>
> Signed-off-by: Wei Huang <wei.huang2@xxxxxxx>
> Signed-off-by: Bandan Das <bsd@xxxxxxxxxx>
> ---
>  arch/x86/kvm/svm/svm.c | 109 ++++++++++++++++++++++++++++++++++-------
>  1 file changed, 91 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 7ef171790d02..e5ca01e25e89 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -200,6 +200,8 @@ module_param(sev_es, int, 0444);
>  bool __read_mostly dump_invalid_vmcb;
>  module_param(dump_invalid_vmcb, bool, 0644);
>  
> +bool svm_gp_erratum_intercept = true;

I'd expect this to be a module param, so that the user can override it,
just like enable_vmware_backdoor.
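Something along these lines, for example (untested sketch, mirroring the
dump_invalid_vmcb declaration just above; the exact permission bits are up
to you):

        bool __read_mostly svm_gp_erratum_intercept = true;
        /* expose the workaround as a module parameter, like enable_vmware_backdoor */
        module_param(svm_gp_erratum_intercept, bool, 0444);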
> +
>  static u8 rsm_ins_bytes[] = "\x0f\xaa";
>  
>  static void svm_complete_interrupts(struct vcpu_svm *svm);
> @@ -288,6 +290,9 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
>          if (!(efer & EFER_SVME)) {
>                  svm_leave_nested(svm);
>                  svm_set_gif(svm, true);
> +                /* #GP intercept is still needed in vmware_backdoor */
> +                if (!enable_vmware_backdoor)

I would use
        if (svm_gp_erratum_intercept && !enable_vmware_backdoor)
to document this.

> +                        clr_exception_intercept(svm, GP_VECTOR);
>  
>                  /*
>                   * Free the nested guest state, unless we are in SMM.
> @@ -309,6 +314,10 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
>  
>          svm->vmcb->save.efer = efer | EFER_SVME;
>          vmcb_mark_dirty(svm->vmcb, VMCB_CR);
> +        /* Enable #GP interception for SVM instructions */
> +        if (svm_gp_erratum_intercept)
> +                set_exception_intercept(svm, GP_VECTOR);
> +
>          return 0;
>  }
>  
> @@ -1957,24 +1966,6 @@ static int ac_interception(struct vcpu_svm *svm)
>          return 1;
>  }
>  
> -static int gp_interception(struct vcpu_svm *svm)
> -{
> -        struct kvm_vcpu *vcpu = &svm->vcpu;
> -        u32 error_code = svm->vmcb->control.exit_info_1;
> -
> -        WARN_ON_ONCE(!enable_vmware_backdoor);
> -
> -        /*
> -         * VMware backdoor emulation on #GP interception only handles IN{S},
> -         * OUT{S}, and RDPMC, none of which generate a non-zero error code.
> -         */
> -        if (error_code) {
> -                kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
> -                return 1;
> -        }
> -        return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
> -}
> -
>  static bool is_erratum_383(void)
>  {
>          int err, i;
> @@ -2173,6 +2164,88 @@ static int vmrun_interception(struct vcpu_svm *svm)
>          return nested_svm_vmrun(svm);
>  }
>  
> +enum {
> +        NONE_SVM_INSTR,
> +        SVM_INSTR_VMRUN,
> +        SVM_INSTR_VMLOAD,
> +        SVM_INSTR_VMSAVE,
> +};
> +
> +/* Return NONE_SVM_INSTR if not SVM instrs, otherwise return decode result */
> +static int svm_instr_opcode(struct kvm_vcpu *vcpu)
> +{
> +        struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
> +
> +        if (ctxt->b != 0x1 || ctxt->opcode_len != 2)
> +                return NONE_SVM_INSTR;
> +
> +        switch (ctxt->modrm) {
> +        case 0xd8: /* VMRUN */
> +                return SVM_INSTR_VMRUN;
> +        case 0xda: /* VMLOAD */
> +                return SVM_INSTR_VMLOAD;
> +        case 0xdb: /* VMSAVE */
> +                return SVM_INSTR_VMSAVE;
> +        default:
> +                break;
> +        }
> +
> +        return NONE_SVM_INSTR;
> +}
> +
> +static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
> +{
> +        int (*const svm_instr_handlers[])(struct vcpu_svm *svm) = {
> +                [SVM_INSTR_VMRUN] = vmrun_interception,
> +                [SVM_INSTR_VMLOAD] = vmload_interception,
> +                [SVM_INSTR_VMSAVE] = vmsave_interception,
> +        };
> +        struct vcpu_svm *svm = to_svm(vcpu);
> +
> +        return svm_instr_handlers[opcode](svm);
> +}
> +
> +/*
> + * #GP handling code. Note that #GP can be triggered under the following two
> + * cases:
> + *   1) SVM VM-related instructions (VMRUN/VMSAVE/VMLOAD) that trigger #GP on
> + *      some AMD CPUs when EAX of these instructions are in the reserved memory
> + *      regions (e.g. SMM memory on host).
> + *   2) VMware backdoor
> + */
> +static int gp_interception(struct vcpu_svm *svm)
> +{
> +        struct kvm_vcpu *vcpu = &svm->vcpu;
> +        u32 error_code = svm->vmcb->control.exit_info_1;
> +        int opcode;
> +
> +        /* Both #GP cases have zero error_code */
> +        if (error_code)
> +                goto reinject;
> +
> +        /* Decode the instruction for usage later */
> +        if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
> +                goto reinject;
> +
> +        opcode = svm_instr_opcode(vcpu);
> +
> +        if (opcode == NONE_SVM_INSTR) {
> +                WARN_ON_ONCE(!enable_vmware_backdoor);
> +
> +                /*
> +                 * VMware backdoor emulation on #GP interception only handles
> +                 * IN{S}, OUT{S}, and RDPMC.
> +                 */
> +                return kvm_emulate_instruction(vcpu,
> +                                EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
> +        } else

I would check svm_gp_erratum_intercept here: don't do any emulation if it
isn't set, and print a warning.

> +                return emulate_svm_instr(vcpu, opcode);
> +
> +reinject:
> +        kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
> +        return 1;
> +}
> +
>  void svm_set_gif(struct vcpu_svm *svm, bool value)
>  {
>          if (value) {

Best regards,
        Maxim Levitsky
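PS: roughly what I have in mind for the gp_interception() part (untested
sketch; pr_warn_once and the message text are just placeholders, the rest
reuses the names from the patch above):

        } else {
                /*
                 * If the erratum workaround is disabled we should never see
                 * a #GP for an SVM instruction here, so warn and reinject
                 * instead of emulating.
                 */
                if (!svm_gp_erratum_intercept) {
                        pr_warn_once("svm: unexpected #GP for SVM instruction\n");
                        goto reinject;
                }
                return emulate_svm_instr(vcpu, opcode);
        }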