On 23/04/20 04:25, Sean Christopherson wrote:
> Add a separate hook for checking if interrupt injection is blocked and
> use the hook to handle the case where an interrupt arrives between
> check_nested_events() and the injection logic.  Drop the retry of
> check_nested_events() that hack-a-fixed the same condition.
>
> Blocking injection is also a bit of a hack, e.g. KVM should do exiting
> and non-exiting interrupt processing in a single pass, but it's a more
> precise hack.  The old comment is also misleading, e.g. KVM_REQ_EVENT is
> purely an optimization, setting it on every run loop (which KVM doesn't
> do) should not affect functionality, only performance.
>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/svm/svm.c          |  1 +
>  arch/x86/kvm/vmx/vmx.c          | 13 +++++++++++++
>  arch/x86/kvm/x86.c              | 22 ++++------------------
>  4 files changed, 19 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 787636acd648..16fdeddb4a65 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1140,6 +1140,7 @@ struct kvm_x86_ops {
>  	void (*queue_exception)(struct kvm_vcpu *vcpu);
>  	void (*cancel_injection)(struct kvm_vcpu *vcpu);
>  	bool (*interrupt_allowed)(struct kvm_vcpu *vcpu);
> +	bool (*interrupt_injection_allowed)(struct kvm_vcpu *vcpu);
>  	bool (*nmi_allowed)(struct kvm_vcpu *vcpu);
>  	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
>  	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index f21f734861dd..6d3ccbfc9e6a 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3993,6 +3993,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
>  	.queue_exception = svm_queue_exception,
>  	.cancel_injection = svm_cancel_injection,
>  	.interrupt_allowed = svm_interrupt_allowed,
> +	.interrupt_injection_allowed = svm_interrupt_allowed,
>  	.nmi_allowed = svm_nmi_allowed,
>  	.get_nmi_mask = svm_get_nmi_mask,
>  	.set_nmi_mask = svm_set_nmi_mask,
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 2f8cacb3aa9b..68b3748b5383 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -4550,6 +4550,18 @@ static bool vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
>  	return !vmx_interrupt_blocked(vcpu);
>  }
>
> +static bool vmx_interrupt_injection_allowed(struct kvm_vcpu *vcpu)
> +{
> +	/*
> +	 * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
> +	 * e.g. if the IRQ arrived asynchronously after checking nested events.
> +	 */
> +	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
> +		return false;
> +
> +	return vmx_interrupt_allowed(vcpu);
> +}
> +
>  static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
>  {
>  	int ret;
> @@ -7823,6 +7835,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
>  	.queue_exception = vmx_queue_exception,
>  	.cancel_injection = vmx_cancel_injection,
>  	.interrupt_allowed = vmx_interrupt_allowed,
> +	.interrupt_injection_allowed = vmx_interrupt_injection_allowed,
>  	.nmi_allowed = vmx_nmi_allowed,
>  	.get_nmi_mask = vmx_get_nmi_mask,
>  	.set_nmi_mask = vmx_set_nmi_mask,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 7c49a7dc601f..d9d6028a77e0 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7755,24 +7755,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu)
>  		--vcpu->arch.nmi_pending;
>  		vcpu->arch.nmi_injected = true;
>  		kvm_x86_ops.set_nmi(vcpu);
> -	} else if (kvm_cpu_has_injectable_intr(vcpu)) {
> -		/*
> -		 * Because interrupts can be injected asynchronously, we are
> -		 * calling check_nested_events again here to avoid a race condition.
> -		 * See https://lkml.org/lkml/2014/7/2/60 for discussion about this
> -		 * proposal and current concerns.  Perhaps we should be setting
> -		 * KVM_REQ_EVENT only on certain events and not unconditionally?
> -		 */
> -		if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) {
> -			r = kvm_x86_ops.check_nested_events(vcpu);
> -			if (r != 0)
> -				return r;
> -		}
> -		if (kvm_x86_ops.interrupt_allowed(vcpu)) {
> -			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
> -					    false);
> -			kvm_x86_ops.set_irq(vcpu);
> -		}
> +	} else if (kvm_cpu_has_injectable_intr(vcpu) &&
> +		   kvm_x86_ops.interrupt_injection_allowed(vcpu)) {
> +		kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
> +		kvm_x86_ops.set_irq(vcpu);

Hmm, I'm interested in how this can help avoid AMD introducing another
instance of the late check_nested_events call.  I'll play with it.

Paolo
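
[Editorial sketch, not part of the quoted patch: if the SVM side later gets
the same treatment instead of reusing svm_interrupt_allowed() for both hooks,
a counterpart to vmx_interrupt_injection_allowed() might look roughly like
the code below.  The SVM-flavoured nested_exit_on_intr() helper is assumed
here and does not exist in this series; only svm_interrupt_allowed(),
is_guest_mode() and to_svm() are taken from the existing code.]

/*
 * Hypothetical SVM counterpart to vmx_interrupt_injection_allowed().
 * Mirrors the VMX logic: an IRQ that L1 intercepts must cause a
 * (nested) VM-Exit to L1 and must not be injected into L2, e.g. if
 * the IRQ arrived after check_nested_events() already ran.
 */
static bool svm_interrupt_injection_allowed(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* nested_exit_on_intr() for SVM is assumed, not part of this patch. */
	if (is_guest_mode(vcpu) && nested_exit_on_intr(svm))
		return false;

	return svm_interrupt_allowed(vcpu);
}

Such a hook would then be wired up in svm_x86_ops in place of the
.interrupt_injection_allowed = svm_interrupt_allowed assignment above.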