Re: [PATCH v7 11/11] KVM: xen: allow vcpu_info content to be 'safely' copied

Sean Christopherson <seanjc@xxxxxxxxxx> · Wed, 8 Nov 2023 12:55:24 -0800

On Wed, Nov 08, 2023, David Woodhouse wrote:
> On Tue, 2023-10-31 at 16:58 -0700, Sean Christopherson wrote:
> > 
> > > diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
> > > index aafc794940e4..e645066217bb 100644
> > > --- a/arch/x86/kvm/xen.c
> > > +++ b/arch/x86/kvm/xen.c
> > > @@ -1606,9 +1606,6 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
> > >                 WRITE_ONCE(xe->vcpu_idx, vcpu->vcpu_idx);
> > >         }
> > >   
> > > -       if (!vcpu->arch.xen.vcpu_info_cache.active)
> > > -               return -EINVAL;
> > > -
> > 
> > Hmm, maybe move this check after the "hard" error checks and explicitly do:
> > 
> >                 return -EWOULDBLOCK
> > 
> > That way it's much more obvious that this patch is safe.  Alternatively, briefly
> > explain what happens if the cache is invalid in the changelog.
> 
> 
> Hm, I thought we dropped that? It doesn't want to return -EWOULDBLOCK.
> That would cause a "fast" irqfd delivery to defer to the slow
> (workqueue) path, but then the same thing would happen on that path
> *too*.
> 
> I actually think this check needs to be dropped completely. The
> evtchn_pending_sel bit will get set in the *shadow* copy in vcpu-
> >arch.xen.evtchn_pending_sel, and will subsequently be set in the real
> vcpu_info when that is configured again. We do already handle that case
> correctly, I believe:
> 
> 		if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
> 			/*
> 			 * Could not access the vcpu_info. Set the bit in-kernel
> 			 * and prod the vCPU to deliver it for itself.
> 			 */
> 			if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
> 				kick_vcpu = true;
> 			goto out_rcu;
> 		}
> 
> If we leave this check in place as it is, I think we're going to lose
> incoming events (and IPIs) directed at a vCPU while it has its
> vcpu_info temporarily removed.

Oh, dagnabbit, there are two different gfn_to_pfn_cache objects.  I assumed "gpc"
was the same thing as vcpu_info_cache.active, i.e. thought the explicit active
check here was just to pre-check the validity before acquiring the lock.

	if (!vcpu->arch.xen.vcpu_info_cache.active)
		return -EINVAL;

	rc = -EWOULDBLOCK;

	idx = srcu_read_lock(&kvm->srcu);

	read_lock_irqsave(&gpc->lock, flags);
	if (!kvm_gpc_check(gpc, PAGE_SIZE))  <== I thought this was the same check
		goto out_rcu;

That's why my comment was nonsensical, and also why I was super confused by your
response for a few minutes :-)

Good gravy, this even conditionally switches gpc and makes the out path drop
different locks.  That's just mean.

Any objection to splitting out the vcpu_info chunk to a separate function?  That'd
eliminate the subtle gpc+lock switch, and provide a convenient and noticeable
location to explain why it's ok for setting the vcpu_info to fail.

E.g.

---
 arch/x86/kvm/xen.c | 102 ++++++++++++++++++++++++++-------------------
 1 file changed, 58 insertions(+), 44 deletions(-)

diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index d65e89e062e4..cd2021ba0ae3 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1621,6 +1621,57 @@ static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port)
 	}
 }
 
+static int kvm_xen_needs_a_name(struct kvm_vcpu *vcpu, int port_word_bit)
+{
+	struct gfn_to_pfn_cache *gpc = &vcpu->arch.xen.vcpu_info_cache;
+	bool kick_vcpu = false;
+	unsigned long flags;
+	int r = 0;
+
+	read_lock_irqsave(&gpc->lock, flags);
+	if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
+		/*
+		 * Could not access the vcpu_info. Set the bit in-kernel and
+		 * prod the vCPU to deliver it for itself.
+		 */
+		if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
+			kick_vcpu = true;
+
+		r = -EWOULDBLOCK;
+		goto out;
+	}
+
+	if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode) {
+		struct vcpu_info *vcpu_info = gpc->khva;
+		if (!test_and_set_bit(port_word_bit, &vcpu_info->evtchn_pending_sel)) {
+			WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
+			kick_vcpu = true;
+		}
+	} else {
+		struct compat_vcpu_info *vcpu_info = gpc->khva;
+		if (!test_and_set_bit(port_word_bit,
+					(unsigned long *)&vcpu_info->evtchn_pending_sel)) {
+			WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
+			kick_vcpu = true;
+		}
+	}
+
+	/* For the per-vCPU lapic vector, deliver it as MSI. */
+	if (kick_vcpu && vcpu->arch.xen.upcall_vector) {
+		kvm_xen_inject_vcpu_vector(vcpu);
+		kick_vcpu = false;
+	}
+
+out:
+	read_unlock_irqrestore(&gpc->lock, flags);
+
+	if (kick_vcpu) {
+		kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
+		kvm_vcpu_kick(vcpu);
+	}
+
+	return r;
+}
 /*
  * The return value from this function is propagated to kvm_set_irq() API,
  * so it returns:
@@ -1638,7 +1689,6 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 	unsigned long *pending_bits, *mask_bits;
 	unsigned long flags;
 	int port_word_bit;
-	bool kick_vcpu = false;
 	int vcpu_idx, idx, rc;
 
 	vcpu_idx = READ_ONCE(xe->vcpu_idx);
@@ -1660,7 +1710,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 
 	read_lock_irqsave(&gpc->lock, flags);
 	if (!kvm_gpc_check(gpc, PAGE_SIZE))
-		goto out_rcu;
+		goto out_unlock;
 
 	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
 		struct shared_info *shinfo = gpc->khva;
@@ -1688,52 +1738,16 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 		kvm_xen_check_poller(vcpu, xe->port);
 	} else {
 		rc = 1; /* Delivered to the bitmap in shared_info. */
-		/* Now switch to the vCPU's vcpu_info to set the index and pending_sel */
-		read_unlock_irqrestore(&gpc->lock, flags);
-		gpc = &vcpu->arch.xen.vcpu_info_cache;
-
-		read_lock_irqsave(&gpc->lock, flags);
-		if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
-			/*
-			 * Could not access the vcpu_info. Set the bit in-kernel
-			 * and prod the vCPU to deliver it for itself.
-			 */
-			if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
-				kick_vcpu = true;
-			goto out_rcu;
-		}
-
-		if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
-			struct vcpu_info *vcpu_info = gpc->khva;
-			if (!test_and_set_bit(port_word_bit, &vcpu_info->evtchn_pending_sel)) {
-				WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
-				kick_vcpu = true;
-			}
-		} else {
-			struct compat_vcpu_info *vcpu_info = gpc->khva;
-			if (!test_and_set_bit(port_word_bit,
-					      (unsigned long *)&vcpu_info->evtchn_pending_sel)) {
-				WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
-				kick_vcpu = true;
-			}
-		}
-
-		/* For the per-vCPU lapic vector, deliver it as MSI. */
-		if (kick_vcpu && vcpu->arch.xen.upcall_vector) {
-			kvm_xen_inject_vcpu_vector(vcpu);
-			kick_vcpu = false;
-		}
 	}
 
- out_rcu:
+ out_unlock:
 	read_unlock_irqrestore(&gpc->lock, flags);
+
+	/* Paul to add comment explaining why it's ok kvm_xen_needs_a_name() fails */
+	if (rc == 1)
+		rc = kvm_xen_needs_a_name(vcpu, port_word_bit);
+
 	srcu_read_unlock(&kvm->srcu, idx);
-
-	if (kick_vcpu) {
-		kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
-		kvm_vcpu_kick(vcpu);
-	}
-
 	return rc;
 }
 

base-commit: 0f34262cf6fab8163456c7740c7ee1888a8af93c
--