On Wed, Nov 08, 2023, David Woodhouse wrote: > On Tue, 2023-10-31 at 16:58 -0700, Sean Christopherson wrote: > > > > > diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c > > > index aafc794940e4..e645066217bb 100644 > > > --- a/arch/x86/kvm/xen.c > > > +++ b/arch/x86/kvm/xen.c > > > @@ -1606,9 +1606,6 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) > > > WRITE_ONCE(xe->vcpu_idx, vcpu->vcpu_idx); > > > } > > > > > > - if (!vcpu->arch.xen.vcpu_info_cache.active) > > > - return -EINVAL; > > > - > > > > Hmm, maybe move this check after the "hard" error checks and explicitly do: > > > > return -EWOULDBLOCK > > > > That way it's much more obvious that this patch is safe. Alternatively, briefly > > explain what happens if the cache is invalid in the changelog. > > > Hm, I thought we dropped that? It doesn't want to return -EWOULDBLOCK. > That would cause a "fast" irqfd delivery to defer to the slow > (workqueue) path, but then the same thing would happen on that path > *too*. > > I actually think this check needs to be dropped completely. The > evtchn_pending_sel bit will get set in the *shadow* copy in vcpu- > >arch.xen.evtchn_pending_sel, and will subsequently be set in the real > vcpu_info when that is configured again. We do already handle that case > correctly, I believe: > > if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { > /* > * Could not access the vcpu_info. Set the bit in-kernel > * and prod the vCPU to deliver it for itself. > */ > if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel)) > kick_vcpu = true; > goto out_rcu; > } > > If we leave this check in place as it is, I think we're going to lose > incoming events (and IPIs) directed at a vCPU while it has its > vcpu_info temporarily removed. Oh, dagnabbit, there are two different gfn_to_pfn_cache objects. I assumed "gpc" was the same thing as vcpu_info_cache.active, i.e. thought the explicit active check here was just to pre-check the validity before acquiring the lock. if (!vcpu->arch.xen.vcpu_info_cache.active) return -EINVAL; rc = -EWOULDBLOCK; idx = srcu_read_lock(&kvm->srcu); read_lock_irqsave(&gpc->lock, flags); if (!kvm_gpc_check(gpc, PAGE_SIZE)) <== I thought this was the same check goto out_rcu; That's why my comment was nonsensical, and also why I was super confused by your response for a few minutes :-) Good gravy, this even conditionally switches gpc and makes the out path drop different locks. That's just mean. Any objection to splitting out the vcpu_info chunk to a separate function? That'd eliminate the subtle gpc+lock switch, and provide a convenient and noticeable location to explain why it's ok for setting the vcpu_info to fail. E.g. --- arch/x86/kvm/xen.c | 102 ++++++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 44 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index d65e89e062e4..cd2021ba0ae3 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1621,6 +1621,57 @@ static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port) } } +static int kvm_xen_needs_a_name(struct kvm_vcpu *vcpu, int port_word_bit) +{ + struct gfn_to_pfn_cache *gpc = &vcpu->arch.xen.vcpu_info_cache; + bool kick_vcpu = false; + unsigned long flags; + int r = 0; + + read_lock_irqsave(&gpc->lock, flags); + if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { + /* + * Could not access the vcpu_info. Set the bit in-kernel and + * prod the vCPU to deliver it for itself. + */ + if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel)) + kick_vcpu = true; + + r = -EWOULDBLOCK; + goto out; + } + + if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode) { + struct vcpu_info *vcpu_info = gpc->khva; + if (!test_and_set_bit(port_word_bit, &vcpu_info->evtchn_pending_sel)) { + WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1); + kick_vcpu = true; + } + } else { + struct compat_vcpu_info *vcpu_info = gpc->khva; + if (!test_and_set_bit(port_word_bit, + (unsigned long *)&vcpu_info->evtchn_pending_sel)) { + WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1); + kick_vcpu = true; + } + } + + /* For the per-vCPU lapic vector, deliver it as MSI. */ + if (kick_vcpu && vcpu->arch.xen.upcall_vector) { + kvm_xen_inject_vcpu_vector(vcpu); + kick_vcpu = false; + } + +out: + read_unlock_irqrestore(&gpc->lock, flags); + + if (kick_vcpu) { + kvm_make_request(KVM_REQ_UNBLOCK, vcpu); + kvm_vcpu_kick(vcpu); + } + + return r; +} /* * The return value from this function is propagated to kvm_set_irq() API, * so it returns: @@ -1638,7 +1689,6 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) unsigned long *pending_bits, *mask_bits; unsigned long flags; int port_word_bit; - bool kick_vcpu = false; int vcpu_idx, idx, rc; vcpu_idx = READ_ONCE(xe->vcpu_idx); @@ -1660,7 +1710,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) read_lock_irqsave(&gpc->lock, flags); if (!kvm_gpc_check(gpc, PAGE_SIZE)) - goto out_rcu; + goto out_unlock; if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { struct shared_info *shinfo = gpc->khva; @@ -1688,52 +1738,16 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) kvm_xen_check_poller(vcpu, xe->port); } else { rc = 1; /* Delivered to the bitmap in shared_info. */ - /* Now switch to the vCPU's vcpu_info to set the index and pending_sel */ - read_unlock_irqrestore(&gpc->lock, flags); - gpc = &vcpu->arch.xen.vcpu_info_cache; - - read_lock_irqsave(&gpc->lock, flags); - if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { - /* - * Could not access the vcpu_info. Set the bit in-kernel - * and prod the vCPU to deliver it for itself. - */ - if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel)) - kick_vcpu = true; - goto out_rcu; - } - - if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { - struct vcpu_info *vcpu_info = gpc->khva; - if (!test_and_set_bit(port_word_bit, &vcpu_info->evtchn_pending_sel)) { - WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1); - kick_vcpu = true; - } - } else { - struct compat_vcpu_info *vcpu_info = gpc->khva; - if (!test_and_set_bit(port_word_bit, - (unsigned long *)&vcpu_info->evtchn_pending_sel)) { - WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1); - kick_vcpu = true; - } - } - - /* For the per-vCPU lapic vector, deliver it as MSI. */ - if (kick_vcpu && vcpu->arch.xen.upcall_vector) { - kvm_xen_inject_vcpu_vector(vcpu); - kick_vcpu = false; - } } - out_rcu: + out_unlock: read_unlock_irqrestore(&gpc->lock, flags); + + /* Paul to add comment explaining why it's ok kvm_xen_needs_a_name() fails */ + if (rc == 1) + rc = kvm_xen_needs_a_name(vcpu, port_word_bit); + srcu_read_unlock(&kvm->srcu, idx); - - if (kick_vcpu) { - kvm_make_request(KVM_REQ_UNBLOCK, vcpu); - kvm_vcpu_kick(vcpu); - } - return rc; } base-commit: 0f34262cf6fab8163456c7740c7ee1888a8af93c --