From: Paul Durrant <pdurrant@xxxxxxxxxx> As described in [1] compiling with CONFIG_PROVE_RAW_LOCK_NESTING shows that kvm_xen_set_evtchn_fast() is blocking on pfncache locks in IRQ context. There is only actually blocking with PREEMPT_RT because the locks will be turned into mutexes. There is no 'raw' version of rwlock_t that can be used to avoid that, so use read_trylock() and treat failure to lock the same as an invalid cache. [1] https://lore.kernel.org/lkml/99771ef3a4966a01fefd3adbb2ba9c3a75f97cf2.camel@xxxxxxxxxxxxx/T/#mbd06e5a04534ce9c0ee94bd8f1e8d942b2d45bd6 Fixes: 77c9b9dea4fb ("KVM: x86/xen: Use fast path for Xen timer delivery") Signed-off-by: Paul Durrant <pdurrant@xxxxxxxxxx> Reviewed-by: David Woodhouse <dwmw@xxxxxxxxxxxx> --- Cc: Sean Christopherson <seanjc@xxxxxxxxxx> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Borislav Petkov <bp@xxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Steven Rostedt <rostedt@xxxxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx> Cc: David Woodhouse <dwmw2@xxxxxxxxxxxxx> Cc: x86@xxxxxxxxxx v2: • Use read_trylock only in interrupt context, to avoid concerns about unfairness in the slow path. --- arch/x86/kvm/xen.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index c16b6d394d55..d8b5326ecebc 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1736,9 +1736,23 @@ static int set_shinfo_evtchn_pending(struct kvm_vcpu *vcpu, u32 port) unsigned long flags; int rc = -EWOULDBLOCK; - read_lock_irqsave(&gpc->lock, flags); + local_irq_save(flags); + if (!read_trylock(&gpc->lock)) { + /* + * When PREEMPT_RT turns locks into mutexes, rwlocks are + * turned into mutexes and most interrupts are threaded. + * But timer events may be delivered in hardirq mode due + * to using HRTIMER_MODE_ABS_HARD. 
So bail to the slow + * path if the trylock fails in interrupt context. + */ + if (in_interrupt()) + goto out; + + read_lock(&gpc->lock); + } + if (!kvm_gpc_check(gpc, PAGE_SIZE)) - goto out; + goto out_unlock; if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { struct shared_info *shinfo = gpc->khva; @@ -1761,8 +1775,10 @@ static int set_shinfo_evtchn_pending(struct kvm_vcpu *vcpu, u32 port) rc = 1; /* It is newly raised */ } + out_unlock: + read_unlock(&gpc->lock); out: - read_unlock_irqrestore(&gpc->lock, flags); + local_irq_restore(flags); return rc; } @@ -1772,21 +1788,23 @@ static bool set_vcpu_info_evtchn_pending(struct kvm_vcpu *vcpu, u32 port) struct gfn_to_pfn_cache *gpc = &vcpu->arch.xen.vcpu_info_cache; unsigned long flags; bool kick_vcpu = false; + bool locked; - read_lock_irqsave(&gpc->lock, flags); + local_irq_save(flags); + locked = read_trylock(&gpc->lock); /* * Try to deliver the event directly to the vcpu_info. If successful and * the guest is using upcall_vector delivery, send the MSI. - * If the pfncache is invalid, set the shadow. In this case, or if the - * guest is using another form of event delivery, the vCPU must be - * kicked to complete the delivery. + * If the pfncache lock is contended or the cache is invalid, set the + * shadow. In this case, or if the guest is using another form of event + * delivery, the vCPU must be kicked to complete the delivery. 
*/ if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { struct vcpu_info *vcpu_info = gpc->khva; int port_word_bit = port / 64; - if (!kvm_gpc_check(gpc, sizeof(*vcpu_info))) { + if ((!locked || !kvm_gpc_check(gpc, sizeof(*vcpu_info)))) { if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel)) kick_vcpu = true; goto out; @@ -1800,7 +1818,7 @@ static bool set_vcpu_info_evtchn_pending(struct kvm_vcpu *vcpu, u32 port) struct compat_vcpu_info *vcpu_info = gpc->khva; int port_word_bit = port / 32; - if (!kvm_gpc_check(gpc, sizeof(*vcpu_info))) { + if ((!locked || !kvm_gpc_check(gpc, sizeof(*vcpu_info)))) { if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel)) kick_vcpu = true; goto out; @@ -1819,7 +1837,10 @@ static bool set_vcpu_info_evtchn_pending(struct kvm_vcpu *vcpu, u32 port) } out: - read_unlock_irqrestore(&gpc->lock, flags); + if (locked) + read_unlock(&gpc->lock); + + local_irq_restore(flags); return kick_vcpu; } -- 2.43.0