On 04/06/2019 14:53, Marc Zyngier wrote: > On 04/06/2019 14:16, Steven Rostedt wrote: >> On Tue, 4 Jun 2019 13:58:51 +0100 >> Julien Grall <julien.grall@xxxxxxx> wrote: >> >>> This is happening because vgic_v2_fold_lr_state() is expected >>> to be called with interrupt disabled. However, some of the path >>> (e.g eventfd) will take a spinlock. >>> >>> The spinlock is from the waitqueue, so using a raw_spin_lock cannot >>> even be considered. >>> >>> Do you have any input on how this could be solved? >> >> What's the reason that vgic_v2_fold_lr_state() expects interrupts to be >> disabled? > > That's to prevent the injection of an interrupt firing on the same CPU > while we're saving the corresponding vcpu interrupt context, among other > things (the whole guest exit path runs with interrupt disabled in order > to avoid this kind of thing). > > One possibility would be to accumulate the set of interrupt that require > resampling (which is bound to be small, the number of LRs at most) and > call kvm_notify_acked_irq on that set once interrupts are re-enabled. > > I'll have a look... Julien, Here's what I have in mind, compile-tested only. Please let me know how it fares on your setup... Thanks, M. diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index c36c86f1ec9a..9e587f5d90fa 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -307,6 +307,10 @@ struct vgic_cpu { unsigned int used_lrs; struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS]; + /* SPIs to be resampled by SW at EOI time */ + u16 spi_notify[VGIC_V2_MAX_LRS]; + int spi_notify_count; + raw_spinlock_t ap_list_lock; /* Protects the ap_list */ /* @@ -371,6 +375,7 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu); bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); +void kvm_vgic_process_resample(struct kvm_vcpu *vcpu); void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid); void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 7eeebe5e9da2..a95c46b2d723 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -827,6 +827,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) guest_exit(); trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); + /* + * Now that interrupts are enabled, signal interrupts + * that require SW resampling. + */ + kvm_vgic_process_resample(vcpu); + /* Exit types that need handling before we can be preempted */ handle_exit_early(vcpu, run, ret); diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index d91a8938aa7c..9953e1742d65 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -79,8 +79,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) /* Notify fds when the guest EOI'ed a level-triggered SPI */ if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) - kvm_notify_acked_irq(vcpu->kvm, 0, - intid - VGIC_NR_PRIVATE_IRQS); + vgic_cpu->spi_notify[vgic_cpu->spi_notify_count++] = intid; irq = vgic_get_irq(vcpu->kvm, vcpu, intid); diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 9f87e58dbd4a..54b1e4ea5cfa 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -69,8 +69,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) /* Notify fds when the guest EOI'ed a level-triggered IRQ */ if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) - kvm_notify_acked_irq(vcpu->kvm, 0, - intid - VGIC_NR_PRIVATE_IRQS); + vgic_cpu->spi_notify[vgic_cpu->spi_notify_count++] = intid; irq = vgic_get_irq(vcpu->kvm, vcpu, intid); if (!irq) /* An LPI could have been unmapped. */ diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 191deccf60bf..baa6aa494e86 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -880,6 +880,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { WARN_ON(vgic_v4_flush_hwstate(vcpu)); + vcpu->arch.vgic_cpu.spi_notify_count = 0; + /* * If there are no virtual interrupts active or pending for this * VCPU, then there is no work to do and we can bail out without @@ -908,6 +910,16 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) vgic_restore_state(vcpu); } +void kvm_vgic_process_resample(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + int i; + + for (i = 0; i < vgic_cpu->spi_notify_count; i++) + kvm_notify_acked_irq(vcpu->kvm, 0, + vgic_cpu->spi_notify[i] - VGIC_NR_PRIVATE_IRQS); +} + void kvm_vgic_load(struct kvm_vcpu *vcpu) { if (unlikely(!vgic_initialized(vcpu->kvm))) -- Jazz is not dead. It just smells funny...