> On Aug 12, 2019, at 12:07 AM, Paul Mackerras <paulus@xxxxxxxxxx> wrote: > > --- > arch/powerpc/include/asm/xive.h | 8 ++++ > arch/powerpc/kvm/book3s_xive.c | 31 ++++++++++++++ > arch/powerpc/sysdev/xive/common.c | 87 ++++++++++++++++++++++++++++----------- > 3 files changed, 103 insertions(+), 23 deletions(-) > > diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h > index e4016985764e..efb0e597b272 100644 > --- a/arch/powerpc/include/asm/xive.h > +++ b/arch/powerpc/include/asm/xive.h > @@ -46,7 +46,15 @@ struct xive_irq_data { > > /* Setup/used by frontend */ > int target; > + /* > + * saved_p means that there is a queue entry for this interrupt > + * in some CPU's queue (not including guest vcpu queues), even > + * if P is not set in the source ESB. > + * stale_p means that there is no queue entry for this interrupt > + * in some CPU's queue, even if P is set in the source ESB. > + */ > bool saved_p; > + bool stale_p; > }; > #define XIVE_IRQ_FLAG_STORE_EOI 0x01 > #define XIVE_IRQ_FLAG_LSI 0x02 > diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c > index 09f838aa3138..74eea009c095 100644 > --- a/arch/powerpc/kvm/book3s_xive.c > +++ b/arch/powerpc/kvm/book3s_xive.c > @@ -160,6 +160,9 @@ static irqreturn_t xive_esc_irq(int irq, void *data) > */ > vcpu->arch.xive_esc_on = false; > > + /* This orders xive_esc_on = false vs. subsequent stale_p = true */ > + smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */ > + > return IRQ_HANDLED; > } > > @@ -1113,6 +1116,31 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) > vcpu->arch.xive_esc_raddr = 0; > } > > +/* > + * In single escalation mode, the escalation interrupt is marked so > + * that EOI doesn't re-enable it, but just sets the stale_p flag to > + * indicate that the P bit has already been dealt with. However, the > + * assembly code that enters the guest sets PQ to 00 without clearing > + * stale_p (because it has no easy way to address it). Hence we have > + * to adjust stale_p before shutting down the interrupt. > + */ > +static void cleanup_single_escalation(struct kvm_vcpu *vcpu, > + struct kvmppc_xive_vcpu *xc, int irq) > +{ > + struct irq_data *d = irq_get_irq_data(irq); > + struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); > + > + /* > + * This slightly odd sequence gives the right result > + * (i.e. stale_p set if xive_esc_on is false) even if > + * we race with xive_esc_irq() and xive_irq_eoi(). > + */ Hi Paul, I don’t quite understand the logic here. Are you saying the code sequence is vcpu->arch.xive_esc_on = false; (xive_esc_irq) then xd->stale_p = true; (cleanup_single_escaltion) > + xd->stale_p = false; > + smp_mb(); /* paired with smb_wmb in xive_esc_irq */ > + if (!vcpu->arch.xive_esc_on) > + xd->stale_p = true; > +} > + > void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) > { > struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; > @@ -1137,6 +1165,9 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) > /* Free escalations */ > for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { > if (xc->esc_virq[i]) { > + if (xc->xive->single_escalation) > + cleanup_single_escalation(vcpu, xc, > + xc->esc_virq[i]); > free_irq(xc->esc_virq[i], vcpu); > irq_dispose_mapping(xc->esc_virq[i]); > kfree(xc->esc_virq_names[i]); > diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c > index 1cdb39575eae..be86fce1a84e 100644 > --- a/arch/powerpc/sysdev/xive/common.c > +++ b/arch/powerpc/sysdev/xive/common.c > @@ -135,7 +135,7 @@ static u32 xive_read_eq(struct xive_q *q, bool just_peek) > static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek) > { > u32 irq = 0; > - u8 prio; > + u8 prio = 0; > > /* Find highest pending priority */ > while (xc->pending_prio != 0) { > @@ -148,8 +148,19 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek) > irq = xive_read_eq(&xc->queue[prio], just_peek); > > /* Found something ? That's it */ > - if (irq) > - break; > + if (irq) { > + if (just_peek || irq_to_desc(irq)) > + break; > + /* > + * We should never get here; if we do then we must > + * have failed to synchronize the interrupt properly > + * when shutting it down. > + */ > + pr_crit("xive: got interrupt %d without descriptor, dropping\n", > + irq); > + WARN_ON(1); > + continue; > + } > > /* Clear pending bits */ > xc->pending_prio &= ~(1 << prio); > @@ -307,6 +318,7 @@ static void xive_do_queue_eoi(struct xive_cpu *xc) > */ > static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd) > { > + xd->stale_p = false; > /* If the XIVE supports the new "store EOI facility, use it */ > if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) > xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0); > @@ -350,7 +362,7 @@ static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd) > } > } > > -/* irq_chip eoi callback */ > +/* irq_chip eoi callback, called with irq descriptor lock held */ > static void xive_irq_eoi(struct irq_data *d) > { > struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); > @@ -366,6 +378,8 @@ static void xive_irq_eoi(struct irq_data *d) > if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && > !(xd->flags & XIVE_IRQ_NO_EOI)) > xive_do_source_eoi(irqd_to_hwirq(d), xd); > + else > + xd->stale_p = true; > > /* > * Clear saved_p to indicate that it's no longer occupying > @@ -397,11 +411,16 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd, > */ > if (mask) { > val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01); > - xd->saved_p = !!(val & XIVE_ESB_VAL_P); > - } else if (xd->saved_p) > + if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P)) > + xd->saved_p = true; > + xd->stale_p = false; > + } else if (xd->saved_p) { > xive_esb_read(xd, XIVE_ESB_SET_PQ_10); > - else > + xd->saved_p = false; Should we also explicitly set xd->stale_p = true; here? > + } else { > xive_esb_read(xd, XIVE_ESB_SET_PQ_00); > + xd->stale_p = false; Should we also explicitly set xd->saved_p = true; here? Thanks, Lijun