On Fri, 6 Mar 2020 16:01:40 +0100 Cédric Le Goater <clg@xxxxxxxx> wrote: > When a CPU is brought up, an IPI number is allocated and recorded > under the XIVE CPU structure. Invalid IPI numbers are tracked with > interrupt number 0x0. > > On the PowerNV platform, the interrupt number space starts at 0x10 and > this works fine. However, on the sPAPR platform, it is possible to > allocate the interrupt number 0x0 and this raises an issue when CPU 0 > is unplugged. The XIVE spapr driver tracks allocated interrupt numbers > in a bitmask and it is not correctly updated when interrupt number 0x0 > is freed. It stays allocated and it is then impossible to reallocate. > > Fix by using the XIVE_BAD_IRQ value instead of zero on both platforms. > > Reported-by: David Gibson <david@xxxxxxxxxxxxxxxxxxxxx> > Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller") > Cc: stable@xxxxxxxxxxxxxxx # v4.14+ > Signed-off-by: Cédric Le Goater <clg@xxxxxxxx> > --- This looks mostly good. I'm juste wondering about potential overlooks: $ git grep 'if.*hw_i' arch/powerpc/ | egrep -v 'xics|XIVE_BAD_IRQ' arch/powerpc/kvm/book3s_xive.h: if (out_hw_irq) arch/powerpc/kvm/book3s_xive.h: if (out_hw_irq) arch/powerpc/kvm/book3s_xive_template.c: else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) arch/powerpc/sysdev/xive/common.c: else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) { This hw_irq check in xive_do_source_eoi() for example is related to: /* * Note: We pass "0" to the hw_irq argument in order to * avoid calling into the backend EOI code which we don't * want to do in the case of a re-trigger. Backends typically * only do EOI for LSIs anyway. */ xive_do_source_eoi(0, xd); but it can get hw_irq from: xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data); It seems that these should use XIVE_BAD_IRQ as well or I'm missing something ? arch/powerpc/sysdev/xive/common.c: if (hw_irq) arch/powerpc/sysdev/xive/common.c: if (d->domain != xive_irq_domain || hw_irq == 0) > arch/powerpc/sysdev/xive/xive-internal.h | 7 +++++++ > arch/powerpc/sysdev/xive/common.c | 12 +++--------- > arch/powerpc/sysdev/xive/native.c | 4 ++-- > arch/powerpc/sysdev/xive/spapr.c | 4 ++-- > 4 files changed, 14 insertions(+), 13 deletions(-) > > diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h > index 59cd366e7933..382980f4de2d 100644 > --- a/arch/powerpc/sysdev/xive/xive-internal.h > +++ b/arch/powerpc/sysdev/xive/xive-internal.h > @@ -5,6 +5,13 @@ > #ifndef __XIVE_INTERNAL_H > #define __XIVE_INTERNAL_H > > +/* > + * A "disabled" interrupt should never fire, to catch problems > + * we set its logical number to this > + */ > +#define XIVE_BAD_IRQ 0x7fffffff > +#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) > + > /* Each CPU carry one of these with various per-CPU state */ > struct xive_cpu { > #ifdef CONFIG_SMP > diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c > index fa49193206b6..550baba98ec9 100644 > --- a/arch/powerpc/sysdev/xive/common.c > +++ b/arch/powerpc/sysdev/xive/common.c > @@ -68,13 +68,6 @@ static u32 xive_ipi_irq; > /* Xive state for each CPU */ > static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu); > > -/* > - * A "disabled" interrupt should never fire, to catch problems > - * we set its logical number to this > - */ > -#define XIVE_BAD_IRQ 0x7fffffff > -#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) > - > /* An invalid CPU target */ > #define XIVE_INVALID_TARGET (-1) > > @@ -1153,7 +1146,7 @@ static int xive_setup_cpu_ipi(unsigned int cpu) > xc = per_cpu(xive_cpu, cpu); > > /* Check if we are already setup */ > - if (xc->hw_ipi != 0) > + if (xc->hw_ipi != XIVE_BAD_IRQ) > return 0; > > /* Grab an IPI from the backend, this will populate xc->hw_ipi */ > @@ -1190,7 +1183,7 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) > /* Disable the IPI and free the IRQ data */ > > /* Already cleaned up ? */ > - if (xc->hw_ipi == 0) > + if (xc->hw_ipi == XIVE_BAD_IRQ) > return; > > /* Mask the IPI */ > @@ -1346,6 +1339,7 @@ static int xive_prepare_cpu(unsigned int cpu) > if (np) > xc->chip_id = of_get_ibm_chip_id(np); > of_node_put(np); > + xc->hw_ipi = XIVE_BAD_IRQ; > > per_cpu(xive_cpu, cpu) = xc; > } > diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c > index 0ff6b739052c..50e1a8e02497 100644 > --- a/arch/powerpc/sysdev/xive/native.c > +++ b/arch/powerpc/sysdev/xive/native.c > @@ -312,7 +312,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) > s64 rc; > > /* Free the IPI */ > - if (!xc->hw_ipi) > + if (xc->hw_ipi == XIVE_BAD_IRQ) > return; > for (;;) { > rc = opal_xive_free_irq(xc->hw_ipi); > @@ -320,7 +320,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) > msleep(OPAL_BUSY_DELAY_MS); > continue; > } > - xc->hw_ipi = 0; > + xc->hw_ipi = XIVE_BAD_IRQ; > break; > } > } > diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c > index 55dc61cb4867..3f15615712b5 100644 > --- a/arch/powerpc/sysdev/xive/spapr.c > +++ b/arch/powerpc/sysdev/xive/spapr.c > @@ -560,11 +560,11 @@ static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc) > > static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc) > { > - if (!xc->hw_ipi) > + if (xc->hw_ipi == XIVE_BAD_IRQ) > return; > > xive_irq_bitmap_free(xc->hw_ipi); > - xc->hw_ipi = 0; > + xc->hw_ipi = XIVE_BAD_IRQ; > } > #endif /* CONFIG_SMP */ >