On 10/5/18 1:06 pm, Benjamin Herrenschmidt wrote:
> When a vcpu priority (CPPR) is set to a lower value (masking more
> interrupts), we stop processing interrupts already in the queue
> for the priorities that have now been masked.
>
> If those interrupts were previously re-routed to a different
> CPU, they might still be stuck until the older one that has
> them in its queue processes them. In the case of guest CPU
> unplug, that can be never.
>
> To address that without creating additional overhead for
> the normal interrupt processing path, this changes H_CPPR
> handling so that when such a priority change occurs, we
> scan the interrupt queue for that vCPU, and for any
> interrupt in there that has been re-routed, we replace it
> with a dummy and force a re-trigger.
>
> Signed-off-by: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>

Tested-by: Alexey Kardashevskiy <aik@xxxxxxxxx>

> ---
>  arch/powerpc/kvm/book3s_xive_template.c | 108 ++++++++++++++++++++++--
>  1 file changed, 101 insertions(+), 7 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
> index c7a5deadd1cc..99c3620b40d9 100644
> --- a/arch/powerpc/kvm/book3s_xive_template.c
> +++ b/arch/powerpc/kvm/book3s_xive_template.c
> @@ -11,6 +11,9 @@
>  #define XGLUE(a,b) a##b
>  #define GLUE(a,b) XGLUE(a,b)
>
> +/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
> +#define XICS_DUMMY	1
> +
>  static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
>  {
>  	u8 cppr;
> @@ -205,6 +208,10 @@ static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
>  			goto skip_ipi;
>  		}
>
> +		/* If it's the dummy interrupt, continue searching */
> +		if (hirq == XICS_DUMMY)
> +			goto skip_ipi;
> +
>  		/* If fetching, update queue pointers */
>  		if (scan_type == scan_fetch) {
>  			q->idx = idx;
> @@ -385,9 +392,76 @@ static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
>  	__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
>  }
>
> +static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
> +					       struct kvmppc_xive_vcpu *xc)
> +{
> +	unsigned int prio;
> +
> +	/* For each priority that is now masked */
> +	for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
> +		struct xive_q *q = &xc->queues[prio];
> +		struct kvmppc_xive_irq_state *state;
> +		struct kvmppc_xive_src_block *sb;
> +		u32 idx, toggle, entry, irq, hw_num;
> +		struct xive_irq_data *xd;
> +		__be32 *qpage;
> +		u16 src;
> +
> +		idx = q->idx;
> +		toggle = q->toggle;
> +		qpage = READ_ONCE(q->qpage);
> +		if (!qpage)
> +			continue;
> +
> +		/* For each interrupt in the queue */
> +		for (;;) {
> +			entry = be32_to_cpup(qpage + idx);
> +
> +			/* No more ? */
> +			if ((entry >> 31) == toggle)
> +				break;
> +			irq = entry & 0x7fffffff;
> +
> +			/* Skip dummies and IPIs */
> +			if (irq == XICS_DUMMY || irq == XICS_IPI)
> +				goto next;
> +			sb = kvmppc_xive_find_source(xive, irq, &src);
> +			if (!sb)
> +				goto next;
> +			state = &sb->irq_state[src];
> +
> +			/* Has it been rerouted ? */
> +			if (xc->server_num == state->act_server)
> +				goto next;
> +
> +			/*
> +			 * Allright, it *has* been re-routed, kill it from
> +			 * the queue.
> +			 */
> +			qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
> +
> +			/* Find the HW interrupt */
> +			kvmppc_xive_select_irq(state, &hw_num, &xd);
> +
> +			/* If it's not an LSI, set PQ to 11 the EOI will force a resend */
> +			if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
> +				GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
> +
> +			/* EOI the source */
> +			GLUE(X_PFX,source_eoi)(hw_num, xd);
> +
> +		next:
> +			idx = (idx + 1) & q->msk;
> +			if (idx == 0)
> +				toggle ^= 1;
> +		}
> +	}
> +}
> +
>  X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
>  {
>  	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
> +	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
>  	u8 old_cppr;
>
>  	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
> @@ -407,14 +481,34 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
>  	 */
>  	smp_mb();
>
> -	/*
> -	 * We are masking less, we need to look for pending things
> -	 * to deliver and set VP pending bits accordingly to trigger
> -	 * a new interrupt otherwise we might miss MFRR changes for
> -	 * which we have optimized out sending an IPI signal.
> -	 */
> -	if (cppr > old_cppr)
> +	if (cppr > old_cppr) {
> +		/*
> +		 * We are masking less, we need to look for pending things
> +		 * to deliver and set VP pending bits accordingly to trigger
> +		 * a new interrupt otherwise we might miss MFRR changes for
> +		 * which we have optimized out sending an IPI signal.
> +		 */
>  		GLUE(X_PFX,push_pending_to_hw)(xc);
> +	} else {
> +		/*
> +		 * We are masking more, we need to check the queue for any
> +		 * interrupt that has been routed to another CPU, take
> +		 * it out (replace it with the dummy) and retrigger it.
> +		 *
> +		 * This is necessary since those interrupts may otherwise
> +		 * never be processed, at least not until this CPU restores
> +		 * its CPPR.
> +		 *
> +		 * This is in theory racy vs. HW adding new interrupts to
> +		 * the queue. In practice this works because the interesting
> +		 * cases are when the guest has done a set_xive() to move the
> +		 * interrupt away, which flushes the xive, followed by the
> +		 * target CPU doing a H_CPPR. So any new interrupt coming into
> +		 * the queue must still be routed to us and isn't a source
> +		 * of concern.
> +		 */
> +		GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
> +	}
>
>  	/* Apply new CPPR */
>  	xc->hw_cppr = cppr;

--
Alexey
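
For anyone following the queue-walk logic above: the scan relies on the XIVE
event-queue entry format, where bit 31 carries the generation (toggle) bit and
bits 0-30 the interrupt number. Replacing an entry with
((entry & 0x80000000) | XICS_DUMMY) keeps the generation bit intact, so a later
scan still walks past the slot instead of treating it as end-of-queue. Below is
a minimal user-space sketch of just that encoding; the queue size, the sample
entries, and the choice of irq 42 as the "re-routed" source are made up for
illustration and are not part of the patch.

#include <stdint.h>
#include <stdio.h>

#define XICS_DUMMY 1
#define QSIZE      8            /* entries; must be a power of two */
#define QMASK      (QSIZE - 1)

/* Replace an entry with the dummy while preserving the generation bit */
static uint32_t make_dummy(uint32_t entry)
{
	return (entry & 0x80000000u) | XICS_DUMMY;
}

int main(void)
{
	/* Fake queue: three valid entries written with generation bit 1 */
	uint32_t q[QSIZE] = {
		0x80000000u | 23, 0x80000000u | 42, 0x80000000u | 7,
	};
	uint32_t idx = 0, toggle = 0;

	for (;;) {
		uint32_t entry = q[idx];

		/* "No more": the generation bit matches our toggle */
		if ((entry >> 31) == toggle)
			break;

		uint32_t irq = entry & 0x7fffffffu;
		if (irq == 42) {        /* pretend irq 42 was re-routed */
			q[idx] = make_dummy(entry);
			printf("replaced irq %u at idx %u with dummy\n",
			       (unsigned)irq, (unsigned)idx);
		}

		idx = (idx + 1) & QMASK;
		if (idx == 0)
			toggle ^= 1;    /* generation flips on wrap */
	}
	return 0;
}

Running this replaces only the slot holding irq 42; a re-scan would still see
that slot as a valid entry (its generation bit differs from the toggle) but
would skip it as XICS_DUMMY, which is exactly why scan_interrupts() gained the
extra XICS_DUMMY check in the patch.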