irq_work's use of the DEC SPR is racy with guest<->host switch and guest entry which flips the DEC interrupt to guest, which could lose a host work interrupt. This patch closes one race, and attempts to comment several others. (XXX: should think a bit harder about this) Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx> --- arch/powerpc/include/asm/paca.h | 1 + arch/powerpc/kvm/book3s_hv.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index ec18ac818e3a..23c12048fbc9 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -174,6 +174,7 @@ struct paca_struct { u8 irq_happened; /* irq happened while soft-disabled */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Could have irq_work_using_hdec here, but what about nested HV entry modifying DEC? Could have a pointer to the hv struct time limit */ u8 pmcregs_in_use; /* pseries puts this in lppaca */ #endif u64 sprg_vdso; /* Saved user-visible sprg */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index d98958b78830..1997cf347d3e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3689,6 +3689,18 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, if (!(vcpu->arch.ctrl & 1)) mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1); + /* + * XXX: must always deal with irq_work_raise via NMI vs setting DEC. + * The problem occurs right as we switch into guest mode if a NMI + * hits and sets pending work and sets DEC, then that will apply to + * the guest and not bring us back to the host. + * + * irq_work_raise could check a flag (or possibly LPCR[HDICE] for + * example) and set HDEC to 1? That wouldn't solve the nested hv + * case which needs to abort the hcall or zero the time limit. + * + * Another day's problem. + */ mtspr(SPRN_DEC, vcpu->arch.dec_expires - tb); if (kvmhv_on_pseries()) { @@ -3822,7 +3834,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vc->entry_exit_map = 0x101; vc->in_guest = 0; - mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - tb); + set_dec_or_work(local_paca->kvm_hstate.dec_expires - tb); + mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso); kvmhv_load_host_pmu(); -- 2.23.0