SRR0/1, DAR, DSISR must all be protected from machine check which can clobber them. Ensure MSR[RI] is clear while they are live. Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx> --- arch/powerpc/kvm/book3s_hv.c | 5 +++-- arch/powerpc/kvm/book3s_hv_interrupt.c | 26 +++++++++++++++++++++++--- arch/powerpc/kvm/book3s_hv_ras.c | 5 +++++ 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f99503acdda5..94989fe2fdfe 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3506,8 +3506,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_BESCR, vcpu->arch.bescr); mtspr(SPRN_WORT, vcpu->arch.wort); mtspr(SPRN_TIDR, vcpu->arch.tid); - mtspr(SPRN_DAR, vcpu->arch.shregs.dar); - mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); + /* XXX: DAR, DSISR must be set with MSR[RI] clear (or hstate as appropriate) */ mtspr(SPRN_AMR, vcpu->arch.amr); mtspr(SPRN_UAMOR, vcpu->arch.uamor); @@ -3553,6 +3552,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, hvregs.vcpu_token = vcpu->vcpu_id; } hvregs.hdec_expiry = time_limit; + mtspr(SPRN_DAR, vcpu->arch.shregs.dar); + mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs), __pa(&vcpu->arch.regs)); kvmhv_restore_hv_return_state(vcpu, &hvregs); diff --git a/arch/powerpc/kvm/book3s_hv_interrupt.c b/arch/powerpc/kvm/book3s_hv_interrupt.c index dea3eca3648a..f5fef7398e37 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupt.c +++ b/arch/powerpc/kvm/book3s_hv_interrupt.c @@ -126,6 +126,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc s64 hdec; u64 tb, purr, spurr; u64 *exsave; + bool ri_clear; unsigned long msr = mfmsr(); int trap; unsigned long host_hfscr = mfspr(SPRN_HFSCR); @@ -197,9 +198,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc */ mtspr(SPRN_HDEC, hdec); - mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0); - mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1); - start_timing(vcpu, &vcpu->arch.rm_entry); vcpu->arch.ceded = 0; @@ -225,6 +223,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc */ mtspr(SPRN_HDSISR, HDSISR_CANARY); + __mtmsrd(0, 1); /* clear RI */ + + mtspr(SPRN_DAR, vcpu->arch.shregs.dar); + mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); + mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0); + mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1); + accumulate_time(vcpu, &vcpu->arch.guest_time); local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST_HV_FAST; @@ -240,6 +245,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc vcpu->arch.shregs.dar = mfspr(SPRN_DAR); vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); + /* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */ + if ((local_paca->kvm_hstate.scratch0 & 0x2) && + (vcpu->arch.shregs.msr & MSR_RI)) + ri_clear = false; + else + ri_clear = true; + trap = local_paca->kvm_hstate.scratch0 & ~0x2; if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) { exsave = local_paca->exgen; @@ -251,6 +263,14 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1; vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2; + + if (ri_clear) { +/// XXX this fires maybe on syscalls on mambo WARN_ON((mfmsr() & MSR_RI)); + __mtmsrd(MSR_RI, 1); /* set RI after reading machine check regs (DAR, DSISR, SRR0/1) and hstate scratch (which we need to move into exsave) */ + } else { + WARN_ON(!(mfmsr() & MSR_RI)); + } + vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)]; vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)]; vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)]; diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index d4bca93b79f6..7a645f4428c2 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -198,6 +198,7 @@ static void kvmppc_tb_resync_done(void) * value. Hence the idea is to resync the TB on every HMI, so that we * know about the exact state of the TB value. Resync TB call will * restore TB to host timebase. + * XXX: could use new opal hmi handler flags for this * * Things to consider: * - On TB error, HMI interrupt is reported on all the threads of the core @@ -290,6 +291,10 @@ long kvmppc_realmode_hmi_handler(void) */ wait_for_subcore_guest_exit(); + /* + * XXX: Is this safe with independent threads mode? + */ + /* * At this point we are sure that primary threads from each * subcore on this core have completed guest->host partition -- 2.23.0