On POWER9 and newer, rather than the complex HMI synchronisation and subcore state, have each thread un-apply the guest TB offset before calling into the early HMI handler. This allows the subcore state to be avoided, including subcore enter / exit guest, which includes an expensive divide that shows up slightly in profiles. Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx> --- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s_hv.c | 12 +++--- arch/powerpc/kvm/book3s_hv_hmi.c | 7 +++- arch/powerpc/kvm/book3s_hv_p9_entry.c | 2 +- arch/powerpc/kvm/book3s_hv_ras.c | 54 +++++++++++++++++++++++++++ 5 files changed, 67 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2d88944f9f34..6355a6980ccf 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -760,6 +760,7 @@ void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); void kvmppc_subcore_enter_guest(void); void kvmppc_subcore_exit_guest(void); long kvmppc_realmode_hmi_handler(void); +long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu); long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel); long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e7f4525f2a74..f1f343307578 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4017,8 +4017,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.ceded = 0; - kvmppc_subcore_enter_guest(); - vcpu_vpa_increment_dispatch(vcpu); if (kvmhv_on_pseries()) { @@ -4071,8 +4069,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu_vpa_increment_dispatch(vcpu); - kvmppc_subcore_exit_guest(); - return trap; } @@ -6054,9 +6050,11 @@ static int kvmppc_book3s_init_hv(void) if (r) return r; - r = kvm_init_subcore_bitmap(); - 
if (r) - return r; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + r = kvm_init_subcore_bitmap(); + if (r) + return r; + } /* * We need a way of accessing the XICS interrupt controller, diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c index 9af660476314..1ec50c69678b 100644 --- a/arch/powerpc/kvm/book3s_hv_hmi.c +++ b/arch/powerpc/kvm/book3s_hv_hmi.c @@ -20,10 +20,15 @@ void wait_for_subcore_guest_exit(void) /* * NULL bitmap pointer indicates that KVM module hasn't - * been loaded yet and hence no guests are running. + * been loaded yet and hence no guests are running, or that the + * host is a POWER9 or newer CPU. + * * If no KVM is in use, no need to co-ordinate among threads * as all of them will always be in host and no one is going * to modify TB other than the opal hmi handler. + * + * POWER9 and newer don't need this synchronisation. + * * Hence, just return from here. */ if (!local_paca->sibling_subcore_state) diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 1e18c089478e..7d31ad3de723 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -934,7 +934,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc kvmppc_realmode_machine_check(vcpu); } else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) { - kvmppc_realmode_hmi_handler(); + kvmppc_p9_realmode_hmi_handler(vcpu); } else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) { vcpu->arch.emul_inst = mfspr(SPRN_HEIR); diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index d4bca93b79f6..3f94f4080d04 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -136,6 +136,60 @@ void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) vcpu->arch.mce_evt = mce_evt; } + +long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + long ret = 0; + + /* + * Unapply and clear 
the offset first. That way, if the TB was not + * resynced then it will remain in host-offset, and if it was resynced + * then it is brought into host-offset. Then the tb offset is + * re-applied before continuing with the KVM exit. + * + * This way, we don't need to actually know whether or not OPAL resynced + * the timebase or do any of the complicated dance that the P7/8 + * path requires. + */ + if (vc->tb_offset_applied) { + u64 new_tb = mftb() - vc->tb_offset_applied; + mtspr(SPRN_TBU40, new_tb); + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { + new_tb += 0x1000000; + mtspr(SPRN_TBU40, new_tb); + } + vc->tb_offset_applied = 0; + } + + local_paca->hmi_irqs++; + + if (hmi_handle_debugtrig(NULL) >= 0) { + ret = 1; + goto out; + } + + if (ppc_md.hmi_exception_early) + ppc_md.hmi_exception_early(NULL); + +out: + if (vc->tb_offset) { + u64 new_tb = mftb() + vc->tb_offset; + mtspr(SPRN_TBU40, new_tb); + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { + new_tb += 0x1000000; + mtspr(SPRN_TBU40, new_tb); + } + vc->tb_offset_applied = vc->tb_offset; + } + + return ret; +} + +/* + * The following subcore HMI handling is all only for pre-POWER9 CPUs. + */ + /* Check if dynamic split is in force and return subcore size accordingly. */ static inline int kvmppc_cur_subcore_size(void) { -- 2.23.0