> On 26-Jul-2021, at 9:19 AM, Nicholas Piggin <npiggin@xxxxxxxxx> wrote:
>
> Implement the P9 path PMU save/restore code in C, and remove the
> POWER9/10 code from the P7/8 path assembly.
>
> -449 cycles (8533) POWER9 virt-mode NULL hcall
>
> Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>
> ---
>  arch/powerpc/include/asm/asm-prototypes.h |   5 -
>  arch/powerpc/kvm/book3s_hv.c              | 205 ++++++++++++++++++++--
>  arch/powerpc/kvm/book3s_hv_interrupts.S   |  13 +-
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  43 +----
>  4 files changed, 200 insertions(+), 66 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
> index 222823861a67..41b8a1e1144a 100644
> --- a/arch/powerpc/include/asm/asm-prototypes.h
> +++ b/arch/powerpc/include/asm/asm-prototypes.h
> @@ -141,11 +141,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
>  					bool preserve_nv) { }
>  #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
>
> -void kvmhv_save_host_pmu(void);
> -void kvmhv_load_host_pmu(void);
> -void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
> -void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
> -
>  void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
>
>  long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 2eef708c4354..d20b579ddcdf 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -3735,6 +3735,188 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
>  	trace_kvmppc_run_core(vc, 1);
>  }
>
> +/*
> + * Privileged (non-hypervisor) host registers to save.
> + */
> +struct p9_host_os_sprs {
> +	unsigned long dscr;
> +	unsigned long tidr;
> +	unsigned long iamr;
> +	unsigned long amr;
> +	unsigned long fscr;
> +
> +	unsigned int pmc1;
> +	unsigned int pmc2;
> +	unsigned int pmc3;
> +	unsigned int pmc4;
> +	unsigned int pmc5;
> +	unsigned int pmc6;
> +	unsigned long mmcr0;
> +	unsigned long mmcr1;
> +	unsigned long mmcr2;
> +	unsigned long mmcr3;
> +	unsigned long mmcra;
> +	unsigned long siar;
> +	unsigned long sier1;
> +	unsigned long sier2;
> +	unsigned long sier3;
> +	unsigned long sdar;
> +};
> +
> +static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
> +{
> +	if (!(mmcr0 & MMCR0_FC))
> +		goto do_freeze;
> +	if (mmcra & MMCRA_SAMPLE_ENABLE)
> +		goto do_freeze;
> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +		if (!(mmcr0 & MMCR0_PMCCEXT))
> +			goto do_freeze;
> +		if (!(mmcra & MMCRA_BHRB_DISABLE))
> +			goto do_freeze;
> +	}
> +	return;
> +
> +do_freeze:
> +	mmcr0 = MMCR0_FC;
> +	mmcra = 0;
> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +		mmcr0 |= MMCR0_PMCCEXT;
> +		mmcra = MMCRA_BHRB_DISABLE;
> +	}
> +
> +	mtspr(SPRN_MMCR0, mmcr0);
> +	mtspr(SPRN_MMCRA, mmcra);
> +	isync();
> +}
> +

Hi Nick,

After freezing the PMU, do we need to clear "pmcregs_in_use" as well?

Also, can't we do the MMCR0/MMCRA freeze settings unconditionally here? Do we need the if conditions for FC/PMCCEXT/BHRB? Something like the sketch below is what I have in mind.
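Just to illustrate the question (an untested sketch, reusing the same MMCR0_FC/MMCR0_PMCCEXT/MMCRA_BHRB_DISABLE settings your patch already writes, not a tested alternative):

static void freeze_pmu(void)
{
	unsigned long mmcr0 = MMCR0_FC;
	unsigned long mmcra = 0;

	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
		mmcr0 |= MMCR0_PMCCEXT;
		mmcra = MMCRA_BHRB_DISABLE;
	}

	/*
	 * Always write the freeze values, rather than checking the
	 * current MMCR0/MMCRA state and returning early.
	 */
	mtspr(SPRN_MMCR0, mmcr0);
	mtspr(SPRN_MMCRA, mmcra);
	isync();
}

I may be missing a reason the early-return checks are worth keeping (skipping the mtsprs and isync when everything is already frozen?), in which case please ignore this part.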
Thanks
Athira

> +static void save_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
> +{
> +	if (ppc_get_pmu_inuse()) {
> +		/*
> +		 * It might be better to put PMU handling (at least for the
> +		 * host) in the perf subsystem because it knows more about what
> +		 * is being used.
> +		 */
> +
> +		/* POWER9, POWER10 do not implement HPMC or SPMC */
> +
> +		host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
> +		host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
> +
> +		freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
> +
> +		host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
> +		host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
> +		host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
> +		host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
> +		host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
> +		host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
> +		host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
> +		host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
> +		host_os_sprs->sdar = mfspr(SPRN_SDAR);
> +		host_os_sprs->siar = mfspr(SPRN_SIAR);
> +		host_os_sprs->sier1 = mfspr(SPRN_SIER);
> +
> +		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +			host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
> +			host_os_sprs->sier2 = mfspr(SPRN_SIER2);
> +			host_os_sprs->sier3 = mfspr(SPRN_SIER3);
> +		}
> +	}
> +}
> +
> +static void load_p9_guest_pmu(struct kvm_vcpu *vcpu)
> +{
> +	mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
> +	mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
> +	mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
> +	mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
> +	mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
> +	mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
> +	mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
> +	mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
> +	mtspr(SPRN_SDAR, vcpu->arch.sdar);
> +	mtspr(SPRN_SIAR, vcpu->arch.siar);
> +	mtspr(SPRN_SIER, vcpu->arch.sier[0]);
> +
> +	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +		mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]);
> +		mtspr(SPRN_SIER2, vcpu->arch.sier[1]);
> +		mtspr(SPRN_SIER3, vcpu->arch.sier[2]);
> +	}
> +
> +	/* Set MMCRA then MMCR0 last */
> +	mtspr(SPRN_MMCRA, vcpu->arch.mmcra);
> +	mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]);
> +	/* No isync necessary because we're starting counters */
> +}
> +
> +static void save_p9_guest_pmu(struct kvm_vcpu *vcpu)
> +{
> +	struct lppaca *lp;
> +	int save_pmu = 1;
> +
> +	lp = vcpu->arch.vpa.pinned_addr;
> +	if (lp)
> +		save_pmu = lp->pmcregs_in_use;
> +
> +	if (save_pmu) {
> +		vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0);
> +		vcpu->arch.mmcra = mfspr(SPRN_MMCRA);
> +
> +		freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra);
> +
> +		vcpu->arch.pmc[0] = mfspr(SPRN_PMC1);
> +		vcpu->arch.pmc[1] = mfspr(SPRN_PMC2);
> +		vcpu->arch.pmc[2] = mfspr(SPRN_PMC3);
> +		vcpu->arch.pmc[3] = mfspr(SPRN_PMC4);
> +		vcpu->arch.pmc[4] = mfspr(SPRN_PMC5);
> +		vcpu->arch.pmc[5] = mfspr(SPRN_PMC6);
> +		vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1);
> +		vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2);
> +		vcpu->arch.sdar = mfspr(SPRN_SDAR);
> +		vcpu->arch.siar = mfspr(SPRN_SIAR);
> +		vcpu->arch.sier[0] = mfspr(SPRN_SIER);
> +
> +		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +			vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3);
> +			vcpu->arch.sier[1] = mfspr(SPRN_SIER2);
> +			vcpu->arch.sier[2] = mfspr(SPRN_SIER3);
> +		}
> +	} else {
> +		freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
> +	}
> +}
> +
> +static void load_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
> +{
> +	if (ppc_get_pmu_inuse()) {
> +		mtspr(SPRN_PMC1, host_os_sprs->pmc1);
> +		mtspr(SPRN_PMC2, host_os_sprs->pmc2);
> +		mtspr(SPRN_PMC3, host_os_sprs->pmc3);
> +		mtspr(SPRN_PMC4, host_os_sprs->pmc4);
> +		mtspr(SPRN_PMC5, host_os_sprs->pmc5);
> +		mtspr(SPRN_PMC6, host_os_sprs->pmc6);
> +		mtspr(SPRN_MMCR1, host_os_sprs->mmcr1);
> +		mtspr(SPRN_MMCR2, host_os_sprs->mmcr2);
> +		mtspr(SPRN_SDAR, host_os_sprs->sdar);
> +		mtspr(SPRN_SIAR, host_os_sprs->siar);
> +		mtspr(SPRN_SIER, host_os_sprs->sier1);
> +
> +		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +			mtspr(SPRN_MMCR3, host_os_sprs->mmcr3);
> +			mtspr(SPRN_SIER2, host_os_sprs->sier2);
> +			mtspr(SPRN_SIER3, host_os_sprs->sier3);
> +		}
> +
> +		/* Set MMCRA then MMCR0 last */
> +		mtspr(SPRN_MMCRA, host_os_sprs->mmcra);
> +		mtspr(SPRN_MMCR0, host_os_sprs->mmcr0);
> +		isync();
> +	}
> +}
> +
>  static void load_spr_state(struct kvm_vcpu *vcpu)
>  {
>  	mtspr(SPRN_DSCR, vcpu->arch.dscr);
> @@ -3777,17 +3959,6 @@ static void store_spr_state(struct kvm_vcpu *vcpu)
>  	vcpu->arch.dscr = mfspr(SPRN_DSCR);
>  }
>
> -/*
> - * Privileged (non-hypervisor) host registers to save.
> - */
> -struct p9_host_os_sprs {
> -	unsigned long dscr;
> -	unsigned long tidr;
> -	unsigned long iamr;
> -	unsigned long amr;
> -	unsigned long fscr;
> -};
> -
>  static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
>  {
>  	host_os_sprs->dscr = mfspr(SPRN_DSCR);
> @@ -3835,7 +4006,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>  	struct p9_host_os_sprs host_os_sprs;
>  	s64 dec;
>  	u64 tb, next_timer;
> -	int trap, save_pmu;
> +	int trap;
>
>  	WARN_ON_ONCE(vcpu->arch.ceded);
>
> @@ -3848,7 +4019,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>
>  	save_p9_host_os_sprs(&host_os_sprs);
>
> -	kvmhv_save_host_pmu();		/* saves it to PACA kvm_hstate */
> +	save_p9_host_pmu(&host_os_sprs);
>
>  	kvmppc_subcore_enter_guest();
>
> @@ -3878,7 +4049,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>  		barrier();
>  	}
>  #endif
> -	kvmhv_load_guest_pmu(vcpu);
> +	load_p9_guest_pmu(vcpu);
>
>  	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
>  	load_fp_state(&vcpu->arch.fp);
> @@ -4000,16 +4171,14 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>  	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
>  		kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
>
> -	save_pmu = 1;
>  	if (vcpu->arch.vpa.pinned_addr) {
>  		struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
>  		u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
>  		lp->yield_count = cpu_to_be32(yield_count);
>  		vcpu->arch.vpa.dirty = 1;
> -		save_pmu = lp->pmcregs_in_use;
>  	}
>
> -	kvmhv_save_guest_pmu(vcpu, save_pmu);
> +	save_p9_guest_pmu(vcpu);
>  #ifdef CONFIG_PPC_PSERIES
>  	if (kvmhv_on_pseries()) {
>  		barrier();
> @@ -4025,7 +4194,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>
>  	mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
>
> -	kvmhv_load_host_pmu();
> +	load_p9_host_pmu(&host_os_sprs);
>
>  	kvmppc_subcore_exit_guest();
>
> diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
> index 4444f83cb133..59d89e4b154a 100644
> --- a/arch/powerpc/kvm/book3s_hv_interrupts.S
> +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
> @@ -104,7 +104,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
>  	mtlr	r0
>  	blr
>
> -_GLOBAL(kvmhv_save_host_pmu)
> +/*
> + * void kvmhv_save_host_pmu(void)
> + */
> +kvmhv_save_host_pmu:
>  BEGIN_FTR_SECTION
>  	/* Work around P8 PMAE bug */
>  	li	r3, -1
> @@ -138,14 +141,6 @@ BEGIN_FTR_SECTION
>  	std	r8, HSTATE_MMCR2(r13)
>  	std	r9, HSTATE_SIER(r13)
>  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> -BEGIN_FTR_SECTION
> -	mfspr	r5, SPRN_MMCR3
> -	mfspr	r6, SPRN_SIER2
> -	mfspr	r7, SPRN_SIER3
> -	std	r5, HSTATE_MMCR3(r13)
> -	std	r6, HSTATE_SIER2(r13)
> -	std	r7, HSTATE_SIER3(r13)
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
>  	mfspr	r3, SPRN_PMC1
>  	mfspr	r5, SPRN_PMC2
>  	mfspr	r6, SPRN_PMC3
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 9021052f1579..551ce223b40c 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -2738,10 +2738,11 @@ kvmppc_msr_interrupt:
>  	blr
>
>  /*
> + * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu)
> + *
>   * Load up guest PMU state. R3 points to the vcpu struct.
>   */
> -_GLOBAL(kvmhv_load_guest_pmu)
> -EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
> +kvmhv_load_guest_pmu:
>  	mr	r4, r3
>  	mflr	r0
>  	li	r3, 1
> @@ -2775,27 +2776,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
>  	mtspr	SPRN_MMCRA, r6
>  	mtspr	SPRN_SIAR, r7
>  	mtspr	SPRN_SDAR, r8
> -BEGIN_FTR_SECTION
> -	ld	r5, VCPU_MMCR + 24(r4)
> -	ld	r6, VCPU_SIER + 8(r4)
> -	ld	r7, VCPU_SIER + 16(r4)
> -	mtspr	SPRN_MMCR3, r5
> -	mtspr	SPRN_SIER2, r6
> -	mtspr	SPRN_SIER3, r7
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
>  BEGIN_FTR_SECTION
>  	ld	r5, VCPU_MMCR + 16(r4)
>  	ld	r6, VCPU_SIER(r4)
>  	mtspr	SPRN_MMCR2, r5
>  	mtspr	SPRN_SIER, r6
> -BEGIN_FTR_SECTION_NESTED(96)
>  	lwz	r7, VCPU_PMC + 24(r4)
>  	lwz	r8, VCPU_PMC + 28(r4)
>  	ld	r9, VCPU_MMCRS(r4)
>  	mtspr	SPRN_SPMC1, r7
>  	mtspr	SPRN_SPMC2, r8
>  	mtspr	SPRN_MMCRS, r9
> -END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
>  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
>  	mtspr	SPRN_MMCR0, r3
>  	isync
> @@ -2803,10 +2794,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
>  	blr
>
>  /*
> + * void kvmhv_load_host_pmu(void)
> + *
>   * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
>   */
> -_GLOBAL(kvmhv_load_host_pmu)
> -EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
> +kvmhv_load_host_pmu:
>  	mflr	r0
>  	lbz	r4, PACA_PMCINUSE(r13)	/* is the host using the PMU? */
>  	cmpwi	r4, 0
> @@ -2844,25 +2836,18 @@ BEGIN_FTR_SECTION
>  	mtspr	SPRN_MMCR2, r8
>  	mtspr	SPRN_SIER, r9
>  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> -BEGIN_FTR_SECTION
> -	ld	r5, HSTATE_MMCR3(r13)
> -	ld	r6, HSTATE_SIER2(r13)
> -	ld	r7, HSTATE_SIER3(r13)
> -	mtspr	SPRN_MMCR3, r5
> -	mtspr	SPRN_SIER2, r6
> -	mtspr	SPRN_SIER3, r7
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
>  	mtspr	SPRN_MMCR0, r3
>  	isync
>  	mtlr	r0
> 23:	blr
>
>  /*
> + * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use)
> + *
>   * Save guest PMU state into the vcpu struct.
>   * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
>   */
> -_GLOBAL(kvmhv_save_guest_pmu)
> -EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
> +kvmhv_save_guest_pmu:
>  	mr	r9, r3
>  	mr	r8, r4
>  BEGIN_FTR_SECTION
> @@ -2911,14 +2896,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
>  BEGIN_FTR_SECTION
>  	std	r10, VCPU_MMCR + 16(r9)
>  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> -BEGIN_FTR_SECTION
> -	mfspr	r5, SPRN_MMCR3
> -	mfspr	r6, SPRN_SIER2
> -	mfspr	r7, SPRN_SIER3
> -	std	r5, VCPU_MMCR + 24(r9)
> -	std	r6, VCPU_SIER + 8(r9)
> -	std	r7, VCPU_SIER + 16(r9)
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
>  	std	r7, VCPU_SIAR(r9)
>  	std	r8, VCPU_SDAR(r9)
>  	mfspr	r3, SPRN_PMC1
> @@ -2936,7 +2913,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
>  BEGIN_FTR_SECTION
>  	mfspr	r5, SPRN_SIER
>  	std	r5, VCPU_SIER(r9)
> -BEGIN_FTR_SECTION_NESTED(96)
>  	mfspr	r6, SPRN_SPMC1
>  	mfspr	r7, SPRN_SPMC2
>  	mfspr	r8, SPRN_MMCRS
>  	std	r6, VCPU_PMC + 24(r9)
>  	std	r7, VCPU_PMC + 28(r9)
>  	std	r8, VCPU_MMCRS(r9)
>  	lis	r4, 0x8000
>  	mtspr	SPRN_MMCRS, r4
> -END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
>  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> 22:	blr
>
> --
> 2.23.0
>