On Wed, Mar 2, 2022 at 3:14 AM Like Xu <like.xu.linux@xxxxxxxxx> wrote:
>
> From: Like Xu <likexu@xxxxxxxxxxx>
>
> The code for reprogram_{gp, fixed}_counter() is similar: the fixed
> counter uses the PERF_TYPE_HARDWARE type, while the gp counter can use
> either PERF_TYPE_HARDWARE or PERF_TYPE_RAW, depending on the
> pmc->eventsel value.
>
> After commit 761875634a5e ("KVM: x86/pmu: Setup pmc->eventsel for
> fixed PMCs"), the pmc->eventsel of a fixed counter is also set up with
> the same semantic value and is not changed during guest runtime. But
> essentially, "the HARDWARE is just a convenience wrapper over RAW
> IIRC", to quote PeterZ. So it should be safe to use only the
> PERF_TYPE_RAW type to program both gp and fixed counters in
> reprogram_counter().
>
> To make the gp and fixed counters more semantically symmetrical, the
> ARCH_PERFMON_EVENTSEL_{USR, OS, INT} bits are translated from
> fixed_ctr_ctrl before the pmc_reprogram_counter() call.
>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx>
> Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
> ---
>  arch/x86/kvm/pmu.c           | 128 +++++++++++++----------------------
>  arch/x86/kvm/vmx/pmu_intel.c |   2 +-
>  2 files changed, 47 insertions(+), 83 deletions(-)
>
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 5299488b002c..00e1660c10ca 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -215,85 +215,60 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
>         return allow_event;
>  }
>
> -static void reprogram_gp_counter(struct kvm_pmc *pmc)
> -{
> -       u64 config;
> -       u32 type = PERF_TYPE_RAW;
> -       u64 eventsel = pmc->eventsel;
> -
> -       if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
> -               printk_once("kvm pmu: pin control bit is ignored\n");
> -
> -       pmc_pause_counter(pmc);
> -
> -       if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
> -               return;
> -
> -       if (!check_pmu_event_filter(pmc))
> -               return;
> -
> -       if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
> -                         ARCH_PERFMON_EVENTSEL_INV |
> -                         ARCH_PERFMON_EVENTSEL_CMASK |
> -                         HSW_IN_TX |
> -                         HSW_IN_TX_CHECKPOINTED))) {
> -               config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
> -               if (config != PERF_COUNT_HW_MAX)
> -                       type = PERF_TYPE_HARDWARE;
> -       }
> -
> -       if (type == PERF_TYPE_RAW)
> -               config = eventsel & AMD64_RAW_EVENT_MASK;
> -
> -       if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
> -               return;
> -
> -       pmc_release_perf_event(pmc);
> -
> -       pmc->current_config = eventsel;
> -       pmc_reprogram_counter(pmc, type, config,
> -                             !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
> -                             !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
> -                             eventsel & ARCH_PERFMON_EVENTSEL_INT,
> -                             (eventsel & HSW_IN_TX),
> -                             (eventsel & HSW_IN_TX_CHECKPOINTED));
> -}
> -
> -static void reprogram_fixed_counter(struct kvm_pmc *pmc)
> +static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
>  {
>         struct kvm_pmu *pmu = pmc_to_pmu(pmc);
> -       int idx = pmc->idx - INTEL_PMC_IDX_FIXED;
> -       u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);
> -       unsigned en_field = ctrl & 0x3;
> -       bool pmi = ctrl & 0x8;
>
> -       pmc_pause_counter(pmc);
> +       if (pmc_is_fixed(pmc))
> +               return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
> +                                       pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
>
> -       if (!en_field || !pmc_is_enabled(pmc))
> -               return;
> -
> -       if (!check_pmu_event_filter(pmc))
> -               return;
> -
> -       if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
> -               return;
> -
> -       pmc_release_perf_event(pmc);
> -
> -       pmc->current_config = (u64)ctrl;
> -       pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
> -                             kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc),
> -                             !(en_field & 0x2), /* exclude user */
> -                             !(en_field & 0x1), /* exclude kernel */
> -                             pmi, false, false);
> +       return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
>  }
>
>  void reprogram_counter(struct kvm_pmc *pmc)
>  {
> -       if (pmc_is_gp(pmc))
> -               reprogram_gp_counter(pmc);
> -       else
> -               reprogram_fixed_counter(pmc);
> +       struct kvm_pmu *pmu = pmc_to_pmu(pmc);
> +       u64 eventsel = pmc->eventsel;
> +       u64 new_config = eventsel;
> +       u8 fixed_ctr_ctrl;
> +
> +       pmc_pause_counter(pmc);
> +
> +       if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
> +               return;
> +
> +       if (!check_pmu_event_filter(pmc))
> +               return;
> +
> +       if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
> +               printk_once("kvm pmu: pin control bit is ignored\n");
> +
> +       if (pmc_is_fixed(pmc)) {
> +               fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
> +                                                 pmc->idx - INTEL_PMC_IDX_FIXED);
> +               if (fixed_ctr_ctrl & 0x1)
> +                       eventsel |= ARCH_PERFMON_EVENTSEL_OS;
> +               if (fixed_ctr_ctrl & 0x2)
> +                       eventsel |= ARCH_PERFMON_EVENTSEL_USR;
> +               if (fixed_ctr_ctrl & 0x8)
> +                       eventsel |= ARCH_PERFMON_EVENTSEL_INT;
> +               new_config = (u64)fixed_ctr_ctrl;
> +       }
> +
> +       if (pmc->current_config == new_config && pmc_resume_counter(pmc))
> +               return;
> +
> +       pmc_release_perf_event(pmc);
> +
> +       pmc->current_config = new_config;
> +       pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
> +                             (eventsel & AMD64_RAW_EVENT_MASK),
> +                             !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
> +                             !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
> +                             eventsel & ARCH_PERFMON_EVENTSEL_INT,
> +                             (eventsel & HSW_IN_TX),
> +                             (eventsel & HSW_IN_TX_CHECKPOINTED));

It seems that this extremely long argument list was motivated by the
differences between the two original call sites. Now that you have
mocked up a full eventsel (with USR, OS, INT, IN_TX, and IN_TXCP bits)
for the fixed counters, why not pass the entire eventsel as the third
argument and drop all of the rest? Then, pmc_reprogram_counter() can
extract/check the bits of interest.
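
Completely untested sketch of what I mean, assuming the existing
perf_event_attr setup inside pmc_reprogram_counter() stays as-is and
only the bit extraction moves into the callee:

static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
                                  u64 config, u64 eventsel)
{
        /* Recover the old boolean parameters from the full eventsel. */
        bool exclude_user = !(eventsel & ARCH_PERFMON_EVENTSEL_USR);
        bool exclude_kernel = !(eventsel & ARCH_PERFMON_EVENTSEL_OS);
        bool intr = eventsel & ARCH_PERFMON_EVENTSEL_INT;
        bool in_tx = eventsel & HSW_IN_TX;
        bool in_tx_cp = eventsel & HSW_IN_TX_CHECKPOINTED;

        /* ... existing perf_event_attr setup and event creation ... */
}

and the call site above collapses to:

        pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
                              eventsel & AMD64_RAW_EVENT_MASK, eventsel);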
> }
> EXPORT_SYMBOL_GPL(reprogram_counter);
>
> @@ -451,17 +426,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
>         kvm_pmu_refresh(vcpu);
>  }
>
> -static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
> -{
> -       struct kvm_pmu *pmu = pmc_to_pmu(pmc);
> -
> -       if (pmc_is_fixed(pmc))
> -               return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
> -                                       pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
> -
> -       return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
> -}
> -
>  /* Release perf_events for vPMCs that have been unused for a full time slice. */
>  void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
>  {
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 19b78a9d9d47..d823fbe4e155 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -492,7 +492,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
>         pmu->reserved_bits = 0xffffffff00200000ull;
>
>         entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
> -       if (!entry || !vcpu->kvm->arch.enable_pmu)
> +       if (!entry || !vcpu->kvm->arch.enable_pmu || !boot_cpu_has(X86_FEATURE_ARCH_PERFMON))

This change seems unrelated.

>                 return;
>         eax.full = entry->eax;
>         edx.full = entry->edx;
> --
> 2.35.1
>