From: Like Xu <likexu@xxxxxxxxxxx> The code sketch for reprogram_{gp, fixed}_counter() is similar, while the fixed counter using the PERF_TYPE_HARDWAR type and the gp being able to use either PERF_TYPE_HARDWAR or PERF_TYPE_RAW type depending on the pmc->eventsel value. After 'commit 761875634a5e ("KVM: x86/pmu: Setup pmc->eventsel for fixed PMCs")', the pmc->eventsel of the fixed counter will also have been setup with the same semantic value and will not be changed during the guest runtime. But essentially, "the HARDWARE is just a convenience wrapper over RAW IIRC", quoated from Peterz. So it could be pretty safe to use the PERF_TYPE_RAW type only to program both gp and fixed counters naturally in the reprogram_counter(). To make the gp and fixed counters more semantically symmetrical, the selection of EVENTSEL_{USER, OS, INT} bits is temporarily translated via fixed_ctr_ctrl before the pmc_reprogram_counter() call. Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Suggested-by: Jim Mattson <jmattson@xxxxxxxxxx> Signed-off-by: Like Xu <likexu@xxxxxxxxxxx> --- arch/x86/kvm/pmu.c | 128 +++++++++++++---------------------- arch/x86/kvm/vmx/pmu_intel.c | 2 +- 2 files changed, 47 insertions(+), 83 deletions(-) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 5299488b002c..00e1660c10ca 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -215,85 +215,60 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc) return allow_event; } -static void reprogram_gp_counter(struct kvm_pmc *pmc) -{ - u64 config; - u32 type = PERF_TYPE_RAW; - u64 eventsel = pmc->eventsel; - - if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) - printk_once("kvm pmu: pin control bit is ignored\n"); - - pmc_pause_counter(pmc); - - if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc)) - return; - - if (!check_pmu_event_filter(pmc)) - return; - - if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | - ARCH_PERFMON_EVENTSEL_INV | - ARCH_PERFMON_EVENTSEL_CMASK | - HSW_IN_TX | - HSW_IN_TX_CHECKPOINTED))) { - config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc); - if (config != PERF_COUNT_HW_MAX) - type = PERF_TYPE_HARDWARE; - } - - if (type == PERF_TYPE_RAW) - config = eventsel & AMD64_RAW_EVENT_MASK; - - if (pmc->current_config == eventsel && pmc_resume_counter(pmc)) - return; - - pmc_release_perf_event(pmc); - - pmc->current_config = eventsel; - pmc_reprogram_counter(pmc, type, config, - !(eventsel & ARCH_PERFMON_EVENTSEL_USR), - !(eventsel & ARCH_PERFMON_EVENTSEL_OS), - eventsel & ARCH_PERFMON_EVENTSEL_INT, - (eventsel & HSW_IN_TX), - (eventsel & HSW_IN_TX_CHECKPOINTED)); -} - -static void reprogram_fixed_counter(struct kvm_pmc *pmc) +static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); - int idx = pmc->idx - INTEL_PMC_IDX_FIXED; - u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx); - unsigned en_field = ctrl & 0x3; - bool pmi = ctrl & 0x8; - pmc_pause_counter(pmc); + if (pmc_is_fixed(pmc)) + return fixed_ctrl_field(pmu->fixed_ctr_ctrl, + pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3; - if (!en_field || !pmc_is_enabled(pmc)) - return; - - if (!check_pmu_event_filter(pmc)) - return; - - if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc)) - return; - - pmc_release_perf_event(pmc); - - pmc->current_config = (u64)ctrl; - pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE, - kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc), - !(en_field & 0x2), /* exclude user */ - !(en_field & 0x1), /* exclude kernel */ - pmi, false, false); + return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE; } void reprogram_counter(struct kvm_pmc *pmc) { - if (pmc_is_gp(pmc)) - reprogram_gp_counter(pmc); - else - reprogram_fixed_counter(pmc); + struct kvm_pmu *pmu = pmc_to_pmu(pmc); + u64 eventsel = pmc->eventsel; + u64 new_config = eventsel; + u8 fixed_ctr_ctrl; + + pmc_pause_counter(pmc); + + if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc)) + return; + + if (!check_pmu_event_filter(pmc)) + return; + + if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) + printk_once("kvm pmu: pin control bit is ignored\n"); + + if (pmc_is_fixed(pmc)) { + fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, + pmc->idx - INTEL_PMC_IDX_FIXED); + if (fixed_ctr_ctrl & 0x1) + eventsel |= ARCH_PERFMON_EVENTSEL_OS; + if (fixed_ctr_ctrl & 0x2) + eventsel |= ARCH_PERFMON_EVENTSEL_USR; + if (fixed_ctr_ctrl & 0x8) + eventsel |= ARCH_PERFMON_EVENTSEL_INT; + new_config = (u64)fixed_ctr_ctrl; + } + + if (pmc->current_config == new_config && pmc_resume_counter(pmc)) + return; + + pmc_release_perf_event(pmc); + + pmc->current_config = new_config; + pmc_reprogram_counter(pmc, PERF_TYPE_RAW, + (eventsel & AMD64_RAW_EVENT_MASK), + !(eventsel & ARCH_PERFMON_EVENTSEL_USR), + !(eventsel & ARCH_PERFMON_EVENTSEL_OS), + eventsel & ARCH_PERFMON_EVENTSEL_INT, + (eventsel & HSW_IN_TX), + (eventsel & HSW_IN_TX_CHECKPOINTED)); } EXPORT_SYMBOL_GPL(reprogram_counter); @@ -451,17 +426,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu) kvm_pmu_refresh(vcpu); } -static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc) -{ - struct kvm_pmu *pmu = pmc_to_pmu(pmc); - - if (pmc_is_fixed(pmc)) - return fixed_ctrl_field(pmu->fixed_ctr_ctrl, - pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3; - - return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE; -} - /* Release perf_events for vPMCs that have been unused for a full time slice. */ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 19b78a9d9d47..d823fbe4e155 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -492,7 +492,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->reserved_bits = 0xffffffff00200000ull; entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); - if (!entry || !vcpu->kvm->arch.enable_pmu) + if (!entry || !vcpu->kvm->arch.enable_pmu || !boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) return; eax.full = entry->eax; edx.full = entry->edx; -- 2.35.1