HSW_IN_TX* bits are used in generic code, but they are not supported on
AMD. Worse, these bits overlap with AMD's EventSelect[11:8], so using
HSW_IN_TX* bits unconditionally in generic code results in unintended
PMU behavior on AMD. For example, if EventSelect[11:8] is 0x2,
pmc_reprogram_counter() wrongly assumes that HSW_IN_TX_CHECKPOINTED is
set and thus forces the sampling period to 0.

Fixes: ca724305a2b0 ("KVM: x86/vPMU: Implement AMD vPMU code for KVM")
Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxx>
---
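A standalone sketch of the overlap, for illustration only and not part
of the patch: the IN_TX bit positions and the AMD EventSelect[11:8]
location match the definitions the kernel already uses, while the
helper macro AMD64_EVENTSEL_EVENT_HI and the event encoding below are
made up for this example.

/* overlap-demo.c: illustration only, not part of this patch. */
#include <stdint.h>
#include <stdio.h>

#define INTEL_HSW_IN_TX			(1ULL << 32)	/* Intel PERFEVTSEL bit 32 */
#define INTEL_HSW_IN_TX_CHECKPOINTED	(1ULL << 33)	/* Intel PERFEVTSEL bit 33 */
#define AMD64_EVENTSEL_EVENT_HI		(0xFULL << 32)	/* AMD PERF_CTL EventSelect[11:8] */

int main(void)
{
	/* Made-up AMD event: EventSelect[11:8] = 0x2, EventSelect[7:0] = 0x76. */
	uint64_t eventsel = (0x2ULL << 32) | 0x76;

	/*
	 * Generic code that tests the Intel-only bit unconditionally sees
	 * bit 33 set and clears the sample period, even though on AMD this
	 * bit is simply part of the event select.
	 */
	if (eventsel & INTEL_HSW_IN_TX_CHECKPOINTED)
		printf("bit 33 set: sample period would wrongly be forced to 0\n");

	printf("AMD EventSelect[11:8] = %#llx\n",
	       (unsigned long long)((eventsel & AMD64_EVENTSEL_EVENT_HI) >> 32));
	return 0;
}
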
 arch/x86/kvm/pmu.c           | 66 +++++++++++++++++++++++-------------
 arch/x86/kvm/pmu.h           |  4 +--
 arch/x86/kvm/svm/pmu.c       |  6 +++-
 arch/x86/kvm/vmx/pmu_intel.c |  4 +--
 4 files changed, 51 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 4a70380f2287..b91dbede87b3 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -97,7 +97,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
 static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 				  u64 config, bool exclude_user,
 				  bool exclude_kernel, bool intr,
-				  bool in_tx, bool in_tx_cp)
+				  bool in_tx, bool in_tx_cp, bool is_intel)
 {
 	struct perf_event *event;
 	struct perf_event_attr attr = {
@@ -116,16 +116,18 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 
 	attr.sample_period = get_sample_period(pmc, pmc->counter);
 
-	if (in_tx)
-		attr.config |= INTEL_HSW_IN_TX;
-	if (in_tx_cp) {
-		/*
-		 * INTEL_HSW_IN_TX_CHECKPOINTED is not supported with nonzero
-		 * period. Just clear the sample period so at least
-		 * allocating the counter doesn't fail.
-		 */
-		attr.sample_period = 0;
-		attr.config |= INTEL_HSW_IN_TX_CHECKPOINTED;
+	if (is_intel) {
+		if (in_tx)
+			attr.config |= INTEL_HSW_IN_TX;
+		if (in_tx_cp) {
+			/*
+			 * INTEL_HSW_IN_TX_CHECKPOINTED is not supported with nonzero
+			 * period. Just clear the sample period so at least
+			 * allocating the counter doesn't fail.
+			 */
+			attr.sample_period = 0;
+			attr.config |= INTEL_HSW_IN_TX_CHECKPOINTED;
+		}
 	}
 
 	event = perf_event_create_kernel_counter(&attr, -1, current,
@@ -179,13 +181,14 @@ static int cmp_u64(const void *a, const void *b)
 	return *(__u64 *)a - *(__u64 *)b;
 }
 
-void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
+void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel, bool is_intel)
 {
 	u64 config;
 	u32 type = PERF_TYPE_RAW;
 	struct kvm *kvm = pmc->vcpu->kvm;
 	struct kvm_pmu_event_filter *filter;
 	bool allow_event = true;
+	u64 eventsel_mask;
 
 	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
 		printk_once("kvm pmu: pin control bit is ignored\n");
@@ -210,18 +213,31 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 	if (!allow_event)
 		return;
 
-	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
-			  ARCH_PERFMON_EVENTSEL_INV |
-			  ARCH_PERFMON_EVENTSEL_CMASK |
-			  INTEL_HSW_IN_TX |
-			  INTEL_HSW_IN_TX_CHECKPOINTED))) {
+	eventsel_mask = ARCH_PERFMON_EVENTSEL_EDGE |
+			ARCH_PERFMON_EVENTSEL_INV |
+			ARCH_PERFMON_EVENTSEL_CMASK;
+	if (is_intel) {
+		eventsel_mask |= INTEL_HSW_IN_TX | INTEL_HSW_IN_TX_CHECKPOINTED;
+	} else {
+		/*
+		 * None of the AMD generalized events has EventSelect[11:8]
+		 * set so far.
+		 */
+		eventsel_mask |= (0xFULL << 32);
+	}
+
+	if (!(eventsel & eventsel_mask)) {
 		config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
 		if (config != PERF_COUNT_HW_MAX)
 			type = PERF_TYPE_HARDWARE;
 	}
 
-	if (type == PERF_TYPE_RAW)
-		config = eventsel & AMD64_RAW_EVENT_MASK;
+	if (type == PERF_TYPE_RAW) {
+		if (is_intel)
+			config = eventsel & X86_RAW_EVENT_MASK;
+		else
+			config = eventsel & AMD64_RAW_EVENT_MASK;
+	}
 
 	if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
 		return;
@@ -234,11 +250,12 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
 			      eventsel & ARCH_PERFMON_EVENTSEL_INT,
 			      (eventsel & INTEL_HSW_IN_TX),
-			      (eventsel & INTEL_HSW_IN_TX_CHECKPOINTED));
+			      (eventsel & INTEL_HSW_IN_TX_CHECKPOINTED),
+			      is_intel);
 }
 EXPORT_SYMBOL_GPL(reprogram_gp_counter);
 
-void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
+void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx, bool is_intel)
 {
 	unsigned en_field = ctrl & 0x3;
 	bool pmi = ctrl & 0x8;
@@ -270,24 +287,25 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
 			      kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc),
 			      !(en_field & 0x2), /* exclude user */
 			      !(en_field & 0x1), /* exclude kernel */
-			      pmi, false, false);
+			      pmi, false, false, is_intel);
 }
 EXPORT_SYMBOL_GPL(reprogram_fixed_counter);
 
 void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
 {
 	struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, pmc_idx);
+	bool is_intel = !strncmp(kvm_x86_ops.name, "kvm_intel", 9);
 
 	if (!pmc)
 		return;
 
 	if (pmc_is_gp(pmc))
-		reprogram_gp_counter(pmc, pmc->eventsel);
+		reprogram_gp_counter(pmc, pmc->eventsel, is_intel);
 	else {
 		int idx = pmc_idx - INTEL_PMC_IDX_FIXED;
 		u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);
 
-		reprogram_fixed_counter(pmc, ctrl, idx);
+		reprogram_fixed_counter(pmc, ctrl, idx, is_intel);
 	}
 }
 EXPORT_SYMBOL_GPL(reprogram_counter);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7a7b8d5b775e..610a4cbf85a4 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -140,8 +140,8 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
 	return sample_period;
 }
 
-void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
-void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
+void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel, bool is_intel);
+void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx, bool is_intel);
 void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
 
 void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 5aa45f13b16d..9ad63e940883 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -140,6 +140,10 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
 
 static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
 {
+	/*
+	 * None of the AMD generalized events has EventSelect[11:8] set.
+	 * Hence 8 bit event_select works for now.
+	 */
 	u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
 	u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
 	int i;
@@ -265,7 +269,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (data == pmc->eventsel)
 			return 0;
 		if (!(data & pmu->reserved_bits)) {
-			reprogram_gp_counter(pmc, data);
+			reprogram_gp_counter(pmc, data, false);
 			return 0;
 		}
 	}
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 7c64792a9506..ba1fbd37f608 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -50,7 +50,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 			continue;
 
 		__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
-		reprogram_fixed_counter(pmc, new_ctrl, i);
+		reprogram_fixed_counter(pmc, new_ctrl, i, true);
 	}
 
 	pmu->fixed_ctr_ctrl = data;
@@ -444,7 +444,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			if (data == pmc->eventsel)
 				return 0;
 			if (!(data & pmu->reserved_bits)) {
-				reprogram_gp_counter(pmc, data);
+				reprogram_gp_counter(pmc, data, true);
 				return 0;
 			}
 		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
-- 
2.27.0