From: Like Xu <likexu@xxxxxxxxxxx>

KVM needs to be fixed to avoid perf_event creation when the requested
hw event on a gp or fixed counter is marked as unavailable in the
Intel guest CPUID 0AH.EBX leaf.

It's proposed to use is_intel_cpuid_event() to distinguish whether the
hw event is an Intel pre-defined architecture event, so that we can
decide to reprogram it with a PERF_TYPE_HARDWARE (for fixed and gp) or
PERF_TYPE_RAW (for gp only) perf_event, or to avoid creating the
perf_event altogether.

If an Intel CPUID event is marked as unavailable according to
pmu->available_event_types, intel_find_[fixed|arch]_event() returns a
new special value of "PERF_COUNT_HW_MAX + 1" to tell the caller to
avoid creating a perf_event and not to fall back to PERF_TYPE_RAW
mode for gp counters.

Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
---
 arch/x86/kvm/pmu.c           |  8 ++++++++
 arch/x86/kvm/vmx/pmu_intel.c | 45 +++++++++++++++++++++++++++++++-----
 2 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 7093fc70cd38..3b47bd92e7bb 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -111,6 +111,14 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 		.config = config,
 	};
 
+	/*
+	 * A "config >= PERF_COUNT_HW_MAX" only appears when
+	 * the kernel generic event is marked as unavailable
+	 * in the Intel guest architecture event CPUID leaf.
+	 */
+	if (type == PERF_TYPE_HARDWARE && config >= PERF_COUNT_HW_MAX)
+		return;
+
 	attr.sample_period = get_sample_period(pmc, pmc->counter);
 
 	if (in_tx)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 4c04e94ae548..4f58c14efa61 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -68,17 +68,39 @@ static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
 		reprogram_counter(pmu, bit);
 }
 
+/* UMask and Event Select Encodings for Intel CPUID Events */
+static inline bool is_intel_cpuid_event(u8 event_select, u8 unit_mask)
+{
+	if ((!unit_mask && event_select == 0x3C) ||
+	    (!unit_mask && event_select == 0xC0) ||
+	    (unit_mask == 0x01 && event_select == 0x3C) ||
+	    (unit_mask == 0x4F && event_select == 0x2E) ||
+	    (unit_mask == 0x41 && event_select == 0x2E) ||
+	    (!unit_mask && event_select == 0xC4) ||
+	    (!unit_mask && event_select == 0xC5))
+		return true;
+
+	/* The unimplemented topdown.slots event check is skipped. */
+	return false;
+}
+
 static unsigned intel_find_arch_event(struct kvm_pmu *pmu,
 				      u8 event_select,
 				      u8 unit_mask)
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
-		if (intel_arch_events[i].eventsel == event_select &&
-		    intel_arch_events[i].unit_mask == unit_mask &&
-		    ((i > 6) || pmu->available_event_types & (1 << i)))
-			break;
+	for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) {
+		if (intel_arch_events[i].eventsel != event_select ||
+		    intel_arch_events[i].unit_mask != unit_mask)
+			continue;
+
+		if (is_intel_cpuid_event(event_select, unit_mask) &&
+		    !(pmu->available_event_types & BIT_ULL(i)))
+			return PERF_COUNT_HW_MAX + 1;
+
+		break;
+	}
 
 	if (i == ARRAY_SIZE(intel_arch_events))
 		return PERF_COUNT_HW_MAX;
@@ -90,12 +112,23 @@ static unsigned int intel_find_fixed_event(struct kvm_pmu *pmu, int idx)
 {
 	u32 event;
 	size_t size = ARRAY_SIZE(fixed_pmc_events);
+	u8 event_select, unit_mask;
+	unsigned int event_type;
 
 	if (idx >= size)
 		return PERF_COUNT_HW_MAX;
 
 	event = fixed_pmc_events[array_index_nospec(idx, size)];
-	return intel_arch_events[event].event_type;
+
+	event_select = intel_arch_events[event].eventsel;
+	unit_mask = intel_arch_events[event].unit_mask;
+	event_type = intel_arch_events[event].event_type;
+
+	if (is_intel_cpuid_event(event_select, unit_mask) &&
+	    !(pmu->available_event_types & BIT_ULL(event_type)))
+		return PERF_COUNT_HW_MAX + 1;
+
+	return event_type;
 }
 
 /* check if a PMC is enabled by comparing it with globl_ctrl bits. */
-- 
2.33.0
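
For context, the "Intel guest CPUID 0AH.EBX leaf" above is the
architectural event availability mask: CPUID.0AH:EAX[31:24] gives the
length of the EBX bit vector, and a set bit i in EBX means
architectural event i is unavailable; pmu->available_event_types is
derived from the inverse of that mask. A minimal guest-side sketch
(hypothetical, not part of this patch; assumes GCC's <cpuid.h>) that
dumps the availability a guest sees:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx, i, len;

	/* Leaf 0AH, subleaf 0: architectural performance monitoring. */
	if (!__get_cpuid_count(0x0a, 0, &eax, &ebx, &ecx, &edx))
		return 1;

	/* EAX[31:24]: length of the EBX event-availability bit vector. */
	len = (eax >> 24) & 0xff;

	/* EBX bit i set => architectural event i is unavailable. */
	for (i = 0; i < len; i++)
		printf("arch event %u: %s\n", i,
		       (ebx & (1u << i)) ? "unavailable" : "available");

	return 0;
}

With this patch applied, programming a counter for an event the guest
reports as unavailable creates no perf_event on the host at all.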