From: Like Xu <likexu@xxxxxxxxxxx>
According to CPUID 0x0A.EBX bit vector, the event [7] should be the
unrealized event "Topdown Slots" instead of the *kernel* generalized
common hardware event "REF_CPU_CYCLES", so we need to skip the cpuid
unavaliblity check in the intel_pmc_perf_hw_id() for the last
REF_CPU_CYCLES event and update the confusing comment.
If the event is marked as unavailable in the Intel guest CPUID
0AH.EBX leaf, we need to avoid any perf_event creation, whether
it's a gp or fixed counter. To distinguish whether it is a rejected
event or an event that needs to be programmed with PERF_TYPE_RAW type,
a new special returned value of "PERF_COUNT_HW_MAX + 1" is introduced.
Fixes: 62079d8a43128 ("KVM: PMU: add proper support for fixed counter 2")
Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
---
v1 -> v2 Changelog:
- Refine comment based on commit c1d6f42f1a42;
- Squash the idea "avoid event creation for rejected hw_config" into this commit;
- Squash the idea "PERF_COUNT_HW_MAX + 1" into this commit;
Previous:
https://lore.kernel.org/kvm/20211112095139.21775-3-likexu@xxxxxxxxxxx/
arch/x86/kvm/pmu.c | 3 +++
arch/x86/kvm/vmx/pmu_intel.c | 18 ++++++++++++------
2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 8abdadb7e22a..e632693a2266 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -109,6 +109,9 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
.config = config,
};
+ if (type == PERF_TYPE_HARDWARE && config >= PERF_COUNT_HW_MAX)
+ return;
+
attr.sample_period = get_sample_period(pmc, pmc->counter);
if (in_tx)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 5e0ac57d6d1b..ffccfd9823c0 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -21,7 +21,6 @@
#define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
static struct kvm_event_hw_type_mapping intel_arch_events[] = {
- /* Index must match CPUID 0x0A.EBX bit vector */
[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
[2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES },
@@ -29,6 +28,7 @@ static struct kvm_event_hw_type_mapping intel_arch_events[] = {
[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
+ /* The above index must match CPUID 0x0A.EBX bit vector */
[7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES },
};
@@ -75,11 +75,17 @@ static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc)
u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
int i;
- for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
- if (intel_arch_events[i].eventsel == event_select &&
- intel_arch_events[i].unit_mask == unit_mask &&
- (pmc_is_fixed(pmc) || pmu->available_event_types & (1 << i)))
- break;
+ for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) {
+ if (intel_arch_events[i].eventsel != event_select ||
+ intel_arch_events[i].unit_mask != unit_mask)
+ continue;
+
+ /* disable event that reported as not present by cpuid */
+ if ((i < 7) && !(pmu->available_event_types & (1 << i)))
+ return PERF_COUNT_HW_MAX + 1;
+
+ break;
+ }
if (i == ARRAY_SIZE(intel_arch_events))
return PERF_COUNT_HW_MAX;