On Fri, Nov 3, 2023 at 5:03 PM Sean Christopherson <seanjc@xxxxxxxxxx> wrote:
>
> From: Jinrong Liang <cloudliang@xxxxxxxxxxx>
>
> Add test cases to verify that Intel's Architectural PMU events work as
> expected when they are (un)available according to guest CPUID. Iterate
> over a range of sane PMU versions, with and without full-width writes
> enabled, and over interesting combinations of lengths/masks for the bit
> vector that enumerates unavailable events.
>
> Test up to vPMU version 5, i.e. the current architectural max. KVM only
> officially supports up to version 2, but the behavior of the counters is
> backwards compatible, i.e. KVM shouldn't do something completely different
> for a higher, architecturally-defined vPMU version. Verify KVM behavior
> against the effective vPMU version, e.g. advertising vPMU 5 when KVM only
> supports vPMU 2 shouldn't magically unlock vPMU 5 features.
>
> According to the Intel SDM, the number of architectural events is reported
> through CPUID.0AH:EAX[31:24] and the architectural event x is supported
> if EBX[x]=0 && EAX[31:24]>x. Note, KVM's ABI is that unavailable events
> do not count, even though strictly speaking that's not required by the
> SDM (the behavior is effectively undefined).
>
> Handcode the entirety of the measured section so that the test can
> precisely assert on the number of instructions and branches retired.
>
> Co-developed-by: Like Xu <likexu@xxxxxxxxxxx>
> Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
> Signed-off-by: Jinrong Liang <cloudliang@xxxxxxxxxxx>
> Co-developed-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
>  tools/testing/selftests/kvm/Makefile          |   1 +
>  .../selftests/kvm/x86_64/pmu_counters_test.c  | 321 ++++++++++++++++++
>  2 files changed, 322 insertions(+)
>  create mode 100644 tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
>
> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> index 44d8d022b023..09f5d6fe84de 100644
> --- a/tools/testing/selftests/kvm/Makefile
> +++ b/tools/testing/selftests/kvm/Makefile
> @@ -91,6 +91,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
>  TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
>  TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
>  TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
> +TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test
>  TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
>  TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
>  TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
> diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> new file mode 100644
> index 000000000000..dd9a7864410c
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> @@ -0,0 +1,321 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2023, Tencent, Inc.
> + */
> +
> +#define _GNU_SOURCE /* for program_invocation_short_name */
> +#include <x86intrin.h>
> +
> +#include "pmu.h"
> +#include "processor.h"
> +
> +/* Number of LOOP instructions for the guest measurement payload. */
> +#define NUM_BRANCHES 10
> +/*
> + * Number of "extra" instructions that will be counted, i.e. the number of
> + * instructions that are needed to set up the loop and then disable the
> + * counter. 2 MOV, 2 XOR, 1 WRMSR.
> + */
> +#define NUM_EXTRA_INSNS 5
> +#define NUM_INSNS_RETIRED (NUM_BRANCHES + NUM_EXTRA_INSNS)
> +
> +static uint8_t kvm_pmu_version;
> +static bool kvm_has_perf_caps;
> +
> +static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
> +						  void *guest_code,
> +						  uint8_t pmu_version,
> +						  uint64_t perf_capabilities)
> +{
> +	struct kvm_vm *vm;
> +
> +	vm = vm_create_with_one_vcpu(vcpu, guest_code);
> +	vm_init_descriptor_tables(vm);
> +	vcpu_init_descriptor_tables(*vcpu);
> +
> +	sync_global_to_guest(vm, kvm_pmu_version);
> +
> +	/*
> +	 * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
> +	 * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
> +	 */
> +	if (kvm_has_perf_caps)
> +		vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
> +
> +	vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
> +	return vm;
> +}
> +
> +static void run_vcpu(struct kvm_vcpu *vcpu)
> +{
> +	struct ucall uc;
> +
> +	do {
> +		vcpu_run(vcpu);
> +		switch (get_ucall(vcpu, &uc)) {
> +		case UCALL_SYNC:
> +			break;
> +		case UCALL_ABORT:
> +			REPORT_GUEST_ASSERT(uc);
> +			break;
> +		case UCALL_PRINTF:
> +			pr_info("%s", uc.buffer);
> +			break;
> +		case UCALL_DONE:
> +			break;
> +		default:
> +			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
> +		}
> +	} while (uc.cmd != UCALL_DONE);
> +}
> +
> +static uint8_t guest_get_pmu_version(void)
> +{
> +	/*
> +	 * Return the effective PMU version, i.e. the minimum between what KVM
> +	 * supports and what is enumerated to the guest. The host deliberately
> +	 * advertises a PMU version to the guest beyond what is actually
> +	 * supported by KVM to verify KVM doesn't freak out and do something
> +	 * bizarre with an architecturally valid, but unsupported, version.
> +	 */
> +	return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
> +}
> +
> +/*
> + * If an architectural event is supported and guaranteed to generate at least
> + * one "hit", assert that its count is non-zero. If an event isn't supported or
> + * the test can't guarantee the associated action will occur, then all bets are
> + * off regarding the count, i.e. no checks can be done.
> + *
> + * Sanity check that in all cases, the event doesn't count when it's disabled,
> + * and that KVM correctly emulates the write of an arbitrary value.
> + */
> +static void guest_assert_event_count(uint8_t idx,
> +				     struct kvm_x86_pmu_feature event,
> +				     uint32_t pmc, uint32_t pmc_msr)
> +{
> +	uint64_t count;
> +
> +	count = _rdpmc(pmc);
> +	if (!this_pmu_has(event))
> +		goto sanity_checks;
> +
> +	switch (idx) {
> +	case INTEL_ARCH_INSTRUCTIONS_RETIRED:
> +		GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
> +		break;
> +	case INTEL_ARCH_BRANCHES_RETIRED:
> +		GUEST_ASSERT_EQ(count, NUM_BRANCHES);
> +		break;
> +	case INTEL_ARCH_CPU_CYCLES:
> +	case INTEL_ARCH_REFERENCE_CYCLES:
> +		GUEST_ASSERT_NE(count, 0);
> +		break;
> +	default:
> +		break;
> +	}
> +
> +sanity_checks:
> +	__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
> +	GUEST_ASSERT_EQ(_rdpmc(pmc), count);
> +
> +	wrmsr(pmc_msr, 0xdead);
> +	GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
> +}
> +
> +static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
> +				    uint32_t pmc, uint32_t pmc_msr,
> +				    uint32_t ctrl_msr, uint64_t ctrl_msr_value)
> +{
> +	wrmsr(pmc_msr, 0);
> +
> +	/*
> +	 * Enable and disable the PMC in a monolithic asm blob to ensure that
> +	 * the compiler can't insert _any_ code into the measured sequence.
> +	 * Note, ECX doesn't need to be clobbered as the input value, @ctrl_msr,
> +	 * is restored before the end of the sequence.
> +	 */
> +	__asm__ __volatile__("wrmsr\n\t"
> +			     "mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t"
> +			     "loop .\n\t"
> +			     "mov %%edi, %%ecx\n\t"
> +			     "xor %%eax, %%eax\n\t"
> +			     "xor %%edx, %%edx\n\t"
> +			     "wrmsr\n\t"
> +			     :: "a"((uint32_t)ctrl_msr_value),
> +				"d"(ctrl_msr_value >> 32),
> +				"c"(ctrl_msr), "D"(ctrl_msr)
> +	);
> +
> +	guest_assert_event_count(idx, event, pmc, pmc_msr);
> +}
> +
> +static void guest_test_arch_event(uint8_t idx)
> +{
> +	const struct {
> +		struct kvm_x86_pmu_feature gp_event;
> +	} intel_event_to_feature[] = {
> +		[INTEL_ARCH_CPU_CYCLES] = { X86_PMU_FEATURE_CPU_CYCLES },
> +		[INTEL_ARCH_INSTRUCTIONS_RETIRED] = { X86_PMU_FEATURE_INSNS_RETIRED },
> +		[INTEL_ARCH_REFERENCE_CYCLES] = { X86_PMU_FEATURE_REFERENCE_CYCLES },
> +		[INTEL_ARCH_LLC_REFERENCES] = { X86_PMU_FEATURE_LLC_REFERENCES },
> +		[INTEL_ARCH_LLC_MISSES] = { X86_PMU_FEATURE_LLC_MISSES },
> +		[INTEL_ARCH_BRANCHES_RETIRED] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED },
> +		[INTEL_ARCH_BRANCHES_MISPREDICTED] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED },
> +	};
> +
> +	uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
> +	uint32_t pmu_version = guest_get_pmu_version();
> +	/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
> +	bool guest_has_perf_global_ctrl = pmu_version >= 2;
> +	struct kvm_x86_pmu_feature gp_event;
> +	uint32_t base_pmc_msr;
> +	unsigned int i;
> +
> +	/* The host side shouldn't invoke this without a guest PMU. */
> +	GUEST_ASSERT(pmu_version);
> +
> +	if (this_cpu_has(X86_FEATURE_PDCM) &&
> +	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
> +		base_pmc_msr = MSR_IA32_PMC0;
> +	else
> +		base_pmc_msr = MSR_IA32_PERFCTR0;
> +
> +	gp_event = intel_event_to_feature[idx].gp_event;
> +	GUEST_ASSERT_EQ(idx, gp_event.f.bit);
> +
> +	GUEST_ASSERT(nr_gp_counters);
> +
> +	for (i = 0; i < nr_gp_counters; i++) {
> +		uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
> +				    ARCH_PERFMON_EVENTSEL_ENABLE |
> +				    intel_pmu_arch_events[idx];
> +
> +		wrmsr(MSR_P6_EVNTSEL0 + i, 0);
> +		if (guest_has_perf_global_ctrl)
> +			wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
> +
> +		__guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
> +					MSR_P6_EVNTSEL0 + i, eventsel);
> +	}
> +}
> +
> +static void guest_test_arch_events(void)
> +{
> +	uint8_t i;
> +
> +	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
> +		guest_test_arch_event(i);
> +
> +	GUEST_DONE();
> +}
> +
> +static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
> +			     uint8_t length, uint32_t unavailable_mask)
> +{
> +	struct kvm_vcpu *vcpu;
> +	struct kvm_vm *vm;
> +
> +	/* Testing arch events requires a vPMU (there are no negative tests). */
> +	if (!pmu_version)
> +		return;
> +
> +	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
> +					 pmu_version, perf_capabilities);
> +
> +	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
> +				length);
> +	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
> +				unavailable_mask);
> +
> +	run_vcpu(vcpu);
> +
> +	kvm_vm_free(vm);
> +}
> +
> +static void test_intel_counters(void)
> +{
> +	uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
> +	uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
> +	unsigned int i;
> +	uint8_t v, j;
> +	uint32_t k;
> +
> +	const uint64_t perf_caps[] = {
> +		0,
> +		PMU_CAP_FW_WRITES,
> +	};
> +
> +	/*
> +	 * Test up to PMU v5, which is the current maximum version defined by
> +	 * Intel, i.e. the last version that is guaranteed to be backwards
> +	 * compatible with KVM's existing behavior.
> +	 */
> +	uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
> +
> +	/*
> +	 * Verify that KVM is sanitizing the architectural events, i.e. hiding
> +	 * events that KVM doesn't support. This will fail any time KVM adds
> +	 * support for a new event, but it's worth paying that price to be able
> +	 * to detect KVM bugs.
> +	 */
> +	TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
> +		    "KVM is either buggy, or has learned new tricks (length = %u, mask = %x)",
> +		    nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));

As stated earlier in this series, KVM doesn't have to do anything when a new
architectural event is defined, so this should just say something like, "New
architectural event(s); please update this test."

> +	/*
> +	 * Force iterating over known arch events regardless of whether or not
> +	 * KVM/hardware supports a given event.
> +	 */
> +	nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
> +
> +	for (v = 0; v <= max_pmu_version; v++) {
> +		for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
> +			if (!kvm_has_perf_caps && perf_caps[i])
> +				continue;
> +
> +			pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
> +				v, perf_caps[i]);
> +			/*
> +			 * To keep the total runtime reasonable, test every
> +			 * possible non-zero, non-reserved bitmap combination
> +			 * only with the native PMU version and the full bit
> +			 * vector length.
> +			 */
> +			if (v == pmu_version) {
> +				for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
> +					test_arch_events(v, perf_caps[i], nr_arch_events, k);
> +			}
> +			/*
> +			 * Test single bits for all PMU versions and lengths up
> +			 * to the number of events + 1 (to verify KVM doesn't do
> +			 * weird things if the guest length is greater than the
> +			 * host length). Explicitly test a mask of '0' and all
> +			 * ones, i.e. all events being available and unavailable.
> +			 */
> +			for (j = 0; j <= nr_arch_events + 1; j++) {
> +				test_arch_events(v, perf_caps[i], j, 0);
> +				test_arch_events(v, perf_caps[i], j, -1u);
> +
> +				for (k = 0; k < nr_arch_events; k++)
> +					test_arch_events(v, perf_caps[i], j, BIT(k));
> +			}
> +		}
> +	}
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
> +
> +	TEST_REQUIRE(host_cpu_is_intel);
> +	TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
> +	TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
> +
> +	kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
> +	kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
> +
> +	test_intel_counters();
> +
> +	return 0;
> +}
> --
> 2.42.0.869.gea05f2083d-goog
>
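
One more note for readers who don't have the SDM handy: the availability rule
quoted in the changelog (architectural event 'x' is supported iff EBX[x]=0 &&
EAX[31:24]>x) boils down to something like the sketch below. This is purely
illustrative and not part of the patch; it reads raw CPUID leaf 0xA from host
userspace via GCC's <cpuid.h>, and arch_event_is_available() is a hypothetical
helper name, whereas the selftest gets the same bits through this_cpu_property()
and this_pmu_has(), and tweaks the *guest's* view via vcpu_set_cpuid_property()
on X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH and X86_PROPERTY_PMU_EVENTS_MASK.

#include <stdbool.h>
#include <stdio.h>
#include <cpuid.h>

/* Hypothetical helper; mirrors the CPUID.0AH rule from the changelog. */
static bool arch_event_is_available(unsigned int event_idx)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID.0AH: EAX[31:24] = EBX bit vector length, EBX = unavailable events. */
	if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx))
		return false;

	/* Available iff the vector is long enough to cover the event AND its bit is clear. */
	return event_idx < (eax >> 24) && !(ebx & (1u << event_idx));
}

int main(void)
{
	unsigned int i;

	for (i = 0; i < 8; i++)
		printf("arch event %u: %savailable\n", i,
		       arch_event_is_available(i) ? "" : "un");
	return 0;
}

Obviously that only shows the host CPU's view; what the test cares about is the
view KVM enumerates to the guest, which is exactly what the length/mask CPUID
properties above manipulate.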