Note that the virtual PMU doesn't work as expected on AMD Zen CPUs (an
intercepted rdmsr is counted as a retired branch instruction), but the
PMU event filter does work.

This is a local application of a change authored by Jim Mattson and
sent to the kvm mailing list on Jan 14, 2022.

Signed-off-by: Jim Mattson <jmattson@xxxxxxxxxx>
Signed-off-by: David Dunn <daviddunn@xxxxxxxxxx>
---
 .../kvm/x86_64/pmu_event_filter_test.c        | 194 +++++++++++++++---
 1 file changed, 163 insertions(+), 31 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index 8ac99d4cbc73..aa104946e6e0 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -16,10 +16,38 @@
 #include "processor.h"
 
 /*
- * In lieue of copying perf_event.h into tools...
+ * In lieu of copying perf_event.h into tools...
  */
-#define ARCH_PERFMON_EVENTSEL_ENABLE BIT(22)
-#define ARCH_PERFMON_EVENTSEL_OS BIT(17)
+#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17)
+#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
+
+union cpuid10_eax {
+	struct {
+		unsigned int version_id:8;
+		unsigned int num_counters:8;
+		unsigned int bit_width:8;
+		unsigned int mask_length:8;
+	} split;
+	unsigned int full;
+};
+
+union cpuid10_ebx {
+	struct {
+		unsigned int no_unhalted_core_cycles:1;
+		unsigned int no_instructions_retired:1;
+		unsigned int no_unhalted_reference_cycles:1;
+		unsigned int no_llc_reference:1;
+		unsigned int no_llc_misses:1;
+		unsigned int no_branch_instruction_retired:1;
+		unsigned int no_branch_misses_retired:1;
+	} split;
+	unsigned int full;
+};
+
+/* End of stuff taken from perf_event.h. */
+
+/* Oddly, this isn't in perf_event.h. */
+#define ARCH_PERFMON_BRANCHES_RETIRED 5
 
 #define VCPU_ID 0
 #define NUM_BRANCHES 42
@@ -45,14 +73,15 @@
  * Preliminary Processor Programming Reference (PPR) for AMD Family
  * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
  * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
- * B1 Processors Volume 1 of 2
+ * B1 Processors Volume 1 of 2.
  */
 
 #define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)
 
 /*
  * This event list comprises Intel's eight architectural events plus
- * AMD's "branch instructions retired" for Zen[123].
+ * AMD's "retired branch instructions" for Zen[123] (and possibly
+ * other AMD CPUs).
  */
 static const uint64_t event_list[] = {
 	EVENT(0x3c, 0),
@@ -66,11 +95,45 @@ static const uint64_t event_list[] = {
 	AMD_ZEN_BR_RETIRED,
 };
 
+/*
+ * If we encounter a #GP during the guest PMU sanity check, then the guest
+ * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
+ */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+	GUEST_SYNC(0);
+}
+
+/*
+ * Check that we can write a new value to the given MSR and read it back.
+ * The caller should provide a non-empty set of bits that are safe to flip.
+ *
+ * Return on success. GUEST_SYNC(0) on error.
+ */
+static void check_msr(uint32_t msr, uint64_t bits_to_flip)
+{
+	uint64_t v = rdmsr(msr) ^ bits_to_flip;
+
+	wrmsr(msr, v);
+	if (rdmsr(msr) != v)
+		GUEST_SYNC(0);
+
+	v ^= bits_to_flip;
+	wrmsr(msr, v);
+	if (rdmsr(msr) != v)
+		GUEST_SYNC(0);
+}
+
 static void intel_guest_code(void)
 {
-	uint64_t br0, br1;
+	check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+	check_msr(MSR_P6_EVNTSEL0, 0xffff);
+	check_msr(MSR_IA32_PMC0, 0xffff);
+	GUEST_SYNC(1);
 
 	for (;;) {
+		uint64_t br0, br1;
+
 		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 		wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
 		      ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
@@ -83,15 +146,19 @@ static void intel_guest_code(void)
 }
 
 /*
- * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit
- * 23], this code uses the always-available, legacy K7 PMU MSRs, which
- * alias to the first four of the six extended core PMU MSRs.
+ * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
+ * this code uses the always-available, legacy K7 PMU MSRs, which alias to
+ * the first four of the six extended core PMU MSRs.
  */
 static void amd_guest_code(void)
 {
-	uint64_t br0, br1;
+	check_msr(MSR_K7_EVNTSEL0, 0xffff);
+	check_msr(MSR_K7_PERFCTR0, 0xffff);
+	GUEST_SYNC(1);
 
 	for (;;) {
+		uint64_t br0, br1;
+
 		wrmsr(MSR_K7_EVNTSEL0, 0);
 		wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
 		      ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
@@ -102,7 +169,11 @@ static void amd_guest_code(void)
 	}
 }
 
-static uint64_t test_branches_retired(struct kvm_vm *vm)
+/*
+ * Run the VM to the next GUEST_SYNC(value), and return the value passed
+ * to the sync. Any other exit from the guest is fatal.
+ */
+static uint64_t run_vm_to_sync(struct kvm_vm *vm)
 {
 	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
 	struct ucall uc;
@@ -118,6 +189,25 @@ static uint64_t test_branches_retired(struct kvm_vm *vm)
 	return uc.args[1];
 }
 
+/*
+ * In a nested environment or if the vPMU is disabled, the guest PMU
+ * might not work as architected (accessing the PMU MSRs may raise
+ * #GP, or writes could simply be discarded). In those situations,
+ * there is no point in running these tests. The guest code will perform
+ * a sanity check and then GUEST_SYNC(success). In the case of failure,
+ * the behavior of the guest on resumption is undefined.
+ */
+static bool sanity_check_pmu(struct kvm_vm *vm)
+{
+	bool success;
+
+	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+	success = run_vm_to_sync(vm);
+	vm_install_exception_handler(vm, GP_VECTOR, NULL);
+
+	return success;
+}
+
 static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
 {
 	struct kvm_pmu_event_filter *f;
@@ -143,6 +233,10 @@ static struct kvm_pmu_event_filter *event_filter(uint32_t action)
 	return f;
 }
 
+/*
+ * Remove the first occurrence of 'event' (if any) from the filter's
+ * event list.
+ */
 static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
 						 uint64_t event)
 {
@@ -160,9 +254,9 @@ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
 	return f;
 }
 
-static void test_no_filter(struct kvm_vm *vm)
+static void test_without_filter(struct kvm_vm *vm)
 {
-	uint64_t count = test_branches_retired(vm);
+	uint64_t count = run_vm_to_sync(vm);
 
 	if (count != NUM_BRANCHES)
 		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
@@ -174,7 +268,7 @@ static uint64_t test_with_filter(struct kvm_vm *vm,
 				 struct kvm_pmu_event_filter *f)
 {
 	vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f);
-	return test_branches_retired(vm);
+	return run_vm_to_sync(vm);
 }
 
 static void test_member_deny_list(struct kvm_vm *vm)
@@ -231,40 +325,70 @@ static void test_not_member_allow_list(struct kvm_vm *vm)
 	TEST_ASSERT(!count, "Disallowed PMU Event is counting");
 }
 
+/*
+ * Check for a non-zero PMU version, at least one general-purpose
+ * counter per logical processor, an EBX bit vector of length greater
+ * than 5, and EBX[5] clear.
+ */
+static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry)
+{
+	union cpuid10_eax eax = { .full = entry->eax };
+	union cpuid10_ebx ebx = { .full = entry->ebx };
+
+	return eax.split.version_id && eax.split.num_counters > 0 &&
+	       eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED &&
+	       !ebx.split.no_branch_instruction_retired;
+}
+
 /*
  * Note that CPUID leaf 0xa is Intel-specific. This leaf should be
  * clear on AMD hardware.
  */
-static bool vcpu_supports_intel_br_retired(void)
+static bool use_intel_pmu(void)
 {
 	struct kvm_cpuid_entry2 *entry;
 	struct kvm_cpuid2 *cpuid;
 
 	cpuid = kvm_get_supported_cpuid();
 	entry = kvm_get_supported_cpuid_index(0xa, 0);
-	return entry &&
-	       (entry->eax & 0xff) &&
-	       (entry->eax >> 24) > 5 &&
-	       !(entry->ebx & BIT(5));
+	return is_intel_cpu() && entry && check_intel_pmu_leaf(entry);
+}
+
+static bool is_zen1(uint32_t eax)
+{
+	return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f;
+}
+
+static bool is_zen2(uint32_t eax)
+{
+	return x86_family(eax) == 0x17 &&
+	       x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f;
+}
+
+static bool is_zen3(uint32_t eax)
+{
+	return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f;
 }
 
 /*
  * Determining AMD support for a PMU event requires consulting the AMD
- * PPR for the CPU or reference material derived therefrom.
+ * PPR for the CPU or reference material derived therefrom. The AMD
+ * test code herein has been verified to work on Zen1, Zen2, and Zen3.
+ *
+ * Feel free to add more AMD CPUs that are documented to support event
+ * select 0xc2 umask 0 as "retired branch instructions."
  */
-static bool vcpu_supports_amd_zen_br_retired(void)
+static bool use_amd_pmu(void)
 {
 	struct kvm_cpuid_entry2 *entry;
 	struct kvm_cpuid2 *cpuid;
 
 	cpuid = kvm_get_supported_cpuid();
 	entry = kvm_get_supported_cpuid_index(1, 0);
-	return entry &&
-	       ((x86_family(entry->eax) == 0x17 &&
-		 (x86_model(entry->eax) == 1 ||
-		  x86_model(entry->eax) == 0x31)) ||
-		(x86_family(entry->eax) == 0x19 &&
-		 x86_model(entry->eax) == 1));
+	return is_amd_cpu() && entry &&
+	       (is_zen1(entry->eax) ||
+		is_zen2(entry->eax) ||
+		is_zen3(entry->eax));
 }
 
 int main(int argc, char *argv[])
@@ -282,19 +406,27 @@ int main(int argc, char *argv[])
 		exit(KSFT_SKIP);
 	}
 
-	if (vcpu_supports_intel_br_retired())
+	if (use_intel_pmu())
 		guest_code = intel_guest_code;
-	else if (vcpu_supports_amd_zen_br_retired())
+	else if (use_amd_pmu())
 		guest_code = amd_guest_code;
 
 	if (!guest_code) {
-		print_skip("Branch instructions retired not supported");
+		print_skip("Don't know how to test this guest PMU");
 		exit(KSFT_SKIP);
 	}
 
 	vm = vm_create_default(VCPU_ID, 0, guest_code);
 
-	test_no_filter(vm);
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+	if (!sanity_check_pmu(vm)) {
+		print_skip("Guest PMU is not functional");
+		exit(KSFT_SKIP);
+	}
+
+	test_without_filter(vm);
 	test_member_deny_list(vm);
 	test_member_allow_list(vm);
 	test_not_member_deny_list(vm);
-- 
2.34.1.703.g22d0c6ccf7-goog
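
P.S. For anyone who wants to drive KVM_SET_PMU_EVENT_FILTER from their
own userspace code rather than through the selftest's vm_ioctl()
wrapper, below is a minimal sketch of building and applying a one-entry
deny list with the raw uapi. The helper names (deny_one_event,
encode_event) are made up for illustration; the encoding assumes the
filter matches on the usual eventsel layout (event select in bits 7:0,
unit mask in bits 15:8, AMD's extended select bits 11:8 in bits 35:32);
and vm_fd is assumed to be a VM file descriptor from KVM_CREATE_VM on a
kernel that reports KVM_CAP_PMU_EVENT_FILTER (v5.3 or later).

#include <linux/kvm.h>	/* struct kvm_pmu_event_filter, KVM_SET_PMU_EVENT_FILTER */
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>

/* Pack an event select and unit mask for the filter's events[] list
 * (assumed encoding; see the caveat above). */
static uint64_t encode_event(uint16_t select, uint8_t umask)
{
	return (((uint64_t)select & 0xf00) << 24) |
	       ((uint64_t)select & 0xff) |
	       ((uint64_t)umask << 8);
}

/* Hypothetical helper: install a one-entry deny list on 'vm_fd'. */
static int deny_one_event(int vm_fd, uint16_t select, uint8_t umask)
{
	struct kvm_pmu_event_filter *f;
	int ret;

	/* The uapi struct ends in a flexible array of events. */
	f = calloc(1, sizeof(*f) + sizeof(uint64_t));
	if (!f)
		return -1;

	f->action = KVM_PMU_EVENT_DENY;	/* or KVM_PMU_EVENT_ALLOW */
	f->nevents = 1;
	f->events[0] = encode_event(select, umask);

	ret = ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, f);
	free(f);
	return ret;
}

With that in place, deny_one_event(vm_fd, 0xc2, 0) should stop the
"retired branch instructions" event used above from counting, which is
exactly the condition test_member_deny_list() asserts through the
selftest wrappers.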