AMD PMU implementation is very different from Intel. This patch adds a new test case, called amd_pmu, to x86 architecture. The implementation of amd_pmu.c is based on intel_pmu.c, with focus on AMD's general-purpose registers. To avoid running on Intel CPUs, we check the CPU's vendor name before executing the tests. Signed-off-by: Wei Huang <wei@xxxxxxxxxx> --- x86/Makefile.common | 3 +- x86/amd_pmu.c | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++++ x86/unittests.cfg | 4 + 3 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 x86/amd_pmu.c diff --git a/x86/Makefile.common b/x86/Makefile.common index d0f4ed1..36bcf90 100644 --- a/x86/Makefile.common +++ b/x86/Makefile.common @@ -45,7 +45,8 @@ tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \ $(TEST_DIR)/realmode.flat $(TEST_DIR)/msr.flat \ $(TEST_DIR)/hypercall.flat $(TEST_DIR)/sieve.flat \ $(TEST_DIR)/kvmclock_test.flat $(TEST_DIR)/eventinj.flat \ - $(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat $(TEST_DIR)/setjmp.flat \ + $(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat \ + $(TEST_DIR)/amd_pmu.flat $(TEST_DIR)/setjmp.flat \ $(TEST_DIR)/tsc_adjust.flat $(TEST_DIR)/asyncpf.flat \ $(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat \ $(TEST_DIR)/hyperv_synic.flat $(TEST_DIR)/hyperv_stimer.flat \ diff --git a/x86/amd_pmu.c b/x86/amd_pmu.c new file mode 100644 index 0000000..006eccf --- /dev/null +++ b/x86/amd_pmu.c @@ -0,0 +1,265 @@ +/* + * vPMU testing for AMD CPUs + * + * Copyright (c) 2017 Red Hat Inc + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#include "x86/msr.h" +#include "x86/processor.h" +#include "x86/apic-defs.h" +#include "x86/apic.h" +#include "x86/desc.h" +#include "x86/isr.h" +#include "x86/vm.h" + +#include "libcflat.h" +#include "pmu.h" +#include <stdint.h> + +#define AMD64_NUM_COUNTERS 4 + +#define CPUID_VENDOR_AMD_1 0x68747541 /* "Auth" */ +#define CPUID_VENDOR_AMD_2 0x69746e65 /* "enti" */ +#define CPUID_VENDOR_AMD_3 0x444d4163 /* "cAMD" */ + +struct pmu_event gp_events[] = { + {"core cycles", 0x0076, 1*N, 50*N}, + {"instructions", 0x00c0, 10*N, 10.2*N}, + {"cache reference", 0x077d, 1, 2*N}, + {"cache misses", 0x077e, 0, 1*N}, + {"branches", 0x00c2, 1*N, 1.1*N}, + {"branch misses", 0x00c3, 0, 0.1*N}, + {"stalls frontend", 0x00d0, 0, 0.1*N}, + {"stalls backend", 0x00d1, 0, 30*N}, +}; + +char *buf; +static int num_counters; +volatile uint64_t irq_received; + +static bool check_irq(void) +{ + int i; + irq_received = 0; + + irq_enable(); + for (i = 0; i < 100000 && !irq_received; i++) + asm volatile("pause"); + irq_disable(); + + return irq_received; +} + +static inline void loop() +{ + unsigned long tmp, tmp2, tmp3; + + asm volatile("1: mov (%1), %2\n\t" + " add $64, %1\n\t" + " nop\n\t" + " nop\n\t" + " nop\n\t" + " nop\n\t" + " nop\n\t" + " nop\n\t" + " nop\n\t" + " loop 1b" + : "=c"(tmp), "=r"(tmp2), "=r"(tmp3) + : "0"(N), "1"(buf)); +} + +static struct pmu_event* get_counter_event(pmu_counter_t *cnt) +{ + int i; + + for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++) + if (gp_events[i].unit_sel == (cnt->config & 0xffff)) + return &gp_events[i]; + + return (void*)0; +} + +static void cnt_overflow(isr_regs_t *regs) +{ + irq_received++; + apic_write(APIC_EOI, 0); +} + +static int event_to_global_idx(pmu_counter_t *cnt) +{ + return cnt->ctr - MSR_K7_PERFCTR0; +} + +static bool verify_event(uint64_t count, struct pmu_event *e) +{ + return count >= e->min && count <= e->max; +} + +static bool verify_counter(pmu_counter_t *cnt) +{ + return verify_event(cnt->count, get_counter_event(cnt)); +} + +static void start_event(pmu_counter_t *evt) +{ + wrmsr(evt->ctr, evt->count); + wrmsr(MSR_K7_EVNTSEL0 + event_to_global_idx(evt), + evt->config | EVNTSEL_EN); +} + +static void stop_event(pmu_counter_t *evt) +{ + wrmsr(MSR_K7_EVNTSEL0 + event_to_global_idx(evt), + evt->config & ~EVNTSEL_EN); + evt->count = rdmsr(evt->ctr); +} + +static void measure(pmu_counter_t *evt, int count) +{ + int i; + + for (i = 0; i < count; i++) + start_event(&evt[i]); + + loop(); + + for (i = 0; i < count; i++) + stop_event(&evt[i]); +} + +static void check_gp_counter(struct pmu_event *evt) +{ + int i; + + pmu_counter_t cnt = { + .ctr = MSR_K7_PERFCTR0, + .config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel, + }; + + for (i = 0; i < num_counters; i++, cnt.ctr++) { + cnt.count = 0; + measure(&cnt, 1); + report("%s-%d", verify_event(cnt.count, evt), evt->name, i); + } +} + +static void check_gp_counters(void) +{ + int i; + + for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++) { + check_gp_counter(&gp_events[i]); + } +} + +static void check_counter_overflow(void) +{ + uint64_t count; + int i; + pmu_counter_t cnt = { + .ctr = MSR_K7_PERFCTR0, + .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel, + .count = 0, + }; + + measure(&cnt, 1); + count = cnt.count; + + report_prefix_push("overflow"); + + for (i = 0; i < num_counters; i++, cnt.ctr++) { + if (i % 2) + cnt.config |= EVNTSEL_INT; + else + cnt.config &= ~EVNTSEL_INT; + cnt.count = 1 - count; + measure(&cnt, 1); + report("cntr-%d", cnt.count < 20, i); + report("irq-%d", check_irq() == (i % 2), i); + } + + report_prefix_pop(); +} + +static void check_gp_counter_cmask(void) +{ + pmu_counter_t cnt = { + .ctr = MSR_K7_PERFCTR0, + .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel, + .count = 0, + }; + + cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT); + measure(&cnt, 1); + report("cmask", cnt.count < gp_events[1].min); +} + +static void check_rdpmc(void) +{ + uint64_t val = 0x1f3456789ull; + int i; + + report_prefix_push("rdpmc"); + + for (i = 0; i < num_counters; i++) { + uint64_t x = val; + wrmsr(MSR_K7_PERFCTR0 + i, val); + report("cntr-%d", rdpmc(i) == x, i); + } + + report_prefix_pop(); +} + +static void check_counters_many(void) +{ + pmu_counter_t cnt[10]; + int i, n; + + for (n = 0; n < num_counters; n++) { + cnt[n].count = 0; + cnt[n].ctr = MSR_K7_PERFCTR0 + n; + cnt[n].config = EVNTSEL_OS | EVNTSEL_USR | gp_events[n].unit_sel; + } + + measure(cnt, n); + + for (i = 0; i < n; i++) + if (!verify_counter(&cnt[i])) + break; + + report("all counters", i == n); +} + +#define IS_AMD_CPU(cpuid) ((cpuid).b == CPUID_VENDOR_AMD_1 && \ + (cpuid).d == CPUID_VENDOR_AMD_2 && \ + (cpuid).c == CPUID_VENDOR_AMD_3) +int main(int ac, char **av) +{ + struct cpuid id = cpuid(0); + + setup_vm(); + setup_idt(); + handle_irq(PC_VECTOR, cnt_overflow); + buf = vmalloc(N * 64); + + if (!IS_AMD_CPU(id)) { + printf("No AMD PMU detected!\n"); + return report_summary(); + } + + num_counters = AMD64_NUM_COUNTERS; + if (num_counters > ARRAY_SIZE(gp_events)) + num_counters = ARRAY_SIZE(gp_events); + + apic_write(APIC_LVTPC, PC_VECTOR); + + check_gp_counters(); + check_rdpmc(); + check_counters_many(); + check_counter_overflow(); + check_gp_counter_cmask(); + + return report_summary(); +} diff --git a/x86/unittests.cfg b/x86/unittests.cfg index 74156fa..98fb3e9 100644 --- a/x86/unittests.cfg +++ b/x86/unittests.cfg @@ -145,6 +145,10 @@ file = intel_pmu.flat extra_params = -cpu host check = /proc/sys/kernel/nmi_watchdog=0 +[amd_pmu] +file = amd_pmu.flat +extra_params = -cpu host + [port80] file = port80.flat -- 2.7.5