Hi Wei, I didn't review the important part of this test, i.e. what it's testing and how, so below are just some nits. On Thu, Aug 03, 2017 at 11:24:30PM -0500, Wei Huang wrote: > AMD PMU implementation is very different from Intel. This patch adds > a new test case, called amd_pmu, to x86 architecture. The > implementation of amd_pmu.c is based on intel_pmu.c, with focus on > AMD's general-purpose registers. To avoid running on Intel CPUs, > we check the CPU's vendor name before executing the tests. > > Signed-off-by: Wei Huang <wei@xxxxxxxxxx> > --- > x86/Makefile.common | 3 +- > x86/amd_pmu.c | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > x86/unittests.cfg | 4 + > 3 files changed, 271 insertions(+), 1 deletion(-) > create mode 100644 x86/amd_pmu.c > > diff --git a/x86/Makefile.common b/x86/Makefile.common > index d0f4ed1..36bcf90 100644 > --- a/x86/Makefile.common > +++ b/x86/Makefile.common > @@ -45,7 +45,8 @@ tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \ > $(TEST_DIR)/realmode.flat $(TEST_DIR)/msr.flat \ > $(TEST_DIR)/hypercall.flat $(TEST_DIR)/sieve.flat \ > $(TEST_DIR)/kvmclock_test.flat $(TEST_DIR)/eventinj.flat \ > - $(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat $(TEST_DIR)/setjmp.flat \ > + $(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat \ > + $(TEST_DIR)/amd_pmu.flat $(TEST_DIR)/setjmp.flat \ mixing tabs and spaces here > $(TEST_DIR)/tsc_adjust.flat $(TEST_DIR)/asyncpf.flat \ > $(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat \ > $(TEST_DIR)/hyperv_synic.flat $(TEST_DIR)/hyperv_stimer.flat \ > diff --git a/x86/amd_pmu.c b/x86/amd_pmu.c > new file mode 100644 > index 0000000..006eccf > --- /dev/null > +++ b/x86/amd_pmu.c > @@ -0,0 +1,265 @@ > +/* > + * vPMU testing for AMD CPUs > + * > + * Copyright (c) 2017 Red Hat Inc > + * > + * This work is licensed under the terms of the GNU GPL, version 2. > + */ > + > +#include "x86/msr.h" > +#include "x86/processor.h" > +#include "x86/apic-defs.h" > +#include "x86/apic.h" apic-defs is included by apic > +#include "x86/desc.h" > +#include "x86/isr.h" > +#include "x86/vm.h" > + > +#include "libcflat.h" > +#include "pmu.h" > +#include <stdint.h> stdint is included by libcflat > + > +#define AMD64_NUM_COUNTERS 4 > + > +#define CPUID_VENDOR_AMD_1 0x68747541 /* "Auth" */ > +#define CPUID_VENDOR_AMD_2 0x69746e65 /* "enti" */ > +#define CPUID_VENDOR_AMD_3 0x444d4163 /* "cAMD" */ > + > +struct pmu_event gp_events[] = { > + {"core cycles", 0x0076, 1*N, 50*N}, > + {"instructions", 0x00c0, 10*N, 10.2*N}, > + {"cache reference", 0x077d, 1, 2*N}, > + {"cache misses", 0x077e, 0, 1*N}, > + {"branches", 0x00c2, 1*N, 1.1*N}, > + {"branch misses", 0x00c3, 0, 0.1*N}, > + {"stalls frontend", 0x00d0, 0, 0.1*N}, > + {"stalls backend", 0x00d1, 0, 30*N}, > +}; > + > +char *buf; > +static int num_counters; > +volatile uint64_t irq_received; all above globals can be static > + > +static bool check_irq(void) > +{ > + int i; > + irq_received = 0; > + > + irq_enable(); > + for (i = 0; i < 100000 && !irq_received; i++) > + asm volatile("pause"); > + irq_disable(); > + > + return irq_received; > +} > + > +static inline void loop() > +{ > + unsigned long tmp, tmp2, tmp3; > + > + asm volatile("1: mov (%1), %2\n\t" > + " add $64, %1\n\t" > + " nop\n\t" > + " nop\n\t" > + " nop\n\t" > + " nop\n\t" > + " nop\n\t" > + " nop\n\t" > + " nop\n\t" > + " loop 1b" > + : "=c"(tmp), "=r"(tmp2), "=r"(tmp3) > + : "0"(N), "1"(buf)); > +} > + > +static struct pmu_event* get_counter_event(pmu_counter_t *cnt) > +{ > + int i; > + > + for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++) > + if (gp_events[i].unit_sel == (cnt->config & 0xffff)) > + return &gp_events[i]; > + > + return (void*)0; NULL > +} > + > +static void cnt_overflow(isr_regs_t *regs) > +{ > + irq_received++; > + apic_write(APIC_EOI, 0); > +} > + > +static int event_to_global_idx(pmu_counter_t *cnt) > +{ > + return cnt->ctr - MSR_K7_PERFCTR0; > +} > + > +static bool verify_event(uint64_t count, struct pmu_event *e) > +{ > + return count >= e->min && count <= e->max; > +} > + > +static bool verify_counter(pmu_counter_t *cnt) > +{ > + return verify_event(cnt->count, get_counter_event(cnt)); > +} > + > +static void start_event(pmu_counter_t *evt) > +{ > + wrmsr(evt->ctr, evt->count); > + wrmsr(MSR_K7_EVNTSEL0 + event_to_global_idx(evt), > + evt->config | EVNTSEL_EN); > +} > + > +static void stop_event(pmu_counter_t *evt) > +{ > + wrmsr(MSR_K7_EVNTSEL0 + event_to_global_idx(evt), > + evt->config & ~EVNTSEL_EN); > + evt->count = rdmsr(evt->ctr); > +} > + > +static void measure(pmu_counter_t *evt, int count) > +{ > + int i; > + > + for (i = 0; i < count; i++) > + start_event(&evt[i]); > + > + loop(); > + > + for (i = 0; i < count; i++) > + stop_event(&evt[i]); > +} > + > +static void check_gp_counter(struct pmu_event *evt) > +{ > + int i; > + > + pmu_counter_t cnt = { > + .ctr = MSR_K7_PERFCTR0, > + .config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel, > + }; > + > + for (i = 0; i < num_counters; i++, cnt.ctr++) { > + cnt.count = 0; > + measure(&cnt, 1); > + report("%s-%d", verify_event(cnt.count, evt), evt->name, i); > + } > +} > + > +static void check_gp_counters(void) > +{ > + int i; > + > + for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++) { > + check_gp_counter(&gp_events[i]); > + } > +} > + > +static void check_counter_overflow(void) > +{ > + uint64_t count; > + int i; > + pmu_counter_t cnt = { > + .ctr = MSR_K7_PERFCTR0, > + .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel, > + .count = 0, > + }; > + > + measure(&cnt, 1); > + count = cnt.count; > + > + report_prefix_push("overflow"); > + > + for (i = 0; i < num_counters; i++, cnt.ctr++) { > + if (i % 2) > + cnt.config |= EVNTSEL_INT; > + else > + cnt.config &= ~EVNTSEL_INT; > + cnt.count = 1 - count; > + measure(&cnt, 1); > + report("cntr-%d", cnt.count < 20, i); > + report("irq-%d", check_irq() == (i % 2), i); > + } > + > + report_prefix_pop(); > +} > + > +static void check_gp_counter_cmask(void) > +{ > + pmu_counter_t cnt = { > + .ctr = MSR_K7_PERFCTR0, > + .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel, > + .count = 0, > + }; > + > + cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT); > + measure(&cnt, 1); > + report("cmask", cnt.count < gp_events[1].min); > +} > + > +static void check_rdpmc(void) > +{ > + uint64_t val = 0x1f3456789ull; > + int i; > + > + report_prefix_push("rdpmc"); > + > + for (i = 0; i < num_counters; i++) { > + uint64_t x = val; > + wrmsr(MSR_K7_PERFCTR0 + i, val); > + report("cntr-%d", rdpmc(i) == x, i); > + } > + > + report_prefix_pop(); > +} > + > +static void check_counters_many(void) > +{ > + pmu_counter_t cnt[10]; > + int i, n; > + > + for (n = 0; n < num_counters; n++) { > + cnt[n].count = 0; > + cnt[n].ctr = MSR_K7_PERFCTR0 + n; > + cnt[n].config = EVNTSEL_OS | EVNTSEL_USR | gp_events[n].unit_sel; > + } > + > + measure(cnt, n); > + > + for (i = 0; i < n; i++) > + if (!verify_counter(&cnt[i])) > + break; {} for the for loop, might be nice here > + > + report("all counters", i == n); > +} > + > +#define IS_AMD_CPU(cpuid) ((cpuid).b == CPUID_VENDOR_AMD_1 && \ > + (cpuid).d == CPUID_VENDOR_AMD_2 && \ > + (cpuid).c == CPUID_VENDOR_AMD_3) should add a blank line here > +int main(int ac, char **av) > +{ > + struct cpuid id = cpuid(0); > + > + setup_vm(); > + setup_idt(); > + handle_irq(PC_VECTOR, cnt_overflow); > + buf = vmalloc(N * 64); > + > + if (!IS_AMD_CPU(id)) { > + printf("No AMD PMU detected!\n"); > + return report_summary(); > + } > + > + num_counters = AMD64_NUM_COUNTERS; > + if (num_counters > ARRAY_SIZE(gp_events)) > + num_counters = ARRAY_SIZE(gp_events); > + > + apic_write(APIC_LVTPC, PC_VECTOR); > + > + check_gp_counters(); > + check_rdpmc(); > + check_counters_many(); > + check_counter_overflow(); > + check_gp_counter_cmask(); > + > + return report_summary(); > +} > diff --git a/x86/unittests.cfg b/x86/unittests.cfg > index 74156fa..98fb3e9 100644 > --- a/x86/unittests.cfg > +++ b/x86/unittests.cfg > @@ -145,6 +145,10 @@ file = intel_pmu.flat > extra_params = -cpu host > check = /proc/sys/kernel/nmi_watchdog=0 > > +[amd_pmu] > +file = amd_pmu.flat > +extra_params = -cpu host maybe add 'arch = x86_64', since AMD only supports 64 bit. > + > [port80] > file = port80.flat > > -- > 2.7.5 > Thanks, drew