On 08/04/2017 03:14 AM, Andrew Jones wrote: > Hi Wei, > > I didn't review the important part of this test, i.e. what it's > testing and how, so below are just some nits. Thanks for reviewing it. I agree with most nits below, and will fix them in the next respin. > > On Thu, Aug 03, 2017 at 11:24:30PM -0500, Wei Huang wrote: >> AMD PMU implementation is very different from Intel. This patch adds >> a new test case, called amd_pmu, to x86 architecture. The >> implementation of amd_pmu.c is based on intel_pmu.c, with focus on >> AMD's general-purpose registers. To avoid running on Intel CPUs, >> we check the CPU's vendor name before executing the tests. >> >> Signed-off-by: Wei Huang <wei@xxxxxxxxxx> >> --- >> x86/Makefile.common | 3 +- >> x86/amd_pmu.c | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++++ >> x86/unittests.cfg | 4 + >> 3 files changed, 271 insertions(+), 1 deletion(-) >> create mode 100644 x86/amd_pmu.c >> >> diff --git a/x86/Makefile.common b/x86/Makefile.common >> index d0f4ed1..36bcf90 100644 >> --- a/x86/Makefile.common >> +++ b/x86/Makefile.common >> @@ -45,7 +45,8 @@ tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \ >> $(TEST_DIR)/realmode.flat $(TEST_DIR)/msr.flat \ >> $(TEST_DIR)/hypercall.flat $(TEST_DIR)/sieve.flat \ >> $(TEST_DIR)/kvmclock_test.flat $(TEST_DIR)/eventinj.flat \ >> - $(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat $(TEST_DIR)/setjmp.flat \ >> + $(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat \ >> + $(TEST_DIR)/amd_pmu.flat $(TEST_DIR)/setjmp.flat \ > > mixing tabs and spaces here > >> $(TEST_DIR)/tsc_adjust.flat $(TEST_DIR)/asyncpf.flat \ >> $(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat \ >> $(TEST_DIR)/hyperv_synic.flat $(TEST_DIR)/hyperv_stimer.flat \ >> diff --git a/x86/amd_pmu.c b/x86/amd_pmu.c >> new file mode 100644 >> index 0000000..006eccf >> --- /dev/null >> +++ b/x86/amd_pmu.c >> @@ -0,0 +1,265 @@ >> +/* >> + * vPMU testing for AMD CPUs >> + * >> + * Copyright (c) 2017 Red Hat Inc >> + * >> + * This work is licensed under the terms of the GNU GPL, version 2. >> + */ >> + >> +#include "x86/msr.h" >> +#include "x86/processor.h" >> +#include "x86/apic-defs.h" >> +#include "x86/apic.h" > > apic-defs is included by apic > >> +#include "x86/desc.h" >> +#include "x86/isr.h" >> +#include "x86/vm.h" >> + >> +#include "libcflat.h" >> +#include "pmu.h" >> +#include <stdint.h> > > stdint is included by libcflat > >> + >> +#define AMD64_NUM_COUNTERS 4 >> + >> +#define CPUID_VENDOR_AMD_1 0x68747541 /* "Auth" */ >> +#define CPUID_VENDOR_AMD_2 0x69746e65 /* "enti" */ >> +#define CPUID_VENDOR_AMD_3 0x444d4163 /* "cAMD" */ >> + >> +struct pmu_event gp_events[] = { >> + {"core cycles", 0x0076, 1*N, 50*N}, >> + {"instructions", 0x00c0, 10*N, 10.2*N}, >> + {"cache reference", 0x077d, 1, 2*N}, >> + {"cache misses", 0x077e, 0, 1*N}, >> + {"branches", 0x00c2, 1*N, 1.1*N}, >> + {"branch misses", 0x00c3, 0, 0.1*N}, >> + {"stalls frontend", 0x00d0, 0, 0.1*N}, >> + {"stalls backend", 0x00d1, 0, 30*N}, >> +}; >> + >> +char *buf; >> +static int num_counters; >> +volatile uint64_t irq_received; > > all above globals can be static > >> + >> +static bool check_irq(void) >> +{ >> + int i; >> + irq_received = 0; >> + >> + irq_enable(); >> + for (i = 0; i < 100000 && !irq_received; i++) >> + asm volatile("pause"); >> + irq_disable(); >> + >> + return irq_received; >> +} >> + >> +static inline void loop() >> +{ >> + unsigned long tmp, tmp2, tmp3; >> + >> + asm volatile("1: mov (%1), %2\n\t" >> + " add $64, %1\n\t" >> + " nop\n\t" >> + " nop\n\t" >> + " nop\n\t" >> + " nop\n\t" >> + " nop\n\t" >> + " nop\n\t" >> + " nop\n\t" >> + " loop 1b" >> + : "=c"(tmp), "=r"(tmp2), "=r"(tmp3) >> + : "0"(N), "1"(buf)); >> +} >> + >> +static struct pmu_event* get_counter_event(pmu_counter_t *cnt) >> +{ >> + int i; >> + >> + for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++) >> + if (gp_events[i].unit_sel == (cnt->config & 0xffff)) >> + return &gp_events[i]; >> + >> + return (void*)0; > > NULL > >> +} >> + >> +static void cnt_overflow(isr_regs_t *regs) >> +{ >> + irq_received++; >> + apic_write(APIC_EOI, 0); >> +} >> + >> +static int event_to_global_idx(pmu_counter_t *cnt) >> +{ >> + return cnt->ctr - MSR_K7_PERFCTR0; >> +} >> + >> +static bool verify_event(uint64_t count, struct pmu_event *e) >> +{ >> + return count >= e->min && count <= e->max; >> +} >> + >> +static bool verify_counter(pmu_counter_t *cnt) >> +{ >> + return verify_event(cnt->count, get_counter_event(cnt)); >> +} >> + >> +static void start_event(pmu_counter_t *evt) >> +{ >> + wrmsr(evt->ctr, evt->count); >> + wrmsr(MSR_K7_EVNTSEL0 + event_to_global_idx(evt), >> + evt->config | EVNTSEL_EN); >> +} >> + >> +static void stop_event(pmu_counter_t *evt) >> +{ >> + wrmsr(MSR_K7_EVNTSEL0 + event_to_global_idx(evt), >> + evt->config & ~EVNTSEL_EN); >> + evt->count = rdmsr(evt->ctr); >> +} >> + >> +static void measure(pmu_counter_t *evt, int count) >> +{ >> + int i; >> + >> + for (i = 0; i < count; i++) >> + start_event(&evt[i]); >> + >> + loop(); >> + >> + for (i = 0; i < count; i++) >> + stop_event(&evt[i]); >> +} >> + >> +static void check_gp_counter(struct pmu_event *evt) >> +{ >> + int i; >> + >> + pmu_counter_t cnt = { >> + .ctr = MSR_K7_PERFCTR0, >> + .config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel, >> + }; >> + >> + for (i = 0; i < num_counters; i++, cnt.ctr++) { >> + cnt.count = 0; >> + measure(&cnt, 1); >> + report("%s-%d", verify_event(cnt.count, evt), evt->name, i); >> + } >> +} >> + >> +static void check_gp_counters(void) >> +{ >> + int i; >> + >> + for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++) { >> + check_gp_counter(&gp_events[i]); >> + } >> +} >> + >> +static void check_counter_overflow(void) >> +{ >> + uint64_t count; >> + int i; >> + pmu_counter_t cnt = { >> + .ctr = MSR_K7_PERFCTR0, >> + .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel, >> + .count = 0, >> + }; >> + >> + measure(&cnt, 1); >> + count = cnt.count; >> + >> + report_prefix_push("overflow"); >> + >> + for (i = 0; i < num_counters; i++, cnt.ctr++) { >> + if (i % 2) >> + cnt.config |= EVNTSEL_INT; >> + else >> + cnt.config &= ~EVNTSEL_INT; >> + cnt.count = 1 - count; >> + measure(&cnt, 1); >> + report("cntr-%d", cnt.count < 20, i); >> + report("irq-%d", check_irq() == (i % 2), i); >> + } >> + >> + report_prefix_pop(); >> +} >> + >> +static void check_gp_counter_cmask(void) >> +{ >> + pmu_counter_t cnt = { >> + .ctr = MSR_K7_PERFCTR0, >> + .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel, >> + .count = 0, >> + }; >> + >> + cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT); >> + measure(&cnt, 1); >> + report("cmask", cnt.count < gp_events[1].min); >> +} >> + >> +static void check_rdpmc(void) >> +{ >> + uint64_t val = 0x1f3456789ull; >> + int i; >> + >> + report_prefix_push("rdpmc"); >> + >> + for (i = 0; i < num_counters; i++) { >> + uint64_t x = val; >> + wrmsr(MSR_K7_PERFCTR0 + i, val); >> + report("cntr-%d", rdpmc(i) == x, i); >> + } >> + >> + report_prefix_pop(); >> +} >> + >> +static void check_counters_many(void) >> +{ >> + pmu_counter_t cnt[10]; >> + int i, n; >> + >> + for (n = 0; n < num_counters; n++) { >> + cnt[n].count = 0; >> + cnt[n].ctr = MSR_K7_PERFCTR0 + n; >> + cnt[n].config = EVNTSEL_OS | EVNTSEL_USR | gp_events[n].unit_sel; >> + } >> + >> + measure(cnt, n); >> + >> + for (i = 0; i < n; i++) >> + if (!verify_counter(&cnt[i])) >> + break; > > {} for the for loop, might be nice here > >> + >> + report("all counters", i == n); >> +} >> + >> +#define IS_AMD_CPU(cpuid) ((cpuid).b == CPUID_VENDOR_AMD_1 && \ >> + (cpuid).d == CPUID_VENDOR_AMD_2 && \ >> + (cpuid).c == CPUID_VENDOR_AMD_3) > > should add a blank line here > >> +int main(int ac, char **av) >> +{ >> + struct cpuid id = cpuid(0); >> + >> + setup_vm(); >> + setup_idt(); >> + handle_irq(PC_VECTOR, cnt_overflow); >> + buf = vmalloc(N * 64); >> + >> + if (!IS_AMD_CPU(id)) { >> + printf("No AMD PMU detected!\n"); >> + return report_summary(); >> + } >> + >> + num_counters = AMD64_NUM_COUNTERS; >> + if (num_counters > ARRAY_SIZE(gp_events)) >> + num_counters = ARRAY_SIZE(gp_events); >> + >> + apic_write(APIC_LVTPC, PC_VECTOR); >> + >> + check_gp_counters(); >> + check_rdpmc(); >> + check_counters_many(); >> + check_counter_overflow(); >> + check_gp_counter_cmask(); >> + >> + return report_summary(); >> +} >> diff --git a/x86/unittests.cfg b/x86/unittests.cfg >> index 74156fa..98fb3e9 100644 >> --- a/x86/unittests.cfg >> +++ b/x86/unittests.cfg >> @@ -145,6 +145,10 @@ file = intel_pmu.flat >> extra_params = -cpu host >> check = /proc/sys/kernel/nmi_watchdog=0 >> >> +[amd_pmu] >> +file = amd_pmu.flat >> +extra_params = -cpu host > > maybe add 'arch = x86_64', since AMD only supports 64 bit. > >> + >> [port80] >> file = port80.flat >> >> -- >> 2.7.5 >> > > Thanks, > drew >