From: Like Xu <likexu@xxxxxxxxxxx>

The AMD core PMU before Zen4 has no version number and no fixed
counters; it has a hard-coded number of generic counters and a
hard-coded counter bit-width, and only hardware events common across
AMD generations (starting with K7) are added to the amd_gp_events[]
table. All of the above differences are instantiated at the detection
step, which also covers the K7 PMU registers, consistent with bare
metal.

Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
---
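Note for reviewers: the Fam15h core extension interleaves its
event-select and counter MSRs, which is why the accessors below step
by two when gp_counter_base is MSR_F15H_PERF_CTR0, while the legacy
K7 banks are contiguous. A minimal sketch of the layout this patch
relies on (the helper names here are illustrative only, not part of
the patch):

	/*
	 * Illustrative only: Fam15h select/counter MSRs are
	 * interleaved, so counter i lives two MSRs above counter i - 1:
	 *
	 *   0xc0010200 PERF_CTL0    0xc0010201 PERF_CTR0
	 *   0xc0010202 PERF_CTL1    0xc0010203 PERF_CTR1
	 *   ...
	 *
	 * whereas K7 uses contiguous banks (EVNTSEL0..3, PERFCTR0..3).
	 */
	static inline uint32_t f15h_select_msr(unsigned int i)
	{
		return MSR_F15H_PERF_CTL0 + 2 * i; /* cf. get_gp_select_msr() */
	}

	static inline uint32_t f15h_counter_msr(unsigned int i)
	{
		return MSR_F15H_PERF_CTR0 + 2 * i; /* cf. get_gp_counter_msr() */
	}

The test can be exercised in the usual way, e.g. something like
"./x86/run x86/pmu.flat -cpu host".
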
 lib/x86/msr.h       | 17 ++++++++++++
 lib/x86/processor.h | 32 ++++++++++++++++++++--
 x86/pmu.c           | 67 ++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 106 insertions(+), 10 deletions(-)

diff --git a/lib/x86/msr.h b/lib/x86/msr.h
index 252e041..5f16a58 100644
--- a/lib/x86/msr.h
+++ b/lib/x86/msr.h
@@ -130,6 +130,23 @@
 #define MSR_AMD64_IBSDCPHYSAD	0xc0011039
 #define MSR_AMD64_IBSCTL	0xc001103a
 
+/* Fam 15h MSRs */
+#define MSR_F15H_PERF_CTL	0xc0010200
+#define MSR_F15H_PERF_CTL0	MSR_F15H_PERF_CTL
+#define MSR_F15H_PERF_CTL1	(MSR_F15H_PERF_CTL + 2)
+#define MSR_F15H_PERF_CTL2	(MSR_F15H_PERF_CTL + 4)
+#define MSR_F15H_PERF_CTL3	(MSR_F15H_PERF_CTL + 6)
+#define MSR_F15H_PERF_CTL4	(MSR_F15H_PERF_CTL + 8)
+#define MSR_F15H_PERF_CTL5	(MSR_F15H_PERF_CTL + 10)
+
+#define MSR_F15H_PERF_CTR	0xc0010201
+#define MSR_F15H_PERF_CTR0	MSR_F15H_PERF_CTR
+#define MSR_F15H_PERF_CTR1	(MSR_F15H_PERF_CTR + 2)
+#define MSR_F15H_PERF_CTR2	(MSR_F15H_PERF_CTR + 4)
+#define MSR_F15H_PERF_CTR3	(MSR_F15H_PERF_CTR + 6)
+#define MSR_F15H_PERF_CTR4	(MSR_F15H_PERF_CTR + 8)
+#define MSR_F15H_PERF_CTR5	(MSR_F15H_PERF_CTR + 10)
+
 /* Fam 10h MSRs */
 #define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058
 #define FAM10H_MMIO_CONF_ENABLE	(1<<0)

diff --git a/lib/x86/processor.h b/lib/x86/processor.h
index 0324220..10bca27 100644
--- a/lib/x86/processor.h
+++ b/lib/x86/processor.h
@@ -793,6 +793,9 @@ static inline void flush_tlb(void)
 
 static inline u8 pmu_version(void)
 {
+	if (!is_intel())
+		return 0;
+
 	return cpuid(10).a & 0xff;
 }
 
@@ -806,19 +809,39 @@ static inline bool this_cpu_has_perf_global_ctrl(void)
 	return pmu_version() > 1;
 }
 
+#define AMD64_NUM_COUNTERS	4
+#define AMD64_NUM_COUNTERS_CORE	6
+
+static inline bool has_amd_perfctr_core(void)
+{
+	return cpuid(0x80000001).c & BIT_ULL(23);
+}
+
 static inline u8 pmu_nr_gp_counters(void)
 {
-	return (cpuid(10).a >> 8) & 0xff;
+	if (is_intel()) {
+		return (cpuid(10).a >> 8) & 0xff;
+	} else if (!has_amd_perfctr_core()) {
+		return AMD64_NUM_COUNTERS;
+	}
+
+	return AMD64_NUM_COUNTERS_CORE;
 }
 
 static inline u8 pmu_gp_counter_width(void)
 {
-	return (cpuid(10).a >> 16) & 0xff;
+	if (is_intel())
+		return (cpuid(10).a >> 16) & 0xff;
+	else
+		return 48;
 }
 
 static inline u8 pmu_gp_counter_mask_length(void)
 {
-	return (cpuid(10).a >> 24) & 0xff;
+	if (is_intel())
+		return (cpuid(10).a >> 24) & 0xff;
+	else
+		return pmu_nr_gp_counters();
 }
 
 static inline u8 pmu_nr_fixed_counters(void)
@@ -843,6 +866,9 @@ static inline u8 pmu_fixed_counter_width(void)
 
 static inline bool pmu_gp_counter_is_available(int i)
 {
+	if (!is_intel())
+		return i < pmu_nr_gp_counters();
+
 	/* CPUID.0xA.EBX bit is '1' if the counter is NOT available. */
 	return !(cpuid(10).b & BIT(i));
 }

diff --git a/x86/pmu.c b/x86/pmu.c
index 0706cb1..b6ab10c 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -62,6 +62,11 @@ struct pmu_event {
 	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
 	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
 	{"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
+}, amd_gp_events[] = {
+	{"core cycles", 0x0076, 1*N, 50*N},
+	{"instructions", 0x00c0, 10*N, 10.2*N},
+	{"branches", 0x00c2, 1*N, 1.1*N},
+	{"branch misses", 0x00c3, 0, 0.1*N},
 };
 
 #define PMU_CAP_FW_WRITES	(1ULL << 13)
@@ -105,14 +110,24 @@ static bool check_irq(void)
 
 static bool is_gp(pmu_counter_t *evt)
 {
+	if (!is_intel())
+		return true;
+
 	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
 		evt->ctr >= MSR_IA32_PMC0;
 }
 
 static int event_to_global_idx(pmu_counter_t *cnt)
 {
-	return cnt->ctr - (is_gp(cnt) ? gp_counter_base :
-		(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
+	if (is_intel())
+		return cnt->ctr - (is_gp(cnt) ? gp_counter_base :
+			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
+
+	if (gp_counter_base == MSR_F15H_PERF_CTR0) {
+		return (cnt->ctr - gp_counter_base) / 2;
+	} else {
+		return cnt->ctr - gp_counter_base;
+	}
 }
 
 static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
@@ -150,11 +165,17 @@ static void global_disable(pmu_counter_t *cnt)
 
 static inline uint32_t get_gp_counter_msr(unsigned int i)
 {
+	if (gp_counter_base == MSR_F15H_PERF_CTR0)
+		return gp_counter_base + 2 * i;
+
 	return gp_counter_base + i;
 }
 
 static inline uint32_t get_gp_select_msr(unsigned int i)
 {
+	if (gp_select_base == MSR_F15H_PERF_CTL0)
+		return gp_select_base + 2 * i;
+
 	return gp_select_base + i;
 }
@@ -334,6 +355,9 @@ static void check_counter_overflow(void)
 		cnt.count &= (1ull << pmu_gp_counter_width()) - 1;
 
 		if (i == nr_gp_counters) {
+			if (!is_intel())
+				break;
+
 			cnt.ctr = fixed_events[0].unit_sel;
 			__measure(&cnt, 0);
 			count = cnt.count;
@@ -494,7 +518,7 @@ static void check_running_counter_wrmsr(void)
 static void check_emulated_instr(void)
 {
 	uint64_t status, instr_start, brnch_start;
-	unsigned int branch_idx = 5;
+	unsigned int branch_idx = is_intel() ? 5 : 2;
 	pmu_counter_t brnch_cnt = {
 		.ctr = get_gp_counter_msr(0),
 		/* branch instructions */
@@ -695,13 +719,35 @@ static bool detect_intel_pmu(void)
 	return true;
 }
 
-static bool pmu_is_detected(void)
+static void amd_switch_to_non_perfctr_core(void)
 {
-	if (!is_intel()) {
-		report_skip("AMD PMU is not supported.");
+	gp_counter_base = MSR_K7_PERFCTR0;
+	gp_select_base = MSR_K7_EVNTSEL0;
+	nr_gp_counters = AMD64_NUM_COUNTERS;
+}
+
+static bool detect_amd_pmu(void)
+{
+	if (!has_amd_perfctr_core()) {
+		report_skip("Missing perfctr_core, unsupported AMD PMU.");
 		return false;
 	}
 
+	nr_gp_counters = pmu_nr_gp_counters();
+	gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
+	gp_events = (PMU_EVENTS_ARRAY_t *)amd_gp_events;
+	gp_counter_base = MSR_F15H_PERF_CTR0;
+	gp_select_base = MSR_F15H_PERF_CTL0;
+
+	report_prefix_push("AMD");
+	return true;
+}
+
+static bool pmu_is_detected(void)
+{
+	if (!is_intel())
+		return detect_amd_pmu();
+
 	return detect_intel_pmu();
 }
@@ -714,7 +760,8 @@ int main(int ac, char **av)
 	if (!pmu_is_detected())
 		return report_summary();
 
-	set_ref_cycle_expectations();
+	if (is_intel())
+		set_ref_cycle_expectations();
 
 	printf("PMU version: %d\n", pmu_version());
 	printf("GP counters: %d\n", nr_gp_counters);
@@ -736,5 +783,11 @@ int main(int ac, char **av)
 		report_prefix_pop();
 	}
 
+	if (!is_intel()) {
+		report_prefix_push("K7");
+		amd_switch_to_non_perfctr_core();
+		check_counters();
+	}
+
 	return report_summary();
 }
-- 
2.37.2