[kvm-unit-tests PATCH v5 26/27] x86/pmu: Update testcases to cover AMD PMU

From: Like Xu <likexu@xxxxxxxxxxx>

The AMD core PMU before Zen4 has no version number and no fixed
counters; it has a hard-coded number of generic counters and a
hard-coded counter bit-width, and only hardware events common across
AMD generations (starting with K7) are added to the amd_gp_events[]
table.

All of the above differences are instantiated at the detection step,
which also covers the K7 PMU registers, consistent with bare metal.

Cc: Sandipan Das <sandipan.das@xxxxxxx>
Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
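Note for reviewers, not intended for the commit message: below is a
minimal standalone sketch of the index-to-MSR mappings that pmu_init()
ends up selecting on AMD. The MSR addresses match lib/x86/msr.h;
amd_gp_counter_msr() is a hypothetical stand-in for the real
MSR_GP_COUNTERx() helper in lib/x86/pmu.h.

	#include <stdint.h>
	#include <stdio.h>

	#define MSR_F15H_PERF_CTR0	0xc0010201u	/* PERFCTR_CORE: CTL/CTR pairs */
	#define MSR_K7_PERFCTR0		0xc0010004u	/* legacy K7: contiguous bank */

	static uint32_t amd_gp_counter_msr(uint32_t base, unsigned int i)
	{
		/* F15h MSRs interleave PERF_CTLn/PERF_CTRn, hence the stride of 2. */
		if (base == MSR_F15H_PERF_CTR0)
			return base + 2 * i;
		return base + i;	/* K7 (and Intel) counters are contiguous. */
	}

	int main(void)
	{
		unsigned int i;

		for (i = 0; i < 6; i++)	/* AMD64_NUM_COUNTERS_CORE */
			printf("core PERF_CTR%u = 0x%x\n", i,
			       amd_gp_counter_msr(MSR_F15H_PERF_CTR0, i));
		for (i = 0; i < 4; i++)	/* AMD64_NUM_COUNTERS */
			printf("K7 PERFCTR%u = 0x%x\n", i,
			       amd_gp_counter_msr(MSR_K7_PERFCTR0, i));
		return 0;
	}

Compiled standalone, this prints 0xc0010201/0xc0010203/... for the core
counters (stride 2) and 0xc0010004..0xc0010007 for the K7 bank.
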
 lib/x86/msr.h       | 17 +++++++++++++
 lib/x86/pmu.c       | 54 ++++++++++++++++++++++++++++++++----------------------
 lib/x86/pmu.h       | 13 +++++++++-
 lib/x86/processor.h |  1 +
 x86/pmu.c           | 58 +++++++++++++++++++++++++++++++++++---------
 5 files changed, 109 insertions(+), 34 deletions(-)

diff --git a/lib/x86/msr.h b/lib/x86/msr.h
index 68d88371..6cf8f336 100644
--- a/lib/x86/msr.h
+++ b/lib/x86/msr.h
@@ -146,6 +146,23 @@
 #define FAM10H_MMIO_CONF_BASE_SHIFT	20
 #define MSR_FAM10H_NODE_ID		0xc001100c
 
+/* Fam 15h MSRs */
+#define MSR_F15H_PERF_CTL              0xc0010200
+#define MSR_F15H_PERF_CTL0             MSR_F15H_PERF_CTL
+#define MSR_F15H_PERF_CTL1             (MSR_F15H_PERF_CTL + 2)
+#define MSR_F15H_PERF_CTL2             (MSR_F15H_PERF_CTL + 4)
+#define MSR_F15H_PERF_CTL3             (MSR_F15H_PERF_CTL + 6)
+#define MSR_F15H_PERF_CTL4             (MSR_F15H_PERF_CTL + 8)
+#define MSR_F15H_PERF_CTL5             (MSR_F15H_PERF_CTL + 10)
+
+#define MSR_F15H_PERF_CTR              0xc0010201
+#define MSR_F15H_PERF_CTR0             MSR_F15H_PERF_CTR
+#define MSR_F15H_PERF_CTR1             (MSR_F15H_PERF_CTR + 2)
+#define MSR_F15H_PERF_CTR2             (MSR_F15H_PERF_CTR + 4)
+#define MSR_F15H_PERF_CTR3             (MSR_F15H_PERF_CTR + 6)
+#define MSR_F15H_PERF_CTR4             (MSR_F15H_PERF_CTR + 8)
+#define MSR_F15H_PERF_CTR5             (MSR_F15H_PERF_CTR + 10)
+
 /* K8 MSRs */
 #define MSR_K8_TOP_MEM1			0xc001001a
 #define MSR_K8_TOP_MEM2			0xc001001d
diff --git a/lib/x86/pmu.c b/lib/x86/pmu.c
index 837d2a6c..090e1115 100644
--- a/lib/x86/pmu.c
+++ b/lib/x86/pmu.c
@@ -4,36 +4,46 @@ struct pmu_caps pmu;
 
 void pmu_init(void)
 {
-	struct cpuid cpuid_10 = cpuid(10);
-
 	pmu.is_intel = is_intel();
 
-	if (!pmu.is_intel)
-		return;
+	if (pmu.is_intel) {
+		struct cpuid cpuid_10 = cpuid(10);
 
-	pmu.version = cpuid_10.a & 0xff;
+		pmu.version = cpuid_10.a & 0xff;
 
-	if (pmu.version > 1) {
-		pmu.nr_fixed_counters = cpuid_10.d & 0x1f;
-		pmu.fixed_counter_width = (cpuid_10.d >> 5) & 0xff;
-	}
+		if (pmu.version > 1) {
+			pmu.nr_fixed_counters = cpuid_10.d & 0x1f;
+			pmu.fixed_counter_width = (cpuid_10.d >> 5) & 0xff;
+		}
 
-	pmu.nr_gp_counters = (cpuid_10.a >> 8) & 0xff;
-	pmu.gp_counter_width = (cpuid_10.a >> 16) & 0xff;
-	pmu.gp_counter_mask_length = (cpuid_10.a >> 24) & 0xff;
+		pmu.nr_gp_counters = (cpuid_10.a >> 8) & 0xff;
+		pmu.gp_counter_width = (cpuid_10.a >> 16) & 0xff;
+		pmu.gp_counter_mask_length = (cpuid_10.a >> 24) & 0xff;
 
-	/* CPUID.0xA.EBX bit is '1' if a counter is NOT available. */
-	pmu.gp_counter_available = ~cpuid_10.b;
+		/* CPUID.0xA.EBX bit is '1' if a counter is NOT available. */
+		pmu.gp_counter_available = ~cpuid_10.b;
 
-	if (this_cpu_has(X86_FEATURE_PDCM))
-		pmu.perf_cap = rdmsr(MSR_IA32_PERF_CAPABILITIES);
-	pmu.msr_gp_counter_base = MSR_IA32_PERFCTR0;
-	pmu.msr_gp_event_select_base = MSR_P6_EVNTSEL0;
+		if (this_cpu_has(X86_FEATURE_PDCM))
+			pmu.perf_cap = rdmsr(MSR_IA32_PERF_CAPABILITIES);
+		pmu.msr_gp_counter_base = MSR_IA32_PERFCTR0;
+		pmu.msr_gp_event_select_base = MSR_P6_EVNTSEL0;
 
-	if (this_cpu_has_perf_global_status()) {
-		pmu.msr_global_status = MSR_CORE_PERF_GLOBAL_STATUS;
-		pmu.msr_global_ctl = MSR_CORE_PERF_GLOBAL_CTRL;
-		pmu.msr_global_status_clr = MSR_CORE_PERF_GLOBAL_OVF_CTRL;
+		if (this_cpu_has_perf_global_status()) {
+			pmu.msr_global_status = MSR_CORE_PERF_GLOBAL_STATUS;
+			pmu.msr_global_ctl = MSR_CORE_PERF_GLOBAL_CTRL;
+			pmu.msr_global_status_clr = MSR_CORE_PERF_GLOBAL_OVF_CTRL;
+		}
+	} else {
+		pmu.msr_gp_counter_base = MSR_F15H_PERF_CTR0;
+		pmu.msr_gp_event_select_base = MSR_F15H_PERF_CTL0;
+		if (!this_cpu_has(X86_FEATURE_PERFCTR_CORE))
+			pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
+		else
+			pmu.nr_gp_counters = AMD64_NUM_COUNTERS_CORE;
+
+		pmu.gp_counter_width = PMC_DEFAULT_WIDTH;
+		pmu.gp_counter_mask_length = pmu.nr_gp_counters;
+		pmu.gp_counter_available = (1u << pmu.nr_gp_counters) - 1;
 	}
 
 	pmu_reset_all_counters();
diff --git a/lib/x86/pmu.h b/lib/x86/pmu.h
index 460e2a19..8465e3c9 100644
--- a/lib/x86/pmu.h
+++ b/lib/x86/pmu.h
@@ -10,6 +10,11 @@
 /* Performance Counter Vector for the LVT PC Register */
 #define PMI_VECTOR	32
 
+#define AMD64_NUM_COUNTERS	4
+#define AMD64_NUM_COUNTERS_CORE	6
+
+#define PMC_DEFAULT_WIDTH	48
+
 #define DEBUGCTLMSR_LBR	  (1UL <<  0)
 
 #define PMU_CAP_LBR_FMT	  0x3f
@@ -72,17 +77,23 @@ void pmu_init(void);
 
 static inline u32 MSR_GP_COUNTERx(unsigned int i)
 {
+	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
+		return pmu.msr_gp_counter_base + 2 * i;
+
 	return pmu.msr_gp_counter_base + i;
 }
 
 static inline u32 MSR_GP_EVENT_SELECTx(unsigned int i)
 {
+	if (pmu.msr_gp_event_select_base == MSR_F15H_PERF_CTL0)
+		return pmu.msr_gp_event_select_base + 2 * i;
+
 	return pmu.msr_gp_event_select_base + i;
 }
 
 static inline bool this_cpu_has_pmu(void)
 {
-	return !!pmu.version;
+	return !pmu.is_intel || !!pmu.version;
 }
 
 static inline bool this_cpu_has_perf_global_ctrl(void)
diff --git a/lib/x86/processor.h b/lib/x86/processor.h
index c0716663..681e1675 100644
--- a/lib/x86/processor.h
+++ b/lib/x86/processor.h
@@ -252,6 +252,7 @@ static inline bool is_intel(void)
  * Extended Leafs, a.k.a. AMD defined
  */
 #define	X86_FEATURE_SVM			(CPUID(0x80000001, 0, ECX, 2))
+#define	X86_FEATURE_PERFCTR_CORE	(CPUID(0x80000001, 0, ECX, 23))
 #define	X86_FEATURE_NX			(CPUID(0x80000001, 0, EDX, 20))
 #define	X86_FEATURE_GBPAGES		(CPUID(0x80000001, 0, EDX, 26))
 #define	X86_FEATURE_RDTSCP		(CPUID(0x80000001, 0, EDX, 27))
diff --git a/x86/pmu.c b/x86/pmu.c
index c40e2a96..72c2c9cf 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -38,6 +38,11 @@ struct pmu_event {
 	{"llc misses", 0x412e, 1, 1*N},
 	{"branches", 0x00c4, 1*N, 1.1*N},
 	{"branch misses", 0x00c5, 0, 0.1*N},
+}, amd_gp_events[] = {
+	{"core cycles", 0x0076, 1*N, 50*N},
+	{"instructions", 0x00c0, 10*N, 10.2*N},
+	{"branches", 0x00c2, 1*N, 1.1*N},
+	{"branch misses", 0x00c3, 0, 0.1*N},
 }, fixed_events[] = {
 	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
 	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
@@ -79,14 +84,23 @@ static bool check_irq(void)
 
 static bool is_gp(pmu_counter_t *evt)
 {
+	if (!pmu.is_intel)
+		return true;
+
 	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
 		evt->ctr >= MSR_IA32_PMC0;
 }
 
 static int event_to_global_idx(pmu_counter_t *cnt)
 {
-	return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
-		(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
+	if (pmu.is_intel)
+		return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
+			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
+
+	if (pmu.msr_gp_counter_base == MSR_F15H_PERF_CTR0)
+		return (cnt->ctr - pmu.msr_gp_counter_base) / 2;
+	else
+		return cnt->ctr - pmu.msr_gp_counter_base;
 }
 
 static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
@@ -306,6 +320,9 @@ static void check_counter_overflow(void)
 			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
 
 		if (i == pmu.nr_gp_counters) {
+			if (!pmu.is_intel)
+				break;
+
 			cnt.ctr = fixed_events[0].unit_sel;
 			cnt.count = measure_for_overflow(&cnt);
 			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
@@ -319,7 +336,10 @@ static void check_counter_overflow(void)
 			cnt.config &= ~EVNTSEL_INT;
 		idx = event_to_global_idx(&cnt);
 		__measure(&cnt, cnt.count);
-		report(cnt.count == 1, "cntr-%d", i);
+		if (pmu.is_intel)
+			report(cnt.count == 1, "cntr-%d", i);
+		else
+			report(cnt.count == 0xffffffffffff || cnt.count < 7, "cntr-%d", i);
 
 		if (!this_cpu_has_perf_global_status())
 			continue;
@@ -457,10 +477,11 @@ static void check_running_counter_wrmsr(void)
 static void check_emulated_instr(void)
 {
 	uint64_t status, instr_start, brnch_start;
+	unsigned int branch_idx = pmu.is_intel ? 5 : 2;
 	pmu_counter_t brnch_cnt = {
 		.ctr = MSR_GP_COUNTERx(0),
 		/* branch instructions */
-		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[5].unit_sel,
+		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[branch_idx].unit_sel,
 	};
 	pmu_counter_t instr_cnt = {
 		.ctr = MSR_GP_COUNTERx(1),
@@ -654,15 +675,21 @@ int main(int ac, char **av)
 
 	check_invalid_rdpmc_gp();
 
-	if (!pmu.version) {
-		report_skip("No Intel Arch PMU is detected!");
-		return report_summary();
+	if (pmu.is_intel) {
+		if (!pmu.version) {
+			report_skip("No Intel Arch PMU is detected!");
+			return report_summary();
+		}
+		gp_events = (struct pmu_event *)intel_gp_events;
+		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
+		report_prefix_push("Intel");
+		set_ref_cycle_expectations();
+	} else {
+		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
+		gp_events = (struct pmu_event *)amd_gp_events;
+		report_prefix_push("AMD");
 	}
 
-	gp_events = (struct pmu_event *)intel_gp_events;
-	gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
-	set_ref_cycle_expectations();
-
 	printf("PMU version:         %d\n", pmu.version);
 	printf("GP counters:         %d\n", pmu.nr_gp_counters);
 	printf("GP counter width:    %d\n", pmu.gp_counter_width);
@@ -683,5 +710,14 @@ int main(int ac, char **av)
 		report_prefix_pop();
 	}
 
+	if (!pmu.is_intel) {
+		report_prefix_push("K7");
+		pmu.nr_gp_counters = AMD64_NUM_COUNTERS;
+		pmu.msr_gp_counter_base = MSR_K7_PERFCTR0;
+		pmu.msr_gp_event_select_base = MSR_K7_EVNTSEL0;
+		check_counters();
+		report_prefix_pop();
+	}
+
 	return report_summary();
 }
-- 
2.38.1.431.g37b22c650d-goog



