On Mon, Aug 14, 2023, Jinrong Liang wrote:
> Add test case for AMD Guest PerfMonV2. Also test Intel
> MSR_CORE_PERF_GLOBAL_STATUS and MSR_CORE_PERF_GLOBAL_OVF_CTRL.
> 
> Signed-off-by: Jinrong Liang <cloudliang@xxxxxxxxxxx>
> ---
>  .../kvm/x86_64/pmu_basic_functionality_test.c | 48 ++++++++++++++++++-
>  1 file changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/testing/selftests/kvm/x86_64/pmu_basic_functionality_test.c b/tools/testing/selftests/kvm/x86_64/pmu_basic_functionality_test.c
> index cb2a7ad5c504..02bd1fe3900b 100644
> --- a/tools/testing/selftests/kvm/x86_64/pmu_basic_functionality_test.c
> +++ b/tools/testing/selftests/kvm/x86_64/pmu_basic_functionality_test.c
> @@ -58,7 +58,9 @@ static uint64_t run_vcpu(struct kvm_vcpu *vcpu, uint64_t *ucall_arg)
> 
>  static void guest_measure_loop(uint64_t event_code)
>  {
> +        uint64_t global_ovf_ctrl_msr, global_status_msr, global_ctrl_msr;
>          uint8_t nr_gp_counters, pmu_version = 1;
> +        uint8_t gp_counter_bit_width = 48;
>          uint64_t event_sel_msr;
>          uint32_t counter_msr;
>          unsigned int i;
> @@ -68,6 +70,12 @@ static void guest_measure_loop(uint64_t event_code)
>                  pmu_version = this_cpu_property(X86_PROPERTY_PMU_VERSION);
>                  event_sel_msr = MSR_P6_EVNTSEL0;
> 
> +                if (pmu_version > 1) {
> +                        global_ovf_ctrl_msr = MSR_CORE_PERF_GLOBAL_OVF_CTRL;
> +                        global_status_msr = MSR_CORE_PERF_GLOBAL_STATUS;
> +                        global_ctrl_msr = MSR_CORE_PERF_GLOBAL_CTRL;
> +                }
> +
>                  if (rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
>                          counter_msr = MSR_IA32_PMC0;
>                  else
> @@ -76,6 +84,17 @@ static void guest_measure_loop(uint64_t event_code)
>                  nr_gp_counters = AMD64_NR_COUNTERS;
>                  event_sel_msr = MSR_K7_EVNTSEL0;
>                  counter_msr = MSR_K7_PERFCTR0;
> +
> +                if (this_cpu_has(X86_FEATURE_AMD_PMU_EXT_CORE) &&
> +                    this_cpu_has(X86_FEATURE_AMD_PERFMON_V2)) {
> +                        nr_gp_counters = this_cpu_property(X86_PROPERTY_AMD_PMU_NR_CORE_COUNTERS);
> +                        global_ovf_ctrl_msr = MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR;
> +                        global_status_msr = MSR_AMD64_PERF_CNTR_GLOBAL_STATUS;
> +                        global_ctrl_msr = MSR_AMD64_PERF_CNTR_GLOBAL_CTL;
> +                        event_sel_msr = MSR_F15H_PERF_CTL0;
> +                        counter_msr = MSR_F15H_PERF_CTR0;
> +                        pmu_version = 2;
> +                }

Please use an if-else when the two things are completely exclusive, i.e. don't
set "defaults" and then override them.

>          }
> 
>          for (i = 0; i < nr_gp_counters; i++) {
> @@ -84,14 +103,39 @@ static void guest_measure_loop(uint64_t event_code)
> 
>                         ARCH_PERFMON_EVENTSEL_ENABLE | event_code);
> 
>                  if (pmu_version > 1) {
> -                        wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
> +                        wrmsr(global_ctrl_msr, BIT_ULL(i));
>                          __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
> -                        wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
> +                        wrmsr(global_ctrl_msr, 0);
>                          GUEST_SYNC(_rdpmc(i));
>                  } else {
>                          __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
>                          GUEST_SYNC(_rdpmc(i));
>                  }

This is extremely difficult to follow.  I think the sane thing to do is to
split this up into helpers, e.g. send pmu_version > 1 into one path, and
pmu_version <= 1 into an entirely different path.  E.g. something like this?
static void guest_measure_loop(uint64_t event_code)
{
        uint64_t global_ovf_ctrl_msr, global_status_msr, global_ctrl_msr;
        uint8_t nr_gp_counters, pmu_version;
        uint8_t gp_counter_bit_width;
        uint64_t event_sel_msr;
        uint64_t counter;
        uint32_t counter_msr;
        unsigned int i;

        if (host_cpu_is_intel) {
                pmu_version = this_cpu_property(X86_PROPERTY_PMU_VERSION);
        } else if (this_cpu_has(X86_FEATURE_PERFCTR_CORE) &&
                   this_cpu_has(X86_FEATURE_PERFMON_V2)) {
                pmu_version = 2;
        } else {
                pmu_version = 1;
        }

        if (pmu_version <= 1) {
                guest_measure_pmu_legacy(...);
                return;
        }

        if (host_cpu_is_intel) {
                nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
                global_ovf_ctrl_msr = MSR_CORE_PERF_GLOBAL_OVF_CTRL;
                global_status_msr = MSR_CORE_PERF_GLOBAL_STATUS;
                global_ctrl_msr = MSR_CORE_PERF_GLOBAL_CTRL;
                gp_counter_bit_width = this_cpu_property(X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH);
                event_sel_msr = MSR_P6_EVNTSEL0;

                if (rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
                        counter_msr = MSR_IA32_PMC0;
                else
                        counter_msr = MSR_IA32_PERFCTR0;
        } else {
                nr_gp_counters = this_cpu_property(X86_PROPERTY_AMD_PMU_NR_CORE_COUNTERS);
                global_ovf_ctrl_msr = MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR;
                global_status_msr = MSR_AMD64_PERF_CNTR_GLOBAL_STATUS;
                global_ctrl_msr = MSR_AMD64_PERF_CNTR_GLOBAL_CTL;
                event_sel_msr = MSR_F15H_PERF_CTL0;
                counter_msr = MSR_F15H_PERF_CTR0;
                gp_counter_bit_width = 48;
        }

        for (i = 0; i < nr_gp_counters; i++) {
                wrmsr(counter_msr + i, 0);
                wrmsr(event_sel_msr + i, ARCH_PERFMON_EVENTSEL_OS |
                      ARCH_PERFMON_EVENTSEL_ENABLE | event_code);

                wrmsr(global_ctrl_msr, BIT_ULL(i));
                __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
                wrmsr(global_ctrl_msr, 0);

                counter = _rdpmc(i);
                GUEST_ASSERT_EQ(this_pmu_has(...), !!counter);

                if (counter) {
                        /* Counter is stopped, zero it and verify it stays zero. */
                        wrmsr(global_ctrl_msr, 0);
                        wrmsr(counter_msr + i, 0);
                        __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
                        GUEST_ASSERT(!_rdpmc(i));

                        /* Re-enable the counter and verify it counts again. */
                        wrmsr(global_ctrl_msr, BIT_ULL(i));
                        __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
                        GUEST_ASSERT(_rdpmc(i));

                        /* Force an overflow and check the global status bit... */
                        wrmsr(global_ctrl_msr, 0);
                        wrmsr(counter_msr + i, (1ULL << gp_counter_bit_width) - 2);
                        wrmsr(global_ctrl_msr, BIT_ULL(i));
                        __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
                        GUEST_ASSERT(rdmsr(global_status_msr) & BIT_ULL(i));

                        /* ...and that the OVF_CTRL/STATUS_CLR MSR clears it. */
                        wrmsr(global_ctrl_msr, 0);
                        wrmsr(global_ovf_ctrl_msr, BIT_ULL(i));
                        GUEST_ASSERT(!(rdmsr(global_status_msr) & BIT_ULL(i)));
                }
        }
}
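And the legacy path could then be a tiny helper, e.g. something like the
below.  To be clear, this is only a sketch: the guest_measure_pmu_legacy()
name comes from the call above, the parameter list is a guess at what the
caller would pass, and the body is just the pmu_version <= 1 logic from the
current patch.

/* Hypothetical signature; pass in whatever the caller already computed. */
static void guest_measure_pmu_legacy(uint64_t event_sel_msr,
                                     uint32_t counter_msr,
                                     uint8_t nr_gp_counters,
                                     uint64_t event_code)
{
        unsigned int i;

        for (i = 0; i < nr_gp_counters; i++) {
                wrmsr(counter_msr + i, 0);
                wrmsr(event_sel_msr + i, ARCH_PERFMON_EVENTSEL_OS |
                      ARCH_PERFMON_EVENTSEL_ENABLE | event_code);

                /* No global control MSRs before v2, the counter is live now. */
                __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
                GUEST_SYNC(_rdpmc(i));
        }
}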