On Fri, Oct 21, 2022, Aaron Lewis wrote:
> ---
>  .../kvm/x86_64/pmu_event_filter_test.c        | 344 +++++++++++++++++-
>  1 file changed, 342 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
> index 0750e2fa7a38..926c449aac78 100644
> --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
> +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
> @@ -442,6 +442,337 @@ static bool use_amd_pmu(void)
>  		 is_zen3(entry->eax));
>  }
>  
> +/*
> + * "MEM_INST_RETIRED.ALL_LOADS", "MEM_INST_RETIRED.ALL_STORES", and
> + * "MEM_INST_RETIRED.ANY" from https://perfmon-events.intel.com/
> + * supported on Intel Xeon processors:
> + *  - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake.
> + */
> +#define MEM_INST_RETIRED		0xD0
> +#define MEM_INST_RETIRED_LOAD		EVENT(MEM_INST_RETIRED, 0x81)
> +#define MEM_INST_RETIRED_STORE		EVENT(MEM_INST_RETIRED, 0x82)
> +#define MEM_INST_RETIRED_LOAD_STORE	EVENT(MEM_INST_RETIRED, 0x83)
> +
> +static bool supports_event_mem_inst_retired(void)
> +{
> +	uint32_t eax, ebx, ecx, edx;
> +
> +	cpuid(1, &eax, &ebx, &ecx, &edx);
> +	if (x86_family(eax) == 0x6) {
> +		switch (x86_model(eax)) {
> +		/* Sapphire Rapids */
> +		case 0x8F:

I'm not sure which is worse, open coding, or turbostat's rather insane
include shenanigans.

  tools/power/x86/turbostat/Makefile:override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
  tools/power/x86/turbostat/turbostat.c:#include INTEL_FAMILY_HEADER

As a follow-up, can you look into copying arch/x86/include/asm/intel-family.h
into tools/arch/x86/include/asm/ and using the #defines here?

> +		/* Ice Lake */
> +		case 0x6A:
> +		/* Skylake */
> +		/* Cascade Lake */
> +		case 0x55:
> +			return true;
> +		}
> +	}
> +
> +	return false;
> +}
> +
> +static int num_gp_counters(void)
> +{
> +	const struct kvm_cpuid_entry2 *entry;
> +
> +	entry = kvm_get_supported_cpuid_entry(0xa);
> +	union cpuid10_eax eax = { .full = entry->eax };
> +
> +	return eax.split.num_counters;

Rock-Paper-Scissors, loser has to handle the merge conflict? :-)

https://lore.kernel.org/all/20221006005125.680782-10-seanjc@xxxxxxxxxx

> +static uint64_t masked_events_guest_test(uint32_t msr_base)
> +{
> +	uint64_t ld0, ld1, st0, st1, ls0, ls1;
> +	struct perf_counter c;
> +	int val;

A comment here to call out that the counts don't need to be exact would be
helpful, i.e. that the goal is purely to ensure a non-zero count when the
event is allowed.  My initial reaction was that this code would be fragile,
e.g. if the compiler throws in extra loads/stores, but the count is a pure
pass/fail (which is a very good thing).

> +	ld0 = rdmsr(msr_base + 0);
> +	st0 = rdmsr(msr_base + 1);
> +	ls0 = rdmsr(msr_base + 2);
> +
> +	__asm__ __volatile__("movl $0, %[v];"
> +			     "movl %[v], %%eax;"
> +			     "incl %[v];"
> +			     : [v]"+m"(val) :: "eax");
> +
> +	ld1 = rdmsr(msr_base + 0);
> +	st1 = rdmsr(msr_base + 1);
> +	ls1 = rdmsr(msr_base + 2);
> +
> +	c.loads = ld1 - ld0;
> +	c.stores = st1 - st0;
> +	c.loads_stores = ls1 - ls0;
> +
> +	return c.raw;
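
Back to the intel-family.h suggestion above: a rough, untested sketch of what
supports_event_mem_inst_retired() could look like once the header is mirrored
into tools/arch/x86/include/asm/.  The include path is an assumption about how
the selftests Makefile would wire it up, and the INTEL_FAM6_* names are taken
from the in-kernel header (nothing in this series provides them); the model
numbers are the same ones the patch already open codes.

	#include "asm/intel-family.h"	/* assumed tools/ copy of the header */

	static bool supports_event_mem_inst_retired(void)
	{
		uint32_t eax, ebx, ecx, edx;

		cpuid(1, &eax, &ebx, &ecx, &edx);
		if (x86_family(eax) != 0x6)
			return false;

		switch (x86_model(eax)) {
		case INTEL_FAM6_SAPPHIRERAPIDS_X:	/* 0x8F */
		case INTEL_FAM6_ICELAKE_X:		/* 0x6A */
		/* Cascade Lake shares model 0x55 with Skylake-X. */
		case INTEL_FAM6_SKYLAKE_X:		/* 0x55 */
			return true;
		}

		return false;
	}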