Sean Christopherson <seanjc@xxxxxxxxxx> 于2023年11月7日周二 04:40写道: > > On Mon, Nov 06, 2023, JinrongLiang wrote: > > 在 2023/11/4 21:20, Jim Mattson 写道: > > > > diff --git a/tools/testing/selftests/kvm/include/pmu.h b/tools/testing/selftests/kvm/include/pmu.h > > > > new file mode 100644 > > > > index 000000000000..987602c62b51 > > > > --- /dev/null > > > > +++ b/tools/testing/selftests/kvm/include/pmu.h > > > > @@ -0,0 +1,84 @@ > > > > +/* SPDX-License-Identifier: GPL-2.0-only */ > > > > +/* > > > > + * Copyright (C) 2023, Tencent, Inc. > > > > + */ > > > > +#ifndef SELFTEST_KVM_PMU_H > > > > +#define SELFTEST_KVM_PMU_H > > > > + > > > > +#include <stdint.h> > > > > + > > > > +#define X86_PMC_IDX_MAX 64 > > > > +#define INTEL_PMC_MAX_GENERIC 32 > > > > > > I think this is actually 15. Note that IA32_PMC0 through IA32_PMC7 > > > have MSR indices from 0xc1 through 0xc8, and MSR 0xcf is > > > IA32_CORE_CAPABILITIES. At the very least, we have to handle > > > non-contiguous MSR indices if we ever go beyond IA32_PMC14. > > There's no reason to define this, it's not used in selftests. > > > > > +#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300 > > > > + > > > > +#define GP_COUNTER_NR_OFS_BIT 8 > > > > +#define EVENT_LENGTH_OFS_BIT 24 > > > > + > > > > +#define PMU_VERSION_MASK GENMASK_ULL(7, 0) > > > > +#define EVENT_LENGTH_MASK GENMASK_ULL(31, EVENT_LENGTH_OFS_BIT) > > > > +#define GP_COUNTER_NR_MASK GENMASK_ULL(15, GP_COUNTER_NR_OFS_BIT) > > > > +#define FIXED_COUNTER_NR_MASK GENMASK_ULL(4, 0) > > These are also unneeded, they're superseded by CPUID properties. 
> > > > > +#define ARCH_PERFMON_EVENTSEL_EVENT GENMASK_ULL(7, 0) > > > > +#define ARCH_PERFMON_EVENTSEL_UMASK GENMASK_ULL(15, 8) > > > > +#define ARCH_PERFMON_EVENTSEL_USR BIT_ULL(16) > > > > +#define ARCH_PERFMON_EVENTSEL_OS BIT_ULL(17) > > > > +#define ARCH_PERFMON_EVENTSEL_EDGE BIT_ULL(18) > > > > +#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL BIT_ULL(19) > > > > +#define ARCH_PERFMON_EVENTSEL_INT BIT_ULL(20) > > > > +#define ARCH_PERFMON_EVENTSEL_ANY BIT_ULL(21) > > > > +#define ARCH_PERFMON_EVENTSEL_ENABLE BIT_ULL(22) > > > > +#define ARCH_PERFMON_EVENTSEL_INV BIT_ULL(23) > > > > +#define ARCH_PERFMON_EVENTSEL_CMASK GENMASK_ULL(31, 24) > > > > + > > > > +#define PMC_MAX_FIXED 16 > > Also unneeded. > > > > > +#define PMC_IDX_FIXED 32 > > This one is absolutely ridiculous. It's the shift for the enable bit in global > control, which is super obvious from the name. /s > > > > > + > > > > +/* RDPMC offset for Fixed PMCs */ > > > > +#define PMC_FIXED_RDPMC_BASE BIT_ULL(30) > > > > +#define PMC_FIXED_RDPMC_METRICS BIT_ULL(29) > > > > + > > > > +#define FIXED_BITS_MASK 0xFULL > > > > +#define FIXED_BITS_STRIDE 4 > > > > +#define FIXED_0_KERNEL BIT_ULL(0) > > > > +#define FIXED_0_USER BIT_ULL(1) > > > > +#define FIXED_0_ANYTHREAD BIT_ULL(2) > > > > +#define FIXED_0_ENABLE_PMI BIT_ULL(3) > > > > + > > > > +#define fixed_bits_by_idx(_idx, _bits) \ > > > > + ((_bits) << ((_idx) * FIXED_BITS_STRIDE)) > > *sigh* And now I see where the "i * 4" stuff in the new test comes from. 
My > plan is to redo the above as: > > /* RDPMC offset for Fixed PMCs */ > #define FIXED_PMC_RDPMC_METRICS BIT_ULL(29) > #define FIXED_PMC_RDPMC_BASE BIT_ULL(30) > > #define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx) BIT_ULL((32 + (_idx))) > > #define FIXED_PMC_KERNEL BIT_ULL(0) > #define FIXED_PMC_USER BIT_ULL(1) > #define FIXED_PMC_ANYTHREAD BIT_ULL(2) > #define FIXED_PMC_ENABLE_PMI BIT_ULL(3) > #define FIXED_PMC_NR_BITS 4 > #define FIXED_PMC_CTRL(_idx, _val) ((_val) << ((_idx) * FIXED_PMC_NR_BITS)) > > > > > +#define AMD64_NR_COUNTERS 4 > > > > +#define AMD64_NR_COUNTERS_CORE 6 > > These too can be dropped for now. > > > > > +#define PMU_CAP_FW_WRITES BIT_ULL(13) > > > > +#define PMU_CAP_LBR_FMT 0x3f > > > > + > > > > +enum intel_pmu_architectural_events { > > > > + /* > > > > + * The order of the architectural events matters as support for each > > > > + * event is enumerated via CPUID using the index of the event. > > > > + */ > > > > + INTEL_ARCH_CPU_CYCLES, > > > > + INTEL_ARCH_INSTRUCTIONS_RETIRED, > > > > + INTEL_ARCH_REFERENCE_CYCLES, > > > > + INTEL_ARCH_LLC_REFERENCES, > > > > + INTEL_ARCH_LLC_MISSES, > > > > + INTEL_ARCH_BRANCHES_RETIRED, > > > > + INTEL_ARCH_BRANCHES_MISPREDICTED, > > > > + NR_INTEL_ARCH_EVENTS, > > > > +}; > > > > + > > > > +enum amd_pmu_k7_events { > > > > + AMD_ZEN_CORE_CYCLES, > > > > + AMD_ZEN_INSTRUCTIONS, > > > > + AMD_ZEN_BRANCHES, > > > > + AMD_ZEN_BRANCH_MISSES, > > > > + NR_AMD_ARCH_EVENTS, > > > > +}; > > > > + > > > > +extern const uint64_t intel_pmu_arch_events[]; > > > > +extern const uint64_t amd_pmu_arch_events[]; > > > > > > AMD doesn't define *any* architectural events. Perhaps > > > amd_pmu_zen_events[], though who knows what Zen5 and beyond will > > > bring? 
> > > > > > > +extern const int intel_pmu_fixed_pmc_events[]; > > > > + > > > > +#endif /* SELFTEST_KVM_PMU_H */ > > > > diff --git a/tools/testing/selftests/kvm/lib/pmu.c b/tools/testing/selftests/kvm/lib/pmu.c > > > > new file mode 100644 > > > > index 000000000000..27a6c35f98a1 > > > > --- /dev/null > > > > +++ b/tools/testing/selftests/kvm/lib/pmu.c > > > > @@ -0,0 +1,28 @@ > > > > +// SPDX-License-Identifier: GPL-2.0-only > > > > +/* > > > > + * Copyright (C) 2023, Tencent, Inc. > > > > + */ > > > > + > > > > +#include <stdint.h> > > > > + > > > > +#include "pmu.h" > > > > + > > > > +/* Definitions for Architectural Performance Events */ > > > > +#define ARCH_EVENT(select, umask) (((select) & 0xff) | ((umask) & 0xff) << 8) > > > > > > There's nothing architectural about this. Perhaps RAW_EVENT() for > > > consistency with perf? > > Works for me. > > > > > +const uint64_t intel_pmu_arch_events[] = { > > > > + [INTEL_ARCH_CPU_CYCLES] = ARCH_EVENT(0x3c, 0x0), > > > > + [INTEL_ARCH_INSTRUCTIONS_RETIRED] = ARCH_EVENT(0xc0, 0x0), > > > > + [INTEL_ARCH_REFERENCE_CYCLES] = ARCH_EVENT(0x3c, 0x1), > > > > + [INTEL_ARCH_LLC_REFERENCES] = ARCH_EVENT(0x2e, 0x4f), > > > > + [INTEL_ARCH_LLC_MISSES] = ARCH_EVENT(0x2e, 0x41), > > > > + [INTEL_ARCH_BRANCHES_RETIRED] = ARCH_EVENT(0xc4, 0x0), > > > > + [INTEL_ARCH_BRANCHES_MISPREDICTED] = ARCH_EVENT(0xc5, 0x0), > > > > > > [INTEL_ARCH_TOPDOWN_SLOTS] = ARCH_EVENT(0xa4, 1), > > ... > > > > > @@ -63,7 +50,6 @@ > > > > > > > > #define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0) > > > > > > Now AMD_ZEN_BRANCHES, above? > > > > Yes, I forgot to replace INTEL_BR_RETIRED, AMD_ZEN_BR_RETIRED and > > INST_RETIRED in pmu_event_filter_test.c and remove their macro definitions. > > Having to go through an array to get a hardcoded value is silly, e.g. it makes > it unnecessarily difficult to reference the encodings because they aren't simple > literals. 
>
> My vote is this:
>
> #define INTEL_ARCH_CPU_CYCLES RAW_EVENT(0x3c, 0x00)
> #define INTEL_ARCH_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
> #define INTEL_ARCH_REFERENCE_CYCLES RAW_EVENT(0x3c, 0x01)
> #define INTEL_ARCH_LLC_REFERENCES RAW_EVENT(0x2e, 0x4f)
> #define INTEL_ARCH_LLC_MISSES RAW_EVENT(0x2e, 0x41)
> #define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00)
> #define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00)
> #define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01)
>
> #define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00)
> #define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
> #define AMD_ZEN_BRANCHES_RETIRED RAW_EVENT(0xc2, 0x00)
> #define AMD_ZEN_BRANCHES_MISPREDICTED RAW_EVENT(0xc3, 0x00)
>
> /*
>  * Note! The order and thus the index of the architectural events matters as
>  * support for each event is enumerated via CPUID using the index of the event.
>  */
> enum intel_pmu_architectural_events {
>         INTEL_ARCH_CPU_CYCLES_INDEX,
>         INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX,
>         INTEL_ARCH_REFERENCE_CYCLES_INDEX,
>         INTEL_ARCH_LLC_REFERENCES_INDEX,
>         INTEL_ARCH_LLC_MISSES_INDEX,
>         INTEL_ARCH_BRANCHES_RETIRED_INDEX,
>         INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
>         INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
>         NR_INTEL_ARCH_EVENTS,
> };
>
> enum amd_pmu_zen_events {
>         AMD_ZEN_CORE_CYCLES_INDEX,
>         AMD_ZEN_INSTRUCTIONS_INDEX,
>         AMD_ZEN_BRANCHES_INDEX,
>         AMD_ZEN_BRANCH_MISSES_INDEX,
>         NR_AMD_ZEN_EVENTS,
> };
>
> extern const uint64_t intel_pmu_arch_events[];
> extern const uint64_t amd_pmu_zen_events[];
>
> ...
>
>
> const uint64_t intel_pmu_arch_events[] = {
>         INTEL_ARCH_CPU_CYCLES,
>         INTEL_ARCH_INSTRUCTIONS_RETIRED,
>         INTEL_ARCH_REFERENCE_CYCLES,
>         INTEL_ARCH_LLC_REFERENCES,
>         INTEL_ARCH_LLC_MISSES,
>         INTEL_ARCH_BRANCHES_RETIRED,
>         INTEL_ARCH_BRANCHES_MISPREDICTED,
>         INTEL_ARCH_TOPDOWN_SLOTS,
> };
> kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
>
> const uint64_t amd_pmu_zen_events[] = {
>         AMD_ZEN_CORE_CYCLES,
>         AMD_ZEN_INSTRUCTIONS_RETIRED,
>         AMD_ZEN_BRANCHES_RETIRED,
>         AMD_ZEN_BRANCHES_MISPREDICTED,
> };
> kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);

LGTM, thanks.