On Wed, Jun 28, 2023 at 1:44 PM Sean Christopherson <seanjc@xxxxxxxxxx> wrote:
>
> On Tue, May 30, 2023, Jinrong Liang wrote:
> > +/* Guest payload for any performance counter counting */
> > +#define NUM_BRANCHES 10
> > +
> > +static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
> > +                                                  void *guest_code)
> > +{
> > +        struct kvm_vm *vm;
> > +
> > +        vm = vm_create_with_one_vcpu(vcpu, guest_code);
> > +        vm_init_descriptor_tables(vm);
> > +        vcpu_init_descriptor_tables(*vcpu);
> > +
> > +        return vm;
> > +}
> > +
> > +static uint64_t run_vcpu(struct kvm_vcpu *vcpu, uint64_t *ucall_arg)
> > +{
> > +        struct ucall uc;
> > +
> > +        vcpu_run(vcpu);
> > +        switch (get_ucall(vcpu, &uc)) {
> > +        case UCALL_SYNC:
> > +                *ucall_arg = uc.args[1];
> > +                break;
> > +        case UCALL_DONE:
> > +                break;
> > +        default:
> > +                TEST_ASSERT(false, "Unexpected exit: %s",
> > +                            exit_reason_str(vcpu->run->exit_reason));
>
> TEST_FAIL()
>
> > +        }
> > +        return uc.cmd;
> > +}
> > +
> > +static void intel_guest_run_arch_event(uint8_t version, uint8_t max_gp_num,
>
> Unless I'm mistaken, this isn't specific to arch events. And with a bit of
> massaging, it doesn't need to be Intel specific. Typically we try to avoid
> speculatively creating infrastructure, but in this case we *know* AMD has vPMU
> support, and we *know* from KVM-Unit-Tests that accounting for the differences
> between MSRs on Intel vs. AMD is doable, so we should write code with an eye
> toward supporting both AMD and Intel.
>
> And then we can avoid having to prefix so many functions with "intel", e.g. this
> can be something like
>
>         static void guest_measure_loop()
>
> or whatever.
>
> > +                                       uint32_t ctr_base_msr, uint64_t evt_code)
> > +{
> > +        uint32_t global_msr = MSR_CORE_PERF_GLOBAL_CTRL;
> > +        unsigned int i;
> > +
> > +        for (i = 0; i < max_gp_num; i++) {
> > +                wrmsr(ctr_base_msr + i, 0);
> > +                wrmsr(MSR_P6_EVNTSEL0 + i, EVENTSEL_OS | EVENTSEL_EN | evt_code);
> > +                if (version > 1)
> > +                        wrmsr(global_msr, BIT_ULL(i));
> > +
> > +                __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
> > +
> > +                if (version > 1)
> > +                        wrmsr(global_msr, 0);
> > +
> > +                GUEST_SYNC(_rdpmc(i));
> > +        }
> > +
> > +        GUEST_DONE();
> > +}
> > +
> > +static void test_arch_events_cpuid(struct kvm_vcpu *vcpu, uint8_t evt_vector,
>
> "vector" is confusing, as "vector" usually refers to a vector number, e.g. for
> IRQs and exceptions. This is the _length_ of a so-called vector. I vote to ignore
> the SDM's use of "vector" in this case and instead call it something like
> arch_events_bitmap_size. And then arch_events_unavailable_mask?
>
> > +                                   uint8_t unavl_mask, uint8_t idx)
> > +{
> > +        struct kvm_cpuid_entry2 *entry;
> > +        uint32_t ctr_msr = MSR_IA32_PERFCTR0;
> > +        bool is_supported;
> > +        uint64_t counter_val = 0;
> > +
> > +        entry = vcpu_get_cpuid_entry(vcpu, 0xa);
> > +        entry->eax = (entry->eax & ~EVT_LEN_MASK) |
> > +                (evt_vector << EVT_LEN_OFS_BIT);
>
> EVT_LEN_OFS_BIT can be a KVM_x86_PROPERTY. And please also add a helper to set
> properties, the whole point of the FEATURE and PROPERTY frameworks is to avoid
> open coding CPUID manipulations. E.g.
>
>         static inline void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
>                                                    struct kvm_x86_cpu_property property,
>                                                    uint32_t value)
>         {
>                 ...
>         }
>
> > +        entry->ebx = (entry->ebx & ~EVENTS_MASK) | unavl_mask;
> > +        vcpu_set_cpuid(vcpu);
> > +
> > +        if (vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
> > +                ctr_msr = MSR_IA32_PMC0;
>
> This can be done in the guest, no?
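
Yes. Everything needed for the decision is visible to the guest: the test
already requires X86_FEATURE_PDCM, so the guest can read
MSR_IA32_PERF_CAPABILITIES itself. A rough, untested sketch of the combined
guest function, using the guest_measure_loop() name suggested above (the
body is lifted straight from the patch; only the counter-base selection is
new):

        static void guest_measure_loop(uint8_t version, uint8_t max_gp_num,
                                       uint64_t evt_code)
        {
                uint32_t ctr_base_msr = MSR_IA32_PERFCTR0;
                unsigned int i;

                /*
                 * PDCM is a hard TEST_REQUIRE() for this test, so the guest
                 * can query PERF_CAPABILITIES directly instead of having the
                 * host pick the counter base and pass it via vcpu_args_set().
                 */
                if (rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
                        ctr_base_msr = MSR_IA32_PMC0;

                for (i = 0; i < max_gp_num; i++) {
                        wrmsr(ctr_base_msr + i, 0);
                        wrmsr(MSR_P6_EVNTSEL0 + i,
                              EVENTSEL_OS | EVENTSEL_EN | evt_code);
                        if (version > 1)
                                wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));

                        __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));

                        if (version > 1)
                                wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);

                        GUEST_SYNC(_rdpmc(i));
                }

                GUEST_DONE();
        }

That drops ctr_msr from the host side entirely, so the vcpu_args_set() call
shrinks to three arguments.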
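
Also, fleshing out the vcpu_set_cpuid_property() stub above: something along
these lines seems workable (untested; it assumes the helper can reuse
__vcpu_get_cpuid_entry() and the function/index/reg/lo_bit/hi_bit fields that
the existing property getters are built on):

        static inline void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
                                                   struct kvm_x86_cpu_property property,
                                                   uint32_t value)
        {
                struct kvm_cpuid_entry2 *entry;

                entry = __vcpu_get_cpuid_entry(vcpu, property.function,
                                               property.index);

                /* Clear the property's bit range, then splat in the new value. */
                (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit,
                                                        property.lo_bit);
                (&entry->eax)[property.reg] |= value << property.lo_bit;

                vcpu_set_cpuid(vcpu);
        }

Then the open-coded EVT_LEN_MASK/EVENTS_MASK manipulation above collapses to
two vcpu_set_cpuid_property() calls.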
>
> > +
> > +        /* Arch event x is supported if EBX[x]=0 && EAX[31:24]>x */
> > +        is_supported = !(entry->ebx & BIT_ULL(idx)) &&
> > +                       (((entry->eax & EVT_LEN_MASK) >> EVT_LEN_OFS_BIT) > idx);
>
> Please add a helper for this.
>
> > +
> > +        vcpu_args_set(vcpu, 4, X86_INTEL_PMU_VERSION, X86_INTEL_MAX_GP_CTR_NUM,
> > +                      ctr_msr, arch_events[idx]);
> > +
> > +        while (run_vcpu(vcpu, &counter_val) != UCALL_DONE)
> > +                TEST_ASSERT(is_supported == !!counter_val,
> > +                            "Unavailable arch event is counting.");
> > +}
> > +
> > +static void intel_check_arch_event_is_unavl(uint8_t idx)
> > +{
> > +        uint8_t eax_evt_vec, ebx_unavl_mask, i, j;
> > +        struct kvm_vcpu *vcpu;
> > +        struct kvm_vm *vm;
> > +
> > +        /*
> > +         * A brute force iteration of all combinations of values is likely to
> > +         * exhaust the limit of the single-threaded thread fd nums, so it's
> > +         * tested here by iterating through all valid values on a single bit.
> > +         */
> > +        for (i = 0; i < ARRAY_SIZE(arch_events); i++) {
> > +                eax_evt_vec = BIT_ULL(i);
> > +                for (j = 0; j < ARRAY_SIZE(arch_events); j++) {
> > +                        ebx_unavl_mask = BIT_ULL(j);
> > +                        vm = pmu_vm_create_with_one_vcpu(&vcpu,
> > +                                                         intel_guest_run_arch_event);
> > +                        test_arch_events_cpuid(vcpu, eax_evt_vec,
> > +                                               ebx_unavl_mask, idx);
> > +
> > +                        kvm_vm_free(vm);
> > +                }
> > +        }
> > +}
> > +
> > +static void intel_test_arch_events(void)
> > +{
> > +        uint8_t idx;
> > +
> > +        for (idx = 0; idx < ARRAY_SIZE(arch_events); idx++) {
> > +                /*
> > +                 * Given the stability of performance event recurrence,
> > +                 * only these arch events are currently being tested:
> > +                 *
> > +                 * - Core cycle event (idx = 0)
> > +                 * - Instruction retired event (idx = 1)
> > +                 * - Reference cycles event (idx = 2)
> > +                 * - Branch instruction retired event (idx = 5)
> > +                 *
> > +                 * Note that reference cycles is one event that actually cannot
> > +                 * be successfully virtualized.
> > +                 */

Actually, there is no reason that reference cycles can't be successfully
virtualized. It just can't be done with the current vPMU infrastructure.

> > +                if (idx > 2 && idx != 5)
>
> As requested in a previous patch, use enums, then the need to document the magic
> numbers goes away.
>
> > +                        continue;
> > +
> > +                intel_check_arch_event_is_unavl(idx);
> > +        }
> > +}
> > +
> > +static void intel_test_pmu_cpuid(void)
> > +{
> > +        intel_test_arch_events();
>
> Either put the Intel-specific TEST_REQUIRE()s in here, or open code the calls.
> Adding a helper and then splitting code across the helper and its sole caller is
> unnecessary.
>
> > +}
> > +
> > +int main(int argc, char *argv[])
> > +{
> > +        TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
> > +
> > +        if (host_cpu_is_intel) {
>
> Presumably AMD will be supported at some point, but until then, this needs to be
>
>         TEST_REQUIRE(host_cpu_is_intel);
>
> > +                TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
> > +                TEST_REQUIRE(X86_INTEL_PMU_VERSION > 0);
> > +                TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
> > +
> > +                intel_test_pmu_cpuid();
> > +        }
> > +
> > +        return 0;
> > +}
> > --
> > 2.31.1
> >
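
Two more concrete sketches for the remaining comments, both untested. For
the "please add a helper" request, the check wraps up cleanly (the helper
name is invented here; EVT_LEN_MASK and EVT_LEN_OFS_BIT are the patch's own
macros, which could likewise become properties):

        /* Arch event X is supported iff EBX bit X is clear and EAX[31:24] > X. */
        static bool arch_event_is_supported(struct kvm_vcpu *vcpu,
                                            uint8_t arch_event)
        {
                struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, 0xa);

                return !(entry->ebx & BIT_ULL(arch_event)) &&
                       (((entry->eax & EVT_LEN_MASK) >> EVT_LEN_OFS_BIT) > arch_event);
        }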
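
And for the magic "idx > 2 && idx != 5" filter, an enum that follows the
SDM's fixed ordering of the architectural events would make both the filter
and the arch_events[] indexing self-documenting (names below are
illustrative):

        /* CPUID.0xA architectural events, in the SDM's fixed bit order. */
        enum intel_pmu_architectural_events {
                INTEL_ARCH_CPU_CYCLES,
                INTEL_ARCH_INSTRUCTIONS_RETIRED,
                INTEL_ARCH_REFERENCE_CYCLES,
                INTEL_ARCH_LLC_REFERENCES,
                INTEL_ARCH_LLC_MISSES,
                INTEL_ARCH_BRANCHES_RETIRED,
                INTEL_ARCH_BRANCHES_MISPREDICTED,
                NR_INTEL_ARCH_EVENTS,
        };

The filter then reads as an explicit allowlist, e.g.:

        if (idx != INTEL_ARCH_CPU_CYCLES &&
            idx != INTEL_ARCH_INSTRUCTIONS_RETIRED &&
            idx != INTEL_ARCH_REFERENCE_CYCLES &&
            idx != INTEL_ARCH_BRANCHES_RETIRED)
                continue;

and the block comment enumerating the tested events can go away.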