> > What kind of issues do you expect this selftest to find?

IMO, it verifies the generic perf scheduling rules but cannot find specific issues. The current vLBR event breaks the generic perf scheduling rules, so I wrote the fix commits and this selftest to keep them from being broken again in the future.

> > e.g., whether the LBR data is corrupted in some cases. If the selftest can verify
> > whether guest/host data is maintained correctly in a high contention env., that
> > can be better to serve the purpose of selftest.

Once the vLBR event is preempted, I see a design gap: the guest will get wrong data. Fixing that gap and verifying the result is out of this commit's scope. I will add this to the commit message, thanks.

> On 6/16/2023 7:33 PM, Xiong Zhang wrote:
> > When the guest accesses an LBR MSR for the first time, KVM creates a vLBR
> > event. The vLBR event joins the perf scheduler and occupies the physical LBR
> > for guest usage. Once the vLBR event is active and owns the LBR, the guest
> > can access the LBR MSRs.
> >
> > But the vLBR event is a per-process pinned event. Perf has a higher priority
> > event type, the per-cpu pinned LBR event, and lower priority event types as
> > well: the per-cpu LBR event and the per-process LBR event. So as long as the
> > host has no higher priority per-cpu pinned LBR event, the vLBR event can
> > occupy the physical LBR all the time. But once a per-cpu pinned LBR event
> > becomes active, the vLBR event can no longer be active, and the guest can no
> > longer access the LBR MSRs.
> >
> > This commit adds a test case to cover guest and host LBR contention.
> >
> > Signed-off-by: Xiong Zhang <xiong.y.zhang@xxxxxxxxx>
> > ---
> >  tools/testing/selftests/kvm/Makefile          |   1 +
> >  .../selftests/kvm/include/ucall_common.h      |  17 ++
> >  .../kvm/x86_64/pmu_event_filter_test.c        |  16 --
> >  .../kvm/x86_64/vmx_pmu_lbr_contend.c          | 171 ++++++++++++++++++
> >  4 files changed, 189 insertions(+), 16 deletions(-)
> >  create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_pmu_lbr_contend.c
> >
> > diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> > index 4761b768b773..422bbc16ba2a 100644
> > --- a/tools/testing/selftests/kvm/Makefile
> > +++ b/tools/testing/selftests/kvm/Makefile
> > @@ -100,6 +100,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
> >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state
> >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test
> >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
> > +TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_lbr_contend
> >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
> >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
> >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
> > diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
> > index 1a6aaef5ccae..c1bb0cacf390 100644
> > --- a/tools/testing/selftests/kvm/include/ucall_common.h
> > +++ b/tools/testing/selftests/kvm/include/ucall_common.h
> > @@ -35,6 +35,23 @@ void ucall(uint64_t cmd, int nargs, ...);
> >  uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
> >  void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
> >
> > +/*
> > + * Run the VM to the next GUEST_SYNC(value), and return the value passed
> > + * to the sync. Any other exit from the guest is fatal.
> > + */
> > +static inline uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
> > +{
> > +	struct ucall uc;
> > +
> > +	vcpu_run(vcpu);
> > +	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
> > +	get_ucall(vcpu, &uc);
> > +	TEST_ASSERT(uc.cmd == UCALL_SYNC,
> > +		    "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
> > +
> > +	return uc.args[1];
> > +}
> > +
> >  /*
> >   * Perform userspace call without any associated data. This bare call avoids
> >   * allocating a ucall struct, which can be useful if the atomic operations in
> > diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
> > index 40507ed9fe8a..8c68029cfb4b 100644
> > --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
> > +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
> > @@ -177,22 +177,6 @@ static void amd_guest_code(void)
> >  	}
> >  }
> >
> > -/*
> > - * Run the VM to the next GUEST_SYNC(value), and return the value passed
> > - * to the sync. Any other exit from the guest is fatal.
> > - */
> > -static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
> > -{
> > -	struct ucall uc;
> > -
> > -	vcpu_run(vcpu);
> > -	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
> > -	get_ucall(vcpu, &uc);
> > -	TEST_ASSERT(uc.cmd == UCALL_SYNC,
> > -		    "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
> > -	return uc.args[1];
> > -}
> > -
> >  static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
> >  {
> >  	uint64_t r;
> > diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_lbr_contend.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_lbr_contend.c
> > new file mode 100644
> > index 000000000000..a6a793f08515
> > --- /dev/null
> > +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_lbr_contend.c
> > @@ -0,0 +1,171 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Test for host and guest LBR preemption
> > + *
> > + * Copyright (C) 2021 Intel Corporation
> > + *
> > + */
> > +
> > +#define _GNU_SOURCE
> > +
> > +#include <linux/perf_event.h>
> > +#include <sys/syscall.h>
> > +#include <sys/sysinfo.h>
> > +#include <unistd.h>
> > +
> > +#include "test_util.h"
> > +#include "kvm_util.h"
> > +#include "processor.h"
> > +
> > +static void create_perf_events(int *fds, int cpu_num, bool pinned)
> > +{
> > +	struct perf_event_attr attr = {
> > +		.type = PERF_TYPE_HARDWARE,
> > +		.size = sizeof(attr),
> > +		.config = PERF_COUNT_HW_CPU_CYCLES,
> > +		.sample_type = PERF_SAMPLE_BRANCH_STACK,
> > +		.sample_period = 1000,
> > +		.pinned = pinned,
> > +		.branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
> > +				      PERF_SAMPLE_BRANCH_USER |
> > +				      PERF_SAMPLE_BRANCH_KERNEL,
> > +	};
> > +	int i;
> > +
> > +	for (i = 0; i < cpu_num; i++) {
> > +		fds[i] = syscall(__NR_perf_event_open, &attr, -1, i, -1, PERF_FLAG_FD_CLOEXEC);
> > +		TEST_ASSERT(fds[i] != -1, "Failed to create lbr event on cpu%d", i);
> > +	}
> > +}
> > +
> > +static void release_perf_events(int *fds, int cpu_num)
> > +{
> > +	int i;
> > +
> > +	for (i = 0; i < cpu_num; i++)
> > +		close(fds[i]);
> > +}
> > +
> > +#define PERF_CAP_LBR_FMT_MASK 0x1F
> > +
> > +#define LBR_NOT_SUPPORTED 0xFFFE
> > +#define LBR_MSR_WRITE_ERROR 0xFFFD
> > +
> > +#define LBR_MODE_CHECK_PASS 0x0
> > +#define LBR_MSR_WRITE_SUCC 0x1
> > +
> > +static bool check_lbr_msr(void)
> > +{
> > +	uint64_t v, old_val;
> > +
> > +	old_val = rdmsr(MSR_LBR_TOS);
> > +
> > +	v = old_val ^ 0x3UL;
> > +
> > +	wrmsr(MSR_LBR_TOS, v);
> > +	if (rdmsr(MSR_LBR_TOS) != v)
> > +		return false;
> > +
> > +	wrmsr(MSR_LBR_TOS, old_val);
> > +	if (rdmsr(MSR_LBR_TOS) != old_val)
> > +		return false;
> > +
> > +	return true;
> > +}
> > +
> > +static void guest_code(void)
> > +{
> > +	uint64_t v;
> > +
> > +	v = rdmsr(MSR_IA32_PERF_CAPABILITIES);
> > +	if ((v & PERF_CAP_LBR_FMT_MASK) == 0)
> > +		GUEST_SYNC(LBR_NOT_SUPPORTED);
> > +
> > +	GUEST_SYNC(LBR_MODE_CHECK_PASS);
> > +
> > +	while (1) {
> > +		if (!check_lbr_msr()) {
> > +			GUEST_SYNC(LBR_MSR_WRITE_ERROR);
> > +			continue;
> > +		}
> > +
> > +		/* Enable LBR to avoid KVM recycling the LBR. */
> > +		v = rdmsr(MSR_IA32_DEBUGCTLMSR);
> > +		v |= DEBUGCTLMSR_LBR;
> > +		wrmsr(MSR_IA32_DEBUGCTLMSR, v);
> > +
> > +		GUEST_SYNC(LBR_MSR_WRITE_SUCC);
> > +	}
> > +}
> > +
> > +int main(int argc, char *argv[])
> > +{
> > +	int *fds, ncpus;
> > +	struct kvm_vcpu *vcpu;
> > +	struct kvm_vm *vm;
> > +	uint64_t r;
> > +
> > +	TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
> > +	TEST_REQUIRE(host_cpu_is_intel);
> > +	TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION));
> > +
> > +	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
> > +	r = run_vcpu_to_sync(vcpu);
> > +	TEST_ASSERT(r == LBR_MODE_CHECK_PASS,
> > +		    "LBR format in guest PERF_CAP msr isn't correct");
> > +
> > +	ncpus = get_nprocs();
> > +	fds = malloc(sizeof(int) * ncpus);
> > +	TEST_ASSERT(fds != NULL, "Failed to allocate fds for all cpus");
> > +
> > +	/* Create per cpu pinned LBR events, so the host owns the LBR. */
> > +	create_perf_events(fds, ncpus, true);
> > +
> > +	/* Since the LBR is owned by the per cpu pinned LBR events, the guest can't
> > +	 * get it, so the guest can't access the LBR_TOS msr.
> > +	 */
> > +	r = run_vcpu_to_sync(vcpu);
> > +	TEST_ASSERT(r == LBR_MSR_WRITE_ERROR,
> > +		    "1. Unexpectedly succeeded to read/write guest LBR_TOS msr");
> > +
> > +	release_perf_events(fds, ncpus);
> > +
> > +	/* The per cpu pinned events are closed and the LBR is free, so the guest
> > +	 * can get it and access the LBR_TOS msr.
> > +	 */
> > +	r = run_vcpu_to_sync(vcpu);
> > +	TEST_ASSERT(r == LBR_MSR_WRITE_SUCC,
> > +		    "2. Failed to read/write guest LBR_TOS msr");
> > +
> > +	/* Create per cpu non-pinned LBR events; their priority is lower than the
> > +	 * vLBR event's, so they can't take the LBR back from the vLBR event.
> > +	 */
> > +	create_perf_events(fds, ncpus, false);
> > +
> > +	/* The LBR is still owned by the guest, so the guest can access LBR_TOS
> > +	 * successfully.
> > +	 */
> > +	r = run_vcpu_to_sync(vcpu);
> > +	TEST_ASSERT(r == LBR_MSR_WRITE_SUCC,
> > +		    "3. Failed to read/write guest LBR_TOS msr");
> > +
> > +	release_perf_events(fds, ncpus);
> > +
> > +	/* Create per cpu pinned LBR events; their priority is higher than the vLBR
> > +	 * event's, so they take the LBR back from the vLBR event.
> > +	 */
> > +	create_perf_events(fds, ncpus, true);
> > +
> > +	/* The LBR is preempted by the per cpu pinned LBR events, so the guest can't
> > +	 * access the LBR_TOS msr.
> > +	 */
> > +	r = run_vcpu_to_sync(vcpu);
> > +	TEST_ASSERT(r == LBR_MSR_WRITE_ERROR,
> > +		    "4. Unexpectedly succeeded to read/write guest LBR_TOS msr");
> > +
> > +	release_perf_events(fds, ncpus);
> > +
> > +	kvm_vm_free(vm);
> > +
> > +	free(fds);
> > +
> > +	return 0;
> > +}
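
Side note, in case it helps to reproduce the contention outside of the selftest: the priority ladder described in the commit message depends only on how the host events are opened. The vLBR event is scheduled on the host like a per-task pinned event; per-cpu pinned events (pid = -1, cpu = N, pinned = 1, as opened by create_perf_events(fds, ncpus, true) above) always preempt it, and non-pinned events never do. Below is a minimal, untested userspace sketch, not part of the patch and with a made-up helper name, that opens the per-task pinned variant for the current process:

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Hypothetical helper: open a per-task pinned branch-stack (LBR) event. */
static int open_task_pinned_lbr_event(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
	attr.sample_period = 1000;
	attr.pinned = 1;	/* same scheduling class as KVM's vLBR event */
	attr.exclude_kernel = 1;
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER;

	/* pid = 0, cpu = -1: follow the current task on whichever cpu it runs. */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
}

int main(void)
{
	int fd = open_task_pinned_lbr_event();

	if (fd < 0)
		perror("perf_event_open");
	else
		printf("per-task pinned LBR event opened, fd = %d\n", fd);
	return 0;
}

The selftest above deliberately only checks whether the guest can still read and write MSR_LBR_TOS under each of these host event configurations; verifying the LBR record contents after a preemption is the design gap mentioned at the top and is out of scope here.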