> > What kind of issues do you expect this selftest to find?

IMO, it verifies the generic perf scheduling rules but cannot find specific issues. The current vLBR event breaks the generic perf scheduling rules, so I wrote the fix commits and this selftest to keep them from being broken again in the future.

> > e.g., whether the LBR data is corrupted in some cases. If the selftest can verify
> > whether guest/host data is maintained correctly in a high contention env., that
> > can be better to serve the purpose of selftest.

Once the vLBR event is preempted, I see a design gap: the guest will get wrong data. Fixing that gap and verifying the result is out of this commit's scope. I will add this to the commit message, thanks.

> On 6/16/2023 7:33 PM, Xiong Zhang wrote:
> > When the guest accesses an LBR MSR for the first time, KVM creates a vLBR
> > event. The vLBR event joins the perf scheduler and occupies the physical LBR
> > for guest usage. Once the vLBR event is active and owns the LBR, the guest
> > can access the LBR MSRs.
> >
> > But the vLBR event is a per-process pinned event. Perf has a higher priority
> > event type, the per-cpu pinned LBR event, and lower priority event types as
> > well: the per-cpu LBR event and the per-process LBR event. So as long as the
> > host has no higher priority per-cpu pinned LBR event, the vLBR event can
> > occupy the physical LBR all the time. But once a per-cpu pinned LBR event
> > becomes active, the vLBR event can no longer be active, and the guest can no
> > longer access the LBR MSRs.
> >
> > This commit adds a test case to cover guest and host LBR contention.
> >
> > Signed-off-by: Xiong Zhang <xiong.y.zhang@xxxxxxxxx>
> > ---
> >  tools/testing/selftests/kvm/Makefile          |   1 +
> >  .../selftests/kvm/include/ucall_common.h      |  17 ++
> >  .../kvm/x86_64/pmu_event_filter_test.c        |  16 --
> >  .../kvm/x86_64/vmx_pmu_lbr_contend.c          | 171 ++++++++++++++++++
> >  4 files changed, 189 insertions(+), 16 deletions(-)
> >  create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_pmu_lbr_contend.c
> >
> > diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> > index 4761b768b773..422bbc16ba2a 100644
> > --- a/tools/testing/selftests/kvm/Makefile
> > +++ b/tools/testing/selftests/kvm/Makefile
> > @@ -100,6 +100,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
> >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state
> >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test
> >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
> > +TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_lbr_contend
> >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
> >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
> >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
> > diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
> > index 1a6aaef5ccae..c1bb0cacf390 100644
> > --- a/tools/testing/selftests/kvm/include/ucall_common.h
> > +++ b/tools/testing/selftests/kvm/include/ucall_common.h
> > @@ -35,6 +35,23 @@ void ucall(uint64_t cmd, int nargs, ...);
> >  uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
> >  void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
> >
> > +/*
> > + * Run the VM to the next GUEST_SYNC(value), and return the value passed
> > + * to the sync. Any other exit from the guest is fatal.
> > + */
> > +static inline uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
> > +{
> > +	struct ucall uc;
> > +
> > +	vcpu_run(vcpu);
> > +	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
> > +	get_ucall(vcpu, &uc);
> > +	TEST_ASSERT(uc.cmd == UCALL_SYNC,
> > +		    "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
> > +
> > +	return uc.args[1];
> > +}
> > +
> >  /*
> >   * Perform userspace call without any associated data. This bare call avoids
> >   * allocating a ucall struct, which can be useful if the atomic operations in
> > diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
> > index 40507ed9fe8a..8c68029cfb4b 100644
> > --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
> > +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
> > @@ -177,22 +177,6 @@ static void amd_guest_code(void)
> >  	}
> >  }
> >
> > -/*
> > - * Run the VM to the next GUEST_SYNC(value), and return the value passed
> > - * to the sync. Any other exit from the guest is fatal.
> > - */
> > -static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
> > -{
> > -	struct ucall uc;
> > -
> > -	vcpu_run(vcpu);
> > -	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
> > -	get_ucall(vcpu, &uc);
> > -	TEST_ASSERT(uc.cmd == UCALL_SYNC,
> > -		    "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
> > -	return uc.args[1];
> > -}
> > -
> >  static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
> >  {
> >  	uint64_t r;
> > diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_lbr_contend.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_lbr_contend.c
> > new file mode 100644
> > index 000000000000..a6a793f08515
> > --- /dev/null
> > +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_lbr_contend.c
> > @@ -0,0 +1,171 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Test for host and guest LBR preemption
> > + *
> > + * Copyright (C) 2021 Intel Corporation
> > + *
> > + */
> > +
> > +#define _GNU_SOURCE
> > +
> > +#include <linux/perf_event.h>
> > +#include <sys/syscall.h>
> > +#include <sys/sysinfo.h>
> > +#include <unistd.h>
> > +
> > +#include "test_util.h"
> > +#include "kvm_util.h"
> > +#include "processor.h"
> > +
> > +static void create_perf_events(int *fds, int cpu_num, bool pinned)
> > +{
> > +	struct perf_event_attr attr = {
> > +		.type = PERF_TYPE_HARDWARE,
> > +		.size = sizeof(attr),
> > +		.config = PERF_COUNT_HW_CPU_CYCLES,
> > +		.sample_type = PERF_SAMPLE_BRANCH_STACK,
> > +		.sample_period = 1000,
> > +		.pinned = pinned,
> > +		.branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
> > +				      PERF_SAMPLE_BRANCH_USER |
> > +				      PERF_SAMPLE_BRANCH_KERNEL,
> > +	};
> > +	int i;
> > +
> > +	for (i = 0; i < cpu_num; i++) {
> > +		fds[i] = syscall(__NR_perf_event_open, &attr, -1, i, -1, PERF_FLAG_FD_CLOEXEC);
> > +		TEST_ASSERT(fds[i] != -1, "Failed to create lbr event on cpu%d", i);
> > +	}
> > +}
> > +
> > +static void release_perf_events(int *fds, int cpu_num)
> > +{
> > +	int i;
> > +
> > +	for (i = 0; i < cpu_num; i++)
> > +		close(fds[i]);
> > +}
> > +
> > +#define PERF_CAP_LBR_FMT_MASK 0x1F
> > +
> > +#define LBR_NOT_SUPPORTED 0xFFFE
> > +#define LBR_MSR_WRITE_ERROR 0xFFFD
> > +
> > +#define LBR_MODE_CHECK_PASS 0x0
> > +#define LBR_MSR_WRITE_SUCC 0x1
> > +
> > +static bool check_lbr_msr(void)
> > +{
> > +	uint64_t v, old_val;
> > +
> > +	old_val = rdmsr(MSR_LBR_TOS);
> > +
> > +	v = old_val ^ 0x3UL;
> > +
> > +	wrmsr(MSR_LBR_TOS, v);
> > +	if (rdmsr(MSR_LBR_TOS) != v)
> > +		return false;
> > +
> > +	wrmsr(MSR_LBR_TOS, old_val);
> > +	if (rdmsr(MSR_LBR_TOS) != old_val)
> > +		return false;
> > +
> > +	return true;
> > +}
> > +
> > +static void guest_code(void)
> > +{
> > +	uint64_t v;
> > +
> > +	v = rdmsr(MSR_IA32_PERF_CAPABILITIES);
> > +	if ((v & PERF_CAP_LBR_FMT_MASK) == 0)
> > +		GUEST_SYNC(LBR_NOT_SUPPORTED);
> > +
> > +	GUEST_SYNC(LBR_MODE_CHECK_PASS);
> > +
> > +	while (1) {
> > +		if (!check_lbr_msr()) {
> > +			GUEST_SYNC(LBR_MSR_WRITE_ERROR);
> > +			continue;
> > +		}
> > +
> > +		/* Enable LBR to avoid KVM recycling the LBR. */
> > +		v = rdmsr(MSR_IA32_DEBUGCTLMSR);
> > +		v |= DEBUGCTLMSR_LBR;
> > +		wrmsr(MSR_IA32_DEBUGCTLMSR, v);
> > +
> > +		GUEST_SYNC(LBR_MSR_WRITE_SUCC);
> > +	}
> > +}
> > +
> > +int main(int argc, char *argv[])
> > +{
> > +	int *fds, ncpus;
> > +	struct kvm_vcpu *vcpu;
> > +	struct kvm_vm *vm;
> > +	uint64_t r;
> > +
> > +	TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
> > +	TEST_REQUIRE(host_cpu_is_intel);
> > +	TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION));
> > +
> > +	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
> > +	r = run_vcpu_to_sync(vcpu);
> > +	TEST_ASSERT(r == LBR_MODE_CHECK_PASS,
> > +		    "LBR format in guest PERF_CAP msr isn't correct");
> > +
> > +	ncpus = get_nprocs();
> > +	fds = malloc(sizeof(int) * ncpus);
> > +	TEST_ASSERT(fds != NULL, "Failed to allocate fds for all cpus");
> > +
> > +	/* Create per cpu pinned LBR events, so the host owns the LBR. */
> > +	create_perf_events(fds, ncpus, true);
> > +
> > +	/* Since the LBR is owned by the per cpu pinned LBR events, the guest can't
> > +	 * get it, so the guest can't access the LBR_TOS msr.
> > +	 */
> > +	r = run_vcpu_to_sync(vcpu);
> > +	TEST_ASSERT(r == LBR_MSR_WRITE_ERROR,
> > +		    "1. Unexpectedly succeeded to read/write guest LBR_TOS msr");
> > +
> > +	release_perf_events(fds, ncpus);
> > +
> > +	/* The per cpu pinned events are closed and the LBR is free, so the guest
> > +	 * can get it and access the LBR_TOS msr.
> > +	 */
> > +	r = run_vcpu_to_sync(vcpu);
> > +	TEST_ASSERT(r == LBR_MSR_WRITE_SUCC,
> > +		    "2. Failed to read/write guest LBR_TOS msr");
> > +
> > +	/* Create per cpu non-pinned LBR events; their priority is lower than the
> > +	 * vLBR event's, so they can't take the LBR back from the vLBR event.
> > +	 */
> > +	create_perf_events(fds, ncpus, false);
> > +
> > +	/* The LBR is still owned by the guest, so the guest can access LBR_TOS
> > +	 * successfully.
> > +	 */
> > +	r = run_vcpu_to_sync(vcpu);
> > +	TEST_ASSERT(r == LBR_MSR_WRITE_SUCC,
> > +		    "3. Failed to read/write guest LBR_TOS msr");
> > +
> > +	release_perf_events(fds, ncpus);
> > +
> > +	/* Create per cpu pinned LBR events; their priority is higher than the vLBR
> > +	 * event's, so they take the LBR back from the vLBR event.
> > +	 */
> > +	create_perf_events(fds, ncpus, true);
> > +
> > +	/* The LBR is preempted by the per cpu pinned LBR events, so the guest can't
> > +	 * access the LBR_TOS msr.
> > +	 */
> > +	r = run_vcpu_to_sync(vcpu);
> > +	TEST_ASSERT(r == LBR_MSR_WRITE_ERROR,
> > +		    "4. Unexpectedly succeeded to read/write guest LBR_TOS msr");
> > +
> > +	release_perf_events(fds, ncpus);
> > +
> > +	kvm_vm_free(vm);
> > +
> > +	free(fds);
> > +
> > +	return 0;
> > +}
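
Side note, in case it helps to reproduce the contention outside of the selftest: the priority ladder described in the commit message depends only on how the host events are opened. The vLBR event is scheduled on the host like a per-task pinned event; per-cpu pinned events (pid = -1, cpu = N, pinned = 1, as opened by create_perf_events(fds, ncpus, true) above) always preempt it, and non-pinned events never do. Below is a minimal, untested userspace sketch, not part of the patch and with a made-up helper name, that opens the per-task pinned variant for the current process:

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Hypothetical helper: open a per-task pinned branch-stack (LBR) event. */
static int open_task_pinned_lbr_event(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
	attr.sample_period = 1000;
	attr.pinned = 1;	/* same scheduling class as KVM's vLBR event */
	attr.exclude_kernel = 1;
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER;

	/* pid = 0, cpu = -1: follow the current task on whichever cpu it runs. */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
}

int main(void)
{
	int fd = open_task_pinned_lbr_event();

	if (fd < 0)
		perror("perf_event_open");
	else
		printf("per-task pinned LBR event opened, fd = %d\n", fd);
	return 0;
}

The selftest above deliberately only checks whether the guest can still read and write MSR_LBR_TOS under each of these host event configurations; verifying the LBR record contents after a preemption is the design gap mentioned at the top and is out of scope here.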