On Tue, 2021-05-11 at 15:47 +0300, Maxim Levitsky wrote:
> On Tue, 2021-05-11 at 11:16 +0000, Stamatis, Ilias wrote:
> > On Mon, 2021-05-10 at 16:59 +0300, Maxim Levitsky wrote:
> > > On Thu, 2021-05-06 at 10:32 +0000, ilstam@xxxxxxxxxxx wrote:
> > > > From: Ilias Stamatis <ilstam@xxxxxxxxxx>
> > > > 
> > > > Test that nested TSC scaling works as expected with both L1 and L2
> > > > scaled.
> > > > 
> > > > Signed-off-by: Ilias Stamatis <ilstam@xxxxxxxxxx>
> > > > ---
> > > >  tools/testing/selftests/kvm/.gitignore                  |   1 +
> > > >  tools/testing/selftests/kvm/Makefile                    |   1 +
> > > >  .../kvm/x86_64/vmx_nested_tsc_scaling_test.c            | 209 ++++++++++++++++++
> > > >  3 files changed, 211 insertions(+)
> > > >  create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
> > > > 
> > > > diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
> > > > index bd83158e0e0b..cc02022f9951 100644
> > > > --- a/tools/testing/selftests/kvm/.gitignore
> > > > +++ b/tools/testing/selftests/kvm/.gitignore
> > > > @@ -29,6 +29,7 @@
> > > >  /x86_64/vmx_preemption_timer_test
> > > >  /x86_64/vmx_set_nested_state_test
> > > >  /x86_64/vmx_tsc_adjust_test
> > > > +/x86_64/vmx_nested_tsc_scaling_test
> > > >  /x86_64/xapic_ipi_test
> > > >  /x86_64/xen_shinfo_test
> > > >  /x86_64/xen_vmcall_test
> > > > diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> > > > index e439d027939d..1078240b1313 100644
> > > > --- a/tools/testing/selftests/kvm/Makefile
> > > > +++ b/tools/testing/selftests/kvm/Makefile
> > > > @@ -60,6 +60,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
> > > >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
> > > >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
> > > >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
> > > > +TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
> > > >  TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
> > > >  TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
> > > >  TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
> > > > diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
> > > > new file mode 100644
> > > > index 000000000000..b05f5151ecbe
> > > > --- /dev/null
> > > > +++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
> > > > @@ -0,0 +1,209 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > +/*
> > > > + * vmx_nested_tsc_scaling_test
> > > > + *
> > > > + * Copyright (C) 2021 Amazon.com, Inc. or its affiliates.
> > > > + *
> > > > + * This test case verifies that nested TSC scaling behaves as expected when
> > > > + * both L1 and L2 are scaled using different ratios. For this test we scale
> > > > + * L1 down and scale L2 up.
> > > > + */
> > > > +
> > > > +
> > > > +#include "kvm_util.h"
> > > > +#include "vmx.h"
> > > > +#include "kselftest.h"
> > > > +
> > > > +
> > > > +#define VCPU_ID 0
> > > > +
> > > > +/* L1 is scaled down by this factor */
> > > > +#define L1_SCALE_FACTOR 2ULL
> > > > +/* L2 is scaled up (from L1's perspective) by this factor */
> > > > +#define L2_SCALE_FACTOR 4ULL
> > > 
> > > For fun, I might have randomized these factors as well.
> > 
> > So L2_SCALE_FACTOR (or rather TSC_MULTIPLIER_L2, which depends on it) is
> > referenced from within l1_guest_code(). If we change this to a static variable
> > we won't be able to access it from there. How could this be done?
> 
> I also had this thought after I wrote the reply. I don't have much experience
> yet with KVM selftests, so this might indeed not be possible.
> 

OK, I can make the L1 scale factor random but keep the L2 factor fixed for
now. Does a range of 2 to 10 sound reasonable for L1?
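To make it concrete, something along the lines of the sketch below is what I
had in mind for main(). It is untested and only illustrative: the [2, 10]
range, the srandom() seeding and the l1_scale_factor local are placeholders I
made up, and <stdlib.h>/<time.h> would need to be included.

	uint64_t l1_scale_factor;

	/*
	 * Pick a random L1 divisor in [2, 10] at runtime instead of the fixed
	 * L1_SCALE_FACTOR. The same variable would then also be used for the
	 * expected-frequency check in the run loop further down in main().
	 */
	srandom(time(NULL));
	l1_scale_factor = 2 + random() % 9;
	printf("scaling L1's TSC down by a factor of %"PRIu64"\n", l1_scale_factor);

	tsc_khz = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_TSC_KHZ, NULL);
	TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");

	/* scale down L1's TSC frequency by the randomly chosen factor */
	vcpu_ioctl(vm, VCPU_ID, KVM_SET_TSC_KHZ,
		   (void *) (tsc_khz / l1_scale_factor));
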
> > For the L1 factor it's easy as we only use it in main().
> > 
> > > > +
> > > > +#define TSC_OFFSET_L2 (1UL << 32)
> > > > +#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
> > > 
> > > It would be fun to use a negative offset here (also randomly).
> > 
> > Do you mean a random offset that is always negative, or a random offset that
> > is sometimes positive and sometimes negative?
> 
> Yep, to test the special case for negative numbers.

OK, I will use a negative offset then, but it won't be random, for the same
reason as above.
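For example, something like the below. The exact value is arbitrary (just
illustrative); the point is only that the offset is negative when interpreted
as a signed 64-bit value, so that the handling of negative offsets in the
nested offset/multiplier computation also gets exercised.

/* an arbitrary fixed negative TSC offset for L2 (illustrative value only) */
#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
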
> > 
> > > > +
> > > > +#define L2_GUEST_STACK_SIZE 64
> > > > +
> > > > +enum { USLEEP, UCHECK_L1, UCHECK_L2 };
> > > > +#define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, sec)
> > > > +#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
> > > > +
> > > > +
> > > > +/*
> > > > + * This function checks whether the "actual" TSC frequency of a guest matches
> > > > + * its expected frequency. In order to account for delays in taking the TSC
> > > > + * measurements, a difference of 1% between the actual and the expected value
> > > > + * is tolerated.
> > > > + */
> > > > +static void compare_tsc_freq(uint64_t actual, uint64_t expected)
> > > > +{
> > > > +        uint64_t tolerance, thresh_low, thresh_high;
> > > > +
> > > > +        tolerance = expected / 100;
> > > > +        thresh_low = expected - tolerance;
> > > > +        thresh_high = expected + tolerance;
> > > > +
> > > > +        TEST_ASSERT(thresh_low < actual,
> > > > +                "TSC freq is expected to be between %"PRIu64" and %"PRIu64
> > > > +                " but it actually is %"PRIu64,
> > > > +                thresh_low, thresh_high, actual);
> > > > +        TEST_ASSERT(thresh_high > actual,
> > > > +                "TSC freq is expected to be between %"PRIu64" and %"PRIu64
> > > > +                " but it actually is %"PRIu64,
> > > > +                thresh_low, thresh_high, actual);
> > > > +}
> > > > +
> > > > +static void check_tsc_freq(int level)
> > > > +{
> > > > +        uint64_t tsc_start, tsc_end, tsc_freq;
> > > > +
> > > > +        /*
> > > > +         * Reading the TSC twice with about a second's difference should give
> > > > +         * us an approximation of the TSC frequency from the guest's
> > > > +         * perspective. Now, this won't be completely accurate, but it should
> > > > +         * be good enough for the purposes of this test.
> > > > +         */
> > > 
> > > It would be nice to know if the host has a stable TSC (you can obtain this via
> > > KVM_GET_CLOCK, the KVM_CLOCK_TSC_STABLE flag).
> > > 
> > > And if it is not stable, skip the test, to avoid false positives.
> > > (Yes, I have a laptop I just bought that has an unstable TSC....)
> > 
> > Hmm, this is a vm ioctl, but I noticed that one of its vcpus needs to have been
> > run at least once, otherwise it won't return KVM_CLOCK_TSC_STABLE in the flags.
> > 
> > So...
> 
> Yes, now I remember that this thing relies on the TSC sync logic,
> master clock thing, etc... Oh well...
> 
> To be honest we really need the kernel to export the information
> it knows about the TSC, because it is useful to many users and
> not limited to virtualization.
> 
> Currently, other than KVM's KVM_GET_TSC_KHZ, there is no clean way
> to know even the TSC frequency, let alone whether the kernel considers
> the TSC to be stable, AFAIK.
> 
> Another more or less reliable (but hacky) way to know if the TSC is stable is
> to see whether the kernel is using the TSC as its clocksource, via
> (/sys/devices/system/clocksource/clocksource0/current_clocksource = tsc)
> 
> Oh well...

So do you suggest checking the contents of this file rather than doing a
KVM_GET_CLOCK ioctl after the vcpu has run once?
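If we go with the sysfs route, I guess the test could simply read that file
and skip when the current clocksource isn't the TSC. A rough, untested sketch
(the helper name is made up, and it would need <stdio.h> and <string.h>); I
would call it from main() next to the other *_check_supported() checks:

static void check_clocksource(void)
{
	char buf[16];
	FILE *f;

	/* skip the test unless the host is actually using the TSC as its clocksource */
	f = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource",
		  "r");
	if (!f || !fgets(buf, sizeof(buf), f) || strncmp(buf, "tsc", 3)) {
		print_skip("clocksource is not tsc, the host TSC may be unstable");
		exit(KSFT_SKIP);
	}

	fclose(f);
}
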
> 
> Best regards,
> Maxim Levitsky
> 
> > > > +        tsc_start = rdmsr(MSR_IA32_TSC);
> > > > +        GUEST_SLEEP(1);
> > > > +        tsc_end = rdmsr(MSR_IA32_TSC);
> > > > +
> > > > +        tsc_freq = tsc_end - tsc_start;
> > > > +
> > > > +        GUEST_CHECK(level, tsc_freq);
> > > > +}
> > > > +
> > > > +static void l2_guest_code(void)
> > > > +{
> > > > +        check_tsc_freq(UCHECK_L2);
> > > > +
> > > > +        /* exit to L1 */
> > > > +        __asm__ __volatile__("vmcall");
> > > > +}
> > > > +
> > > > +static void l1_guest_code(struct vmx_pages *vmx_pages)
> > > > +{
> > > > +        unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
> > > > +        uint32_t control;
> > > > +
> > > > +        /* check that L1's frequency looks alright before launching L2 */
> > > > +        check_tsc_freq(UCHECK_L1);
> > > > +
> > > > +        GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
> > > > +        GUEST_ASSERT(load_vmcs(vmx_pages));
> > > > +
> > > > +        /* prepare the VMCS for L2 execution */
> > > > +        prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
> > > > +
> > > > +        /* enable TSC offsetting and TSC scaling for L2 */
> > > > +        control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
> > > > +        control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
> > > > +        vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
> > > > +
> > > > +        control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
> > > > +        control |= SECONDARY_EXEC_TSC_SCALING;
> > > > +        vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
> > > > +
> > > > +        vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
> > > > +        vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
> > > > +        vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
> > > > +
> > > > +        /* launch L2 */
> > > > +        GUEST_ASSERT(!vmlaunch());
> > > > +        GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
> > > > +
> > > > +        /* check that L1's frequency still looks good */
> > > > +        check_tsc_freq(UCHECK_L1);
> > > > +
> > > > +        GUEST_DONE();
> > > > +}
> > > > +
> > > > +static void tsc_scaling_check_supported(void)
> > > > +{
> > > > +        if (!kvm_check_cap(KVM_CAP_TSC_CONTROL)) {
> > > > +                print_skip("TSC scaling not supported by the HW");
> > > > +                exit(KSFT_SKIP);
> > > > +        }
> > > > +}
> > > > +
> > > > +int main(int argc, char *argv[])
> > > > +{
> > > > +        struct kvm_vm *vm;
> > > > +        vm_vaddr_t vmx_pages_gva;
> > > > +
> > > > +        uint64_t tsc_start, tsc_end;
> > > > +        uint64_t tsc_khz;
> > > > +        uint64_t l0_tsc_freq = 0;
> > > > +        uint64_t l1_tsc_freq = 0;
> > > > +        uint64_t l2_tsc_freq = 0;
> > > > +
> > > > +        nested_vmx_check_supported();
> > > > +        tsc_scaling_check_supported();
> > 
> > I can't add the check here
> > 
> > > > +
> > > > +        tsc_start = rdtsc();
> > > > +        sleep(1);
> > > > +        tsc_end = rdtsc();
> > > > +
> > > > +        l0_tsc_freq = tsc_end - tsc_start;
> > > > +        printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
> > > > +
> > > > +        vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
> > > > +        vcpu_alloc_vmx(vm, &vmx_pages_gva);
> > > > +        vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
> > 
> > nor here
> > 
> > > > +
> > > > +        tsc_khz = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_TSC_KHZ, NULL);
> > > > +        TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
> > > > +
> > > > +        /* scale down L1's TSC frequency */
> > > > +        vcpu_ioctl(vm, VCPU_ID, KVM_SET_TSC_KHZ,
> > > > +                   (void *) (tsc_khz / L1_SCALE_FACTOR));
> > > > +
> > > > +        for (;;) {
> > > > +                volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
> > > > +                struct ucall uc;
> > > > +
> > > > +                vcpu_run(vm, VCPU_ID);
> > > > +                TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
> > > > +                            "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
> > > > +                            run->exit_reason,
> > > > +                            exit_reason_str(run->exit_reason));
> > 
> > should I add it here?
> > 
> > > > +
> > > > +                switch (get_ucall(vm, VCPU_ID, &uc)) {
> > > > +                case UCALL_ABORT:
> > > > +                        TEST_FAIL("%s", (const char *) uc.args[0]);
> > > > +                case UCALL_SYNC:
> > > > +                        switch (uc.args[0]) {
> > > > +                        case USLEEP:
> > > > +                                sleep(uc.args[1]);
> > > > +                                break;
> > > > +                        case UCHECK_L1:
> > > > +                                l1_tsc_freq = uc.args[1];
> > > > +                                printf("L1's TSC frequency is around: %"PRIu64
> > > > +                                       "\n", l1_tsc_freq);
> > > > +
> > > > +                                compare_tsc_freq(l1_tsc_freq,
> > > > +                                                 l0_tsc_freq / L1_SCALE_FACTOR);
> > > > +                                break;
> > > > +                        case UCHECK_L2:
> > > > +                                l2_tsc_freq = uc.args[1];
> > > > +                                printf("L2's TSC frequency is around: %"PRIu64
> > > > +                                       "\n", l2_tsc_freq);
> > > > +
> > > > +                                compare_tsc_freq(l2_tsc_freq,
> > > > +                                                 l1_tsc_freq * L2_SCALE_FACTOR);
> > > > +                                break;
> > > > +                        }
> > > > +                        break;
> > > > +                case UCALL_DONE:
> > > > +                        goto done;
> > > > +                default:
> > > > +                        TEST_FAIL("Unknown ucall %lu", uc.cmd);
> > > > +                }
> > > > +        }
> > > > +
> > > > +done:
> > > > +        kvm_vm_free(vm);
> > > > +        return 0;
> > > > +}
> > > 
> > > Overall looks OK to me.
> > > 
> > > I can't test it, since the most recent Intel laptop I have (i7-7600U)
> > > still lacks TSC scaling (or did Intel cripple this feature on clients like
> > > what they did with APICv?)
> > > 
> > > Best regards,
> > > Maxim Levitsky