A kselftest-based program that allows setting the VMX preemption timer value directly from a VM. Introduce 2 hypercalls, 0x87650001/2: 01 sets the preemption timer value, 02 polls whether the preemption timer has expired. Usage: Reload kvm with this change applied, then: $KERNEL_SRC_ROOT/tools/testing/selftests/kvm/x86_64/preempt_test -p 'preempt_timer_value' 'preempt_timer_value' is the preemption timer value in DEC format; HEX is not supported. For example: perf record -e "kvm:*" tools/testing/selftests/kvm/x86_64/preempt_test -p 2281718445 The above sets the preemption timer value to 2281718445 (0x880042AD) and captures the trace; then check the kvm_vmx_debug events in the trace to see the preemption timer behavior. Signed-off-by: Yao Yuan <yuan.yao@xxxxxxxxx> --- tools/testing/selftests/kvm/Makefile | 1 + arch/x86/kvm/vmx/vmx.h | 5 + arch/x86/kvm/vmx/vmx.c | 113 +++++++++++++++++- .../selftests/kvm/x86_64/preempt_test.c | 82 +++++++++++++ 4 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/kvm/x86_64/preempt_test.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ad8b5d15f2bd..957509957f80 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -129,6 +129,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/amx_test TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test +TEST_GEN_PROGS_x86_64 += x86_64/preempt_test TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 42498fa63abb..82ea0ccc7a63 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -368,6 +368,11 @@ struct vcpu_vmx { /* ve_info must be page aligned. 
*/ struct vmx_ve_information *ve_info; + + volatile bool debug_timer; + bool debug_timer_set_to_hardware; + u32 debug_timer_val; + u64 debug_timer_deadline_tsc; }; struct kvm_vmx { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f18c2d8c7476..73f084c29f9a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4431,8 +4431,9 @@ static u32 vmx_vmexit_ctrl(void) * Not used by KVM and never set in vmcs01 or vmcs02, but emulated for * nested virtualization and thus allowed to be set in vmcs12. */ - vmexit_ctrl &= ~(VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER | - VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); + vmexit_ctrl &= ~(VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER); + pr_info("Set VM_EXIT_SAVE_VMX_PREEMPTION_TIMER forcedly for preempt timer debug\n"); + if (vmx_pt_mode_is_system()) vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | @@ -5993,11 +5994,41 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) return 1; } +static fastpath_t handle_fastpath_debug_timer(struct kvm_vcpu *vcpu, + bool force_immediate_exit) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u64 tscl; + u32 delta; + + tscl = rdtsc(); + + if (vmx->debug_timer_deadline_tsc > tscl) + delta = (u32)((vmx->debug_timer_deadline_tsc - tscl) >> + cpu_preemption_timer_multi); + else + delta = 0; + + trace_kvm_vmx_debug(2UL, + (unsigned long)vmcs_read32(VM_EXIT_REASON), + (unsigned long)vmcs_read32(VMX_PREEMPTION_TIMER_VALUE), + (unsigned long)delta, tscl); + + vmx->debug_timer = false; + + return EXIT_FASTPATH_REENTER_GUEST; +} + static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu, bool force_immediate_exit) { struct vcpu_vmx *vmx = to_vmx(vcpu); + WARN_ON(vmx->debug_timer && force_immediate_exit); + if (vmx->debug_timer) + return handle_fastpath_debug_timer(vcpu, + force_immediate_exit); + /* * In the *extremely* unlikely scenario that this is a spurious VM-Exit * due to the timer expiring while it was "soft" disabled, just eat the @@ -6096,6 +6127,60 @@ static int 
handle_notify(struct kvm_vcpu *vcpu) return 1; } +static unsigned long vmx_debug_set_preempt_timer(struct kvm_vcpu *vcpu, + unsigned long a0, + unsigned long a1, + unsigned long a2, + unsigned long a3) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + vmx->debug_timer = true; + vmx->debug_timer_set_to_hardware = false; + vmx->debug_timer_val = a0; + vmx->debug_timer_deadline_tsc = rdtsc() + ((u64)vmx->debug_timer_val << cpu_preemption_timer_multi); + pr_info("debug_timer = %u\n", (u32)a0); + + return 0; +} + + +static unsigned long vmx_debug_get_preempt_timer_result(struct kvm_vcpu *vcpu, + unsigned long a0, + unsigned long a1, + unsigned long a2, + unsigned long a3) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (vmx->debug_timer) + return 1; + return 0; +} + +static int vmx_emulate_hypercall(struct kvm_vcpu *vcpu) +{ + unsigned long nr, a0, a1, a2, a3; + unsigned long ret; + + nr = kvm_rax_read(vcpu); + if (nr != 0x87650001 && nr != 0x87650002) + return kvm_emulate_hypercall(vcpu); + + a0 = kvm_rbx_read(vcpu); + a1 = kvm_rcx_read(vcpu); + a2 = kvm_rdx_read(vcpu); + a3 = kvm_rsi_read(vcpu); + + if (nr == 0x87650001) + ret = vmx_debug_set_preempt_timer(vcpu, a0, a1, a2, a3); + else + ret = vmx_debug_get_preempt_timer_result(vcpu, a0, a1, a2, a3); + + kvm_rax_write(vcpu, ret); + return kvm_skip_emulated_instruction(vcpu); +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. 
Otherwise they set the kvm_run parameter to indicate what needs @@ -6117,7 +6202,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_INVD] = kvm_emulate_invd, [EXIT_REASON_INVLPG] = handle_invlpg, [EXIT_REASON_RDPMC] = kvm_emulate_rdpmc, - [EXIT_REASON_VMCALL] = kvm_emulate_hypercall, + [EXIT_REASON_VMCALL] = vmx_emulate_hypercall, [EXIT_REASON_VMCLEAR] = handle_vmx_instruction, [EXIT_REASON_VMLAUNCH] = handle_vmx_instruction, [EXIT_REASON_VMPTRLD] = handle_vmx_instruction, @@ -7199,6 +7284,28 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu, bool force_immediate_exit if (force_immediate_exit) { vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0); vmx->loaded_vmcs->hv_timer_soft_disabled = false; + } else if (vmx->debug_timer) { + u32 old; + + tscl = rdtsc(); + + if (!vmx->debug_timer_set_to_hardware) { + delta_tsc = vmx->debug_timer_val; + vmx->debug_timer_set_to_hardware = true; + } else { + if (vmx->debug_timer_deadline_tsc > tscl) + delta_tsc = (u32)((vmx->debug_timer_deadline_tsc - tscl) + >> cpu_preemption_timer_multi); + else + delta_tsc = 0; + } + + old = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc); + trace_kvm_vmx_debug(3UL, old, + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE), + delta_tsc, tscl); + vmx->loaded_vmcs->hv_timer_soft_disabled = false; } else if (vmx->hv_deadline_tsc != -1) { tscl = rdtsc(); if (vmx->hv_deadline_tsc > tscl) diff --git a/tools/testing/selftests/kvm/x86_64/preempt_test.c b/tools/testing/selftests/kvm/x86_64/preempt_test.c new file mode 100644 index 000000000000..2e58cfee61d0 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/preempt_test.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024 Intel Corporation + * + * Debug the preemption timer behavior + */ + +#include "test_util.h" +#include "processor.h" +#include "ucall_common.h" + +uint32_t preempt_timer_val = 0x1000000; +static void guest_code(uint64_t apic_hz, 
uint64_t delay_ms) +{ + volatile unsigned long r; + + kvm_hypercall(0x87650001, preempt_timer_val, 0, 0, 0); + do { + udelay(100); + r = kvm_hypercall(0x87650002, 0, 0, 0, 0); + } while(r != 0); + + GUEST_DONE(); +} + +static void do_test(struct kvm_vcpu *vcpu) +{ + bool done = false; + struct ucall uc; + + while (!done) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + break; + } + } +} + +static void run_test(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = vm_create(1); + + sync_global_to_guest(vm, preempt_timer_val); + + vcpu = vm_vcpu_add(vm, 0, guest_code); + + do_test(vcpu); + + kvm_vm_free(vm); +} + + +int main(int argc, char *argv[]) +{ + int opt; + + while ((opt = getopt(argc, argv, "p:h")) != -1) { + switch (opt) { + case 'p': + preempt_timer_val = strtoul(optarg, NULL, 10); + break; + default: + exit(KSFT_SKIP); + } + } + + printf("preempt timer value:%u(0x%x)\n", + preempt_timer_val, preempt_timer_val); + + run_test(); +} -- 2.27.0