On Sat, 21 Jan 2023 02:07:38 +0000 Kechen Lu <kechenl@xxxxxxxxxx> wrote: It works on my box now. LGTM. I was curious if there is any other userspace application using this? It would be interesting to see their strategies. > Add selftests for KVM cap KVM_CAP_X86_DISABLE_EXITS overriding flags > in VM and vCPU scope both working as expected. > > Suggested-by: Chao Gao <chao.gao@xxxxxxxxx> > Suggested-by: Shaoqin Huang <shaoqin.huang@xxxxxxxxx> > Suggested-by: Zhi Wang <zhi.wang.linux@xxxxxxxxx> > Signed-off-by: Kechen Lu <kechenl@xxxxxxxxxx> > --- > tools/testing/selftests/kvm/Makefile | 1 + > .../selftests/kvm/x86_64/disable_exits_test.c | 363 ++++++++++++++++++ > 2 files changed, 364 insertions(+) > create mode 100644 tools/testing/selftests/kvm/x86_64/disable_exits_test.c > > diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile > index 1750f91dd936..eeeba35e2536 100644 > --- a/tools/testing/selftests/kvm/Makefile > +++ b/tools/testing/selftests/kvm/Makefile > @@ -114,6 +114,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests > TEST_GEN_PROGS_x86_64 += x86_64/amx_test > TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test > TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test > +TEST_GEN_PROGS_x86_64 += x86_64/disable_exits_test > TEST_GEN_PROGS_x86_64 += access_tracking_perf_test > TEST_GEN_PROGS_x86_64 += demand_paging_test > TEST_GEN_PROGS_x86_64 += dirty_log_test > diff --git a/tools/testing/selftests/kvm/x86_64/disable_exits_test.c b/tools/testing/selftests/kvm/x86_64/disable_exits_test.c > new file mode 100644 > index 000000000000..74a2152b35dd > --- /dev/null > +++ b/tools/testing/selftests/kvm/x86_64/disable_exits_test.c > @@ -0,0 +1,363 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Test per-VM and per-vCPU disable exits cap > + * 1) Per-VM scope > + * 2) Per-vCPU scope > + * > + */ > + > +#define _GNU_SOURCE /* for program_invocation_short_name */ > +#include <pthread.h> > +#include <inttypes.h> > 
+#include <string.h> > +#include <time.h> > +#include <sys/ioctl.h> > + > +#include "test_util.h" > +#include "kvm_util.h" > +#include "svm_util.h" > +#include "vmx.h" > +#include "processor.h" > +#include "asm/kvm.h" > +#include "linux/kvm.h" > + > +/* Arbitrary chosen IPI vector value from sender to halter vCPU */ > +#define IPI_VECTOR 0xa5 > +/* Number of HLTs halter vCPU thread executes */ > +#define LOOP_DURATION 3 > + > +struct guest_stats { > + uint32_t halter_apic_id; > + volatile uint64_t hlt_count; > + volatile uint64_t wake_count; > +}; > + > +static u64 read_vcpu_stats_halt_exits(struct kvm_vcpu *vcpu) > +{ > + int i; > + struct kvm_stats_header header; > + u64 *stats_data; > + u64 ret = 0; > + struct kvm_stats_desc *stats_desc; > + struct kvm_stats_desc *pdesc; > + int stats_fd = vcpu_get_stats_fd(vcpu); > + > + read_stats_header(stats_fd, &header); > + if (header.num_desc == 0) { > + fprintf(stderr, > + "Cannot read halt exits since no KVM stats defined\n"); > + return ret; > + } > + > + stats_desc = read_stats_descriptors(stats_fd, &header); > + for (i = 0; i < header.num_desc; ++i) { > + pdesc = get_stats_descriptor(stats_desc, i, &header); > + if (!strncmp(pdesc->name, "halt_exits", 10)) { > + stats_data = malloc(pdesc->size * sizeof(*stats_data)); > + read_stat_data(stats_fd, &header, pdesc, stats_data, > + pdesc->size); > + ret = *stats_data; > + free(stats_data); > + break; > + } > + } > + free(stats_desc); > + return ret; > +} > + > +/* HLT multiple times in one vCPU */ > +static void halter_guest_code(struct guest_stats *data) > +{ > + xapic_enable(); > + data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)); > + > + for (;;) { > + data->hlt_count++; > + asm volatile("sti; hlt; cli"); > + data->wake_count++; > + } > +} > + > +/* Runs on halter vCPU when IPI arrives */ > +static void guest_ipi_handler(struct ex_regs *regs) > +{ > + xapic_write_reg(APIC_EOI, 11); > +} > + > +/* Sender vCPU waits for ~1sec to assume HLT executed */ > 
+static void sender_wait_loop(struct guest_stats *data, uint64_t old_hlt_count,
> +			     uint64_t old_wake_count)
> +{
> +	/* Upper bound, in TSC ticks, on how long to poll for halter progress */
> +	const uint64_t timeout_cycles = 4000000000ULL;
> +	uint64_t tsc_start = rdtsc();
> +
> +	while (rdtsc() - tsc_start < timeout_cycles) {
> +		if ((data->wake_count != old_wake_count) &&
> +		    (data->hlt_count != old_hlt_count))
> +			break;
> +	}
> +	/* Fail the guest if the halter made no progress within the timeout */
> +	GUEST_ASSERT((data->wake_count != old_wake_count) &&
> +		     (data->hlt_count != old_hlt_count));
> +}
> +
> +/*
> + * Guest code of the sender vCPU: repeatedly send an IPI to the halter vCPU
> + * and wait until the halter has woken up and issued another HLT.
> + */
> +static void sender_guest_code(struct guest_stats *data)
> +{
> +	uint32_t icr_val;
> +	uint32_t icr2_val;
> +	uint64_t old_hlt_count = 0;
> +	uint64_t old_wake_count = 0;
> +
> +	xapic_enable();
> +	/* Init interrupt command register for sending IPIs */
> +	icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
> +	icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
> +
> +	for (;;) {
> +		/*
> +		 * Send IPI to the halted vCPU. The first IPI can be sent
> +		 * right away because the host already waited for the halter
> +		 * before creating the sender vCPU thread.
> +		 */
> +		xapic_write_reg(APIC_ICR2, icr2_val);
> +		xapic_write_reg(APIC_ICR, icr_val);
> +		sender_wait_loop(data, old_hlt_count, old_wake_count);
> +		old_wake_count = data->wake_count;
> +		old_hlt_count = data->hlt_count;
> +	}
> +}
> +
> +/*
> + * Host thread body: make the thread asynchronously cancellable, then run the
> + * vCPU passed in @arg. Always returns NULL.
> + */
> +static void *vcpu_thread(void *arg)
> +{
> +	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
> +	int old;
> +	int r;
> +
> +	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
> +	TEST_ASSERT(r == 0,
> +		    "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
> +		    vcpu->id, r);
> +	fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
> +	vcpu_run(vcpu);
> +	return NULL;
> +}
> +
> +/* Cancel the host thread running @vcpu and wait for it to terminate */
> +static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
> +{
> +	int r;
> +
> +	r = pthread_cancel(thread);
> +	TEST_ASSERT(r == 0,
> +		    "pthread_cancel on vcpu_id=%u failed with errno=%d",
> +		    vcpu->id, r);
> +
> +	/* The thread's exit value is not needed */
> +	r = pthread_join(thread, NULL);
> +	TEST_ASSERT(r == 0,
> +		    "pthread_join on vcpu_id=%u failed with errno=%d",
> +		    vcpu->id, r);
> +}
> +
> +/*
> + * Run the halter vCPU, one second later the sender vCPU, let both run for
> + * LOOP_DURATION seconds and then cancel and join both threads.
> + */
> +static void vm_run_with_threads(struct kvm_vcpu *halter_vcpu,
> +				struct kvm_vcpu *sender_vcpu)
> +{
> +	int r;
> +	pthread_t threads[2];
> +
> +	/*
> +	 * Start halter vCPU thread and wait for it to execute first HLT.
> +	 * pthread functions return the error number and do not set errno,
> +	 * so report the return value.
> +	 */
> +	r = pthread_create(&threads[0], NULL, vcpu_thread, halter_vcpu);
> +	TEST_ASSERT(r == 0,
> +		    "pthread_create halter failed errno=%d", r);
> +	fprintf(stderr, "Halter vCPU thread started\n");
> +
> +	sleep(1);
> +
> +	/*
> +	 * After guest halter vCPU executed first HLT, start the sender
> +	 * vCPU thread to wakeup halter vCPU
> +	 */
> +	r = pthread_create(&threads[1], NULL, vcpu_thread, sender_vcpu);
> +	TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", r);
> +
> +	sleep(LOOP_DURATION);
> +
> +	cancel_join_vcpu_thread(threads[0], halter_vcpu);
> +	cancel_join_vcpu_thread(threads[1], sender_vcpu);
> +}
> +
> +/*
> + * Test case 1 (@cap_enabled == false):
> + * Normal VM running with one vCPU that keeps executing HLTs and
> + * another vCPU sending IPIs to wake it up; every HLT is expected
> + * to exit to the host.
> + * Test case 2 (@cap_enabled == true):
> + * HLT exits are disabled VM-wide before any vCPU is created; HLT
> + * instructions are expected to stay inside the guest without exits.
> + */
> +static void test_vm_disable_exits_cap(bool cap_enabled)
> +{
> +	uint64_t kvm_halt_exits;
> +	struct kvm_vm *vm;
> +	struct kvm_vcpu *halter_vcpu;
> +	struct kvm_vcpu *sender_vcpu;
> +	struct guest_stats *data;
> +	vm_vaddr_t guest_stats_page_vaddr;
> +
> +	/* Create VM */
> +	vm = vm_create(2);
> +
> +	/*
> +	 * Before adding any vCPUs, enable the KVM_X86_DISABLE_EXITS cap
> +	 * with flag KVM_X86_DISABLE_EXITS_HLT
> +	 */
> +	if (cap_enabled)
> +		vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS,
> +			      KVM_X86_DISABLE_EXITS_HLT);
> +
> +	/* Add vCPU with loops halting */
> +	halter_vcpu = vm_vcpu_add(vm, 0, halter_guest_code);
> +
> +	vm_init_descriptor_tables(vm);
> +	vcpu_init_descriptor_tables(halter_vcpu);
> +	vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
> +	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
> +
> +	/* Add vCPU with IPIs waking up halter vCPU */
> +	sender_vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
> +
> +	/* Both vCPUs receive the same zeroed stats page as their argument */
> +	guest_stats_page_vaddr = vm_vaddr_alloc_page(vm);
> +	data = addr_gva2hva(vm, guest_stats_page_vaddr);
> +	memset(data, 0, sizeof(*data));
> +
> +	vcpu_args_set(halter_vcpu, 1, guest_stats_page_vaddr);
> +	vcpu_args_set(sender_vcpu, 1, guest_stats_page_vaddr);
> +
> +	vm_run_with_threads(halter_vcpu, sender_vcpu);
> +	kvm_halt_exits = read_vcpu_stats_halt_exits(halter_vcpu);
> +	/*
> +	 * NOTE(review): the equality check below presumes the halter thread
> +	 * was cancelled while halted, not between hlt_count++ and HLT.
> +	 */
> +	if (cap_enabled)
> +		TEST_ASSERT(kvm_halt_exits == 0,
> +			    "Halter vCPU had unexpected halt exits occurring after disabling VM-scoped halt exits cap\n");
> +	else
> +		TEST_ASSERT(kvm_halt_exits == data->hlt_count,
> +			    "Halter vCPU had unmatched %lu halt exits - %lu HLTs executed, when not disabling VM halt exits\n",
> +			    kvm_halt_exits, data->hlt_count);
> +	fprintf(stderr, "Halter vCPU had %lu halt exits\n",
> +		kvm_halt_exits);
> +	fprintf(stderr, "Guest records %lu HLTs executed, waked %lu times\n",
> +		data->hlt_count, data->wake_count);
> +
> +	kvm_vm_free(vm);
> +}
> +
> +/*
> + * Test case 3:
> + * VM overrides exits disable flags after vCPU created,
> + * which is not allowed
> + */
> +static void test_vm_disable_exits_cap_with_vcpu_created(void)
> +{
> +	int r;
> +	struct kvm_vm *vm;
> +	struct kvm_enable_cap cap = {
> +		.cap = KVM_CAP_X86_DISABLE_EXITS,
> +		.args[0] = KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_OVERRIDE,
> +	};
> +
> +	/* Create VM */
> +	vm = vm_create(1);
> +	/* Add vCPU with loops halting */
> +	vm_vcpu_add(vm, 0, halter_guest_code);
> +
> +	/*
> +	 * After creating a vCPU, the current VM-scoped ABI should
> +	 * reject enabling KVM_CAP_X86_DISABLE_EXITS and return
> +	 * non-zero. vm_enable_cap() cannot be used to check for an
> +	 * expected failure, so issue the ioctl via __vm_ioctl().
> +	 */
> +	r = __vm_ioctl(vm, KVM_ENABLE_CAP, &cap);
> +
> +	TEST_ASSERT(r != 0,
> +		    "Setting VM-scoped KVM_CAP_X86_DISABLE_EXITS after vCPUs created is not allowed, but it succeeds here\n");
> +
> +	/* Release the VM like the other test cases do */
> +	kvm_vm_free(vm);
> +}
> +
> +/*
> + * Test case 4:
> + * vCPU scoped halt exits disabling and enabling tests,
> + * verify overrides are working after vCPU created
> + */
> +static void test_vcpu_toggling_disable_exits_cap(void)
> +{
> +	uint64_t kvm_halt_exits;
> +	uint64_t kvm_halt_exits_in_guest;
> +	struct kvm_vm *vm;
> +	struct kvm_vcpu *halter_vcpu;
> +	struct kvm_vcpu *sender_vcpu;
> +	struct guest_stats *data;
> +	vm_vaddr_t guest_stats_page_vaddr;
> +
> +	/* Create VM */
> +	vm = vm_create(2);
> +
> +	/* Add vCPU with loops halting */
> +	halter_vcpu = vm_vcpu_add(vm, 0, halter_guest_code);
> +	/* Set KVM_CAP_X86_DISABLE_EXITS_HLT for halter vCPU */
> +	vcpu_enable_cap(halter_vcpu, KVM_CAP_X86_DISABLE_EXITS,
> +			KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_OVERRIDE);
> +
> +	vm_init_descriptor_tables(vm);
> +	vcpu_init_descriptor_tables(halter_vcpu);
> +	vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
> +	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
> +
> +	/* Add vCPU with IPIs waking up halter vCPU */
> +	sender_vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
> +
> +	/* Both vCPUs receive the same zeroed stats page as their argument */
> +	guest_stats_page_vaddr = vm_vaddr_alloc_page(vm);
> +	data = addr_gva2hva(vm, guest_stats_page_vaddr);
> +	memset(data, 0, sizeof(*data));
> +
> +	vcpu_args_set(halter_vcpu, 1, guest_stats_page_vaddr);
> +	vcpu_args_set(sender_vcpu, 1, guest_stats_page_vaddr);
> +
> +	/*
> +	 * For the first phase of the running, halt exits
> +	 * are disabled, halter vCPU executes HLT instruction
> +	 * but never exits to host
> +	 */
> +	vm_run_with_threads(halter_vcpu, sender_vcpu);
> +	/* Remember how many HLTs ran in phase one to subtract them later */
> +	kvm_halt_exits_in_guest = data->hlt_count;
> +	fprintf(stderr, "Guest records %lu HLTs with halt exits disabled\n",
> +		data->hlt_count);
> +
> +	/*
> +	 * Override and clear KVM_CAP_X86_DISABLE_EXITS flags
> +	 * for halter vCPU. Expect to see halt exits occur then.
> +	 */
> +	vcpu_enable_cap(halter_vcpu, KVM_CAP_X86_DISABLE_EXITS,
> +			KVM_X86_DISABLE_EXITS_OVERRIDE);
> +	/*
> +	 * Second phase of the test, after guest halter vCPU
> +	 * reenabled halt exits, start the sender
> +	 * vCPU thread to wakeup halter vCPU
> +	 */
> +	vm_run_with_threads(halter_vcpu, sender_vcpu);
> +	kvm_halt_exits = read_vcpu_stats_halt_exits(halter_vcpu);
> +	TEST_ASSERT(kvm_halt_exits == data->hlt_count - kvm_halt_exits_in_guest,
> +		    "Halter vCPU had unexpected %lu (should be %lu) halt exits\n",
> +		    kvm_halt_exits, data->hlt_count - kvm_halt_exits_in_guest);
> +	fprintf(stderr, "Halter vCPU had %lu halt exits\n",
> +		kvm_halt_exits);
> +	fprintf(stderr, "Guest records %lu HLTs executed, waked %lu times\n",
> +		data->hlt_count, data->wake_count);
> +
> +	kvm_vm_free(vm);
> +}
> +
> +/* Run the VM-scoped test cases followed by the vCPU-scoped one */
> +int main(int argc, char *argv[])
> +{
> +	/* Skip rather than fail when the host cannot disable HLT exits */
> +	TEST_REQUIRE(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) &
> +		     KVM_X86_DISABLE_EXITS_HLT);
> +
> +	fprintf(stderr, "VM-scoped tests start\n");
> +	test_vm_disable_exits_cap(false);
> +	test_vm_disable_exits_cap(true);
> +	test_vm_disable_exits_cap_with_vcpu_created();
> +	fprintf(stderr, "vCPU-scoped test starts\n");
> +	test_vcpu_toggling_disable_exits_cap();
> +	return 0;
> +}