On Tue, Apr 27, 2021 at 10:01 AM Aaron Lewis <aaronlewis@xxxxxxxxxx> wrote: > > This test exercises the feature KVM_CAP_EXIT_ON_EMULATION_FAILURE. When > enabled, errors in the in-kernel instruction emulator are forwarded to > userspace with the instruction bytes stored in the exit struct for > KVM_EXIT_INTERNAL_ERROR. So, when the guest attempts to emulate an > 'flds' instruction, which isn't able to be emulated in KVM, instead > of failing, KVM sends the instruction to userspace to handle. > > For this test to work properly the module parameter > 'allow_smaller_maxphyaddr' has to be set. > > Signed-off-by: Aaron Lewis <aaronlewis@xxxxxxxxxx> > --- > tools/testing/selftests/kvm/.gitignore | 1 + > tools/testing/selftests/kvm/Makefile | 1 + > .../selftests/kvm/include/x86_64/processor.h | 3 + > .../selftests/kvm/lib/x86_64/processor.c | 79 ++++++ > .../kvm/x86_64/emulator_error_test.c | 224 ++++++++++++++++++ > 5 files changed, 308 insertions(+) > create mode 100644 tools/testing/selftests/kvm/x86_64/emulator_error_test.c > > diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore > index 7bd7e776c266..ec9e20a2f752 100644 > --- a/tools/testing/selftests/kvm/.gitignore > +++ b/tools/testing/selftests/kvm/.gitignore > @@ -7,6 +7,7 @@ > /x86_64/cr4_cpuid_sync_test > /x86_64/debug_regs > /x86_64/evmcs_test > +/x86_64/emulator_error_test > /x86_64/get_cpuid_test > /x86_64/get_msr_index_features > /x86_64/kvm_pv_test > diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile > index 67eebb53235f..5ff705d92d02 100644 > --- a/tools/testing/selftests/kvm/Makefile > +++ b/tools/testing/selftests/kvm/Makefile > @@ -41,6 +41,7 @@ LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_ha > TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test > TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features > TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test > +TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test > TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test > TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock > TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid > diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h > index 0b30b4e15c38..40f70f91e6a2 100644 > --- a/tools/testing/selftests/kvm/include/x86_64/processor.h > +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h > @@ -394,6 +394,9 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); > void vm_handle_exception(struct kvm_vm *vm, int vector, > void (*handler)(struct ex_regs *)); > > +uint64_t vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); > +void vm_set_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, uint64_t pte); > + > /* > * set_cpuid() - overwrites a matching cpuid entry with the provided value. > * matches based on ent->function && ent->index. returns true > diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c > index a8906e60a108..195af5a9c9ec 100644 > --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c > +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c > @@ -292,6 +292,85 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, > pte[index[0]].present = 1; > } > > +static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, > + uint64_t vaddr) > +{ > + uint16_t index[4]; > + struct pageMapL4Entry *pml4e; > + struct pageDirectoryPointerEntry *pdpe; > + struct pageDirectoryEntry *pde; > + struct pageTableEntry *pte; > + struct kvm_cpuid_entry2 *entry; > + int max_phy_addr; > + /* Set the bottom 52 bits. */ > + uint64_t rsvd_mask = 0x000fffffffffffff; > + > + entry = kvm_get_supported_cpuid_index(0x80000008, 0); > + max_phy_addr = entry->eax & 0x000000ff; > + /* Clear the bottom bits of the reserved mask. */ > + rsvd_mask = (rsvd_mask >> max_phy_addr) << max_phy_addr; > + > + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " > + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); > + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, > + (vaddr >> vm->page_shift)), > + "Invalid virtual address, vaddr: 0x%lx", > + vaddr); > + /* > + * Based on the mode check above there are 48 bits in the vaddr, so > + * shift 16 to sign extend the last bit (bit-47), > + */ > + TEST_ASSERT(vaddr == ((vaddr << 16) >> 16), > + "Canonical check failed. The virtual addres is invalid."); This trick doesn't work with an unsigned vaddr. (Shifting an unsigned value right introduces zeroes on the left, rather than replicating the sign bit. Nit: address is misspelled. > + > + index[0] = (vaddr >> 12) & 0x1ffu; > + index[1] = (vaddr >> 21) & 0x1ffu; > + index[2] = (vaddr >> 30) & 0x1ffu; > + index[3] = (vaddr >> 39) & 0x1ffu; > + > + pml4e = addr_gpa2hva(vm, vm->pgd); > + TEST_ASSERT(pml4e[index[3]].present, > + "Expected pml4e to be present for gva: 0x%08lx", vaddr); > + TEST_ASSERT(((pml4e[index[3]].address * vm->page_size) & rsvd_mask) == 0, > + "Unexpected reserved bits set."); Bit 7 of a PML4E is reserved, but it's not in your rsvd_mask. Also, are we assuming that the guest EFER.NXE bit is set? If not, then bit 63 of every page table level is reserved. > + > + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); > + TEST_ASSERT(pdpe[index[2]].present, > + "Expected pdpe to be present for gva: 0x%08lx", vaddr); > + TEST_ASSERT(pdpe[index[2]].page_size == 0, > + "Expected pdpe to map a pde not a 1-GByte page."); > + TEST_ASSERT(((pdpe[index[2]].address * vm->page_size) & rsvd_mask) == 0, > + "Unexpected reserved bits set."); > + > + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); > + TEST_ASSERT(pde[index[1]].present, > + "Expected pde to be present for gva: 0x%08lx", vaddr); > + TEST_ASSERT(pde[index[1]].page_size == 0, > + "Expected pde to map a pte not a 2-MByte page."); > + TEST_ASSERT(((pde[index[1]].address * vm->page_size) & rsvd_mask) == 0, > + "Unexpected reserved bits set."); > + > + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); > + TEST_ASSERT(pte[index[0]].present, > + "Expected pte to be present for gva: 0x%08lx", vaddr); > + > + return &pte[index[0]]; > +} > + > +uint64_t vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) > +{ > + struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vaddr); > + > + return *(uint64_t *)pte; > +} > + > +void vm_set_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, uint64_t pte) > +{ > + struct pageTableEntry *_pte = _vm_get_page_table_entry(vm, vaddr); Perhaps the argument could be renamed to 'new_pte' or 'val' or something, so you don't have to resort to an underscore prefix here? > + > + *(uint64_t *)_pte = pte; > +} > + > void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) > { > struct pageMapL4Entry *pml4e, *pml4e_start; > diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c > new file mode 100644 > index 000000000000..8d824af5f17b > --- /dev/null > +++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c > @@ -0,0 +1,224 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2020, Google LLC. > + * > + * Tests for KVM_CAP_EXIT_ON_EMULATION_FAILURE capability. > + */ > + > +#define _GNU_SOURCE /* for program_invocation_short_name */ > + > +#include "test_util.h" > +#include "kvm_util.h" > +#include "vmx.h" > + > +#define VCPU_ID 1 > +#define PAGE_SIZE 4096 > +#define MAXPHYADDR 36 > + > +#define MEM_REGION_GVA 0x0000123456789000 > +#define MEM_REGION_GPA 0x0000000700000000 > +#define MEM_REGION_SLOT 10 > +#define MEM_REGION_SIZE PAGE_SIZE > + > +extern char fld_start, fld_end; > + > +static void guest_code(void) > +{ > + __asm__ __volatile__("fld_start: flds (%[addr]); fld_end:" > + :: [addr]"r"(MEM_REGION_GVA)); Do you need the labels any more? > + > + GUEST_DONE(); > +} > + > +static void run_guest(struct kvm_vm *vm) > +{ > + int rc; > + > + rc = _vcpu_run(vm, VCPU_ID); > + TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); > +} > + > +/* > + * Accessors to get R/M, REG, and Mod bits described in the SDM vol 2, > + * figure 2-2 "Table Interpretation of ModR/M Byte (C8H)". > + */ > +#define GET_RM(insn_byte) (insn_byte & 0x7) > +#define GET_REG(insn_byte) ((insn_byte & 0x38) >> 3) > +#define GET_MOD(insn_byte) ((insn_byte & 0xc) >> 6) > + > +/* Ensure we are dealing with a simple 2-byte flds instruction. */ > +static bool is_flds(uint8_t *insn_bytes, uint8_t insn_size) > +{ > + return insn_size >= 2 && > + insn_bytes[0] == 0xd9 && > + GET_REG(insn_bytes[1]) == 0x0 && > + GET_MOD(insn_bytes[1]) == 0x0 && > + /* Ensure there is no SIB byte. */ > + GET_RM(insn_bytes[1]) != 0x4 && > + /* Ensure there is no displacement byte. */ > + GET_RM(insn_bytes[1]) != 0x5; > +} > + > +static void process_exit_on_emulation_error(struct kvm_vm *vm) > +{ > + struct kvm_run *run = vcpu_state(vm, VCPU_ID); > + struct kvm_regs regs; > + uint8_t *insn_bytes; > + uint8_t insn_size; > + uint64_t flags; > + > + TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, > + "Unexpected exit reason: %u (%s)", > + run->exit_reason, > + exit_reason_str(run->exit_reason)); > + > + TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, > + "Unexpected suberror: %u", > + run->emulation_failure.suberror); > + > + if (run->emulation_failure.ndata >= 1) { > + flags = run->emulation_failure.flags; > + if ((flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES) && > + run->emulation_failure.ndata >= 3) { > + insn_size = run->emulation_failure.insn_size; > + insn_bytes = run->emulation_failure.insn_bytes; > + > + TEST_ASSERT(insn_size <= 15 && insn_size > 0, > + "Unexpected instruction size: %u", > + insn_size); > + > + TEST_ASSERT(is_flds(insn_bytes, insn_size), > + "Unexpected instruction. Expected 'flds' (0xd9 /0), encountered instruction with opcode: 0x%x", > + insn_bytes[0]); Nit: insn_bytes[0] may not be an opcode (or even part of an opcode). It may be a prefix. > + > + /* > + * If is_flds() succeeded then the instruction bytes > + * contained an flds instruction that is 2-bytes in > + * length (ie: no SIB nor displacement). > + */ > + vcpu_regs_get(vm, VCPU_ID, ®s); > + regs.rip += 2; > + vcpu_regs_set(vm, VCPU_ID, ®s); > + } > + } > +} > + > +static void do_guest_assert(struct kvm_vm *vm, struct ucall *uc) > +{ > + TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], __FILE__, > + uc->args[1]); > +} > + > +static void check_for_guest_assert(struct kvm_vm *vm) > +{ > + struct kvm_run *run = vcpu_state(vm, VCPU_ID); > + struct ucall uc; > + > + if (run->exit_reason == KVM_EXIT_IO && > + get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) { > + do_guest_assert(vm, &uc); > + } > +} > + > +static void process_ucall_done(struct kvm_vm *vm) > +{ > + struct kvm_run *run = vcpu_state(vm, VCPU_ID); > + struct ucall uc; > + > + check_for_guest_assert(vm); > + > + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, > + "Unexpected exit reason: %u (%s)", > + run->exit_reason, > + exit_reason_str(run->exit_reason)); > + > + TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE, > + "Unexpected ucall command: %lu, expected UCALL_DONE (%d)", > + uc.cmd, UCALL_DONE); > +} > + > +static uint64_t process_ucall(struct kvm_vm *vm) > +{ > + struct kvm_run *run = vcpu_state(vm, VCPU_ID); > + struct ucall uc; > + > + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, > + "Unexpected exit reason: %u (%s)", > + run->exit_reason, > + exit_reason_str(run->exit_reason)); > + > + switch (get_ucall(vm, VCPU_ID, &uc)) { > + case UCALL_SYNC: > + break; > + case UCALL_ABORT: > + do_guest_assert(vm, &uc); > + break; > + case UCALL_DONE: > + process_ucall_done(vm); > + break; > + default: > + TEST_ASSERT(false, "Unexpected ucall"); > + } > + > + return uc.cmd; > +} > + > +int main(int argc, char *argv[]) > +{ > + struct kvm_enable_cap emul_failure_cap = { > + .cap = KVM_CAP_EXIT_ON_EMULATION_FAILURE, > + .args[0] = 1, > + }; > + struct kvm_cpuid_entry2 *entry; > + struct kvm_cpuid2 *cpuid; > + struct kvm_vm *vm; > + uint64_t gpa, pte; > + uint64_t *hva; > + int rc; > + > + /* Tell stdout not to buffer its content */ > + setbuf(stdout, NULL); > + > + vm = vm_create_default(VCPU_ID, 0, guest_code); > + > + if (!kvm_check_cap(KVM_CAP_SMALLER_MAXPHYADDR)) { > + printf("module parameter 'allow_smaller_maxphyaddr' is not set. Skipping test.\n"); > + return 0; > + } > + > + cpuid = kvm_get_supported_cpuid(); > + > + entry = kvm_get_supported_cpuid_index(0x80000008, 0); > + entry->eax = (entry->eax & 0xffffff00) | MAXPHYADDR; > + set_cpuid(cpuid, entry); > + > + vcpu_set_cpuid(vm, VCPU_ID, cpuid); > + > + entry = kvm_get_supported_cpuid_index(0x80000008, 0); Is the above line detritus? > + > + rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE); > + TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable"); > + vm_enable_cap(vm, &emul_failure_cap); > + > + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, > + MEM_REGION_GPA, MEM_REGION_SLOT, > + MEM_REGION_SIZE / PAGE_SIZE, 0); > + gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE, > + MEM_REGION_GPA, MEM_REGION_SLOT); > + TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); > + virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1, 0); > + hva = addr_gpa2hva(vm, MEM_REGION_GPA); > + memset(hva, 0, PAGE_SIZE); > + pte = vm_get_page_table_entry(vm, MEM_REGION_GVA); > + vm_set_page_table_entry(vm, MEM_REGION_GVA, pte | (1ull << 36)); > + > + run_guest(vm); > + process_exit_on_emulation_error(vm); > + run_guest(vm); > + > + TEST_ASSERT(process_ucall(vm) == UCALL_DONE, "Expected UCALL_DONE"); > + > + kvm_vm_free(vm); > + > + return 0; > +} > -- > 2.31.1.498.g6c1eba8ee3d-goog >