Maxim Levitsky <mlevitsk@xxxxxxxxxx> writes:

> On Thu, 2022-04-14 at 15:20 +0200, Vitaly Kuznetsov wrote:
>> Introduce a selftest for Hyper-V PV TLB flush hypercalls
>> (HvFlushVirtualAddressSpace/HvFlushVirtualAddressSpaceEx,
>> HvFlushVirtualAddressList/HvFlushVirtualAddressListEx).
>>
>> The test creates one 'sender' vCPU and two 'worker' vCPUs which
>> busy-loop reading from a certain GVA, checking the observed value. The
>> sender vCPU drops to the host to swap the data page with another page
>> filled with a different value. The expectation for the workers is also
>> altered. Without a TLB flush on the worker vCPUs, they may continue to
>> observe the old value. To guard against accidental TLB flushes for the
>> worker vCPUs the test is repeated 100 times.
>>
>> Hyper-V TLB flush hypercalls are tested in both 'normal' and 'XMM
>> fast' modes.
>>
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
>> ---
>>  tools/testing/selftests/kvm/.gitignore        |   1 +
>>  tools/testing/selftests/kvm/Makefile          |   1 +
>>  .../selftests/kvm/include/x86_64/hyperv.h     |   1 +
>>  .../selftests/kvm/x86_64/hyperv_tlb_flush.c   | 647 ++++++++++++++++++
>>  4 files changed, 650 insertions(+)
>>  create mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
>>
>> diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
>> index 5d5fbb161d56..1a1d09e414d5 100644
>> --- a/tools/testing/selftests/kvm/.gitignore
>> +++ b/tools/testing/selftests/kvm/.gitignore
>> @@ -25,6 +25,7 @@
>>  /x86_64/hyperv_features
>>  /x86_64/hyperv_ipi
>>  /x86_64/hyperv_svm_test
>> +/x86_64/hyperv_tlb_flush
>>  /x86_64/mmio_warning_test
>>  /x86_64/mmu_role_test
>>  /x86_64/platform_info_test
>> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
>> index 44889f897fe7..8b83abc09a1a 100644
>> --- a/tools/testing/selftests/kvm/Makefile
>> +++ b/tools/testing/selftests/kvm/Makefile
>> @@ -54,6 +54,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
>>  TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
>>  TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi
>>  TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
>> +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush
>>  TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
>>  TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
>>  TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
>> diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
>> index f51d6fab8e93..1e34dd7c5075 100644
>> --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
>> +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
>> @@ -185,6 +185,7 @@
>>  /* hypercall options */
>>  #define HV_HYPERCALL_FAST_BIT		BIT(16)
>>  #define HV_HYPERCALL_VARHEAD_OFFSET	17
>> +#define HV_HYPERCALL_REP_COMP_OFFSET	32
>>
>>  #define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48)
>>
>> diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
>> new file mode 100644
>> index 000000000000..00bcae45ddd2
>> --- /dev/null
>> +++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
>> @@ -0,0 +1,647 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests
>> + *
>> + * Copyright (C) 2022, Red Hat, Inc.
>> + *
>> + */
>> +
>> +#define _GNU_SOURCE /* for program_invocation_short_name */
>> +#include <pthread.h>
>> +#include <inttypes.h>
>> +
>> +#include "kvm_util.h"
>> +#include "hyperv.h"
>> +#include "processor.h"
>> +#include "test_util.h"
>> +#include "vmx.h"
>> +
>> +#define SENDER_VCPU_ID   1
>> +#define WORKER_VCPU_ID_1 2
>> +#define WORKER_VCPU_ID_2 65
>> +
>> +#define NTRY 100
>> +
>> +struct thread_params {
>> +	struct kvm_vm *vm;
>> +	uint32_t vcpu_id;
>> +};
>> +
>> +struct hv_vpset {
>> +	u64 format;
>> +	u64 valid_bank_mask;
>> +	u64 bank_contents[];
>> +};
>> +
>> +enum HV_GENERIC_SET_FORMAT {
>> +	HV_GENERIC_SET_SPARSE_4K,
>> +	HV_GENERIC_SET_ALL,
>> +};
>> +
>> +#define HV_FLUSH_ALL_PROCESSORS			BIT(0)
>> +#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES	BIT(1)
>> +#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY	BIT(2)
>> +#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT	BIT(3)
>> +
>> +/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
>> +struct hv_tlb_flush {
>> +	u64 address_space;
>> +	u64 flags;
>> +	u64 processor_mask;
>> +	u64 gva_list[];
>> +} __packed;
>> +
>> +/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
>> +struct hv_tlb_flush_ex {
>> +	u64 address_space;
>> +	u64 flags;
>> +	struct hv_vpset hv_vp_set;
>> +	u64 gva_list[];
>> +} __packed;
>> +
>> +static inline void hv_init(vm_vaddr_t pgs_gpa)
>> +{
>> +	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
>> +	wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
>> +}
>> +
>> +static void worker_code(void *test_pages, vm_vaddr_t pgs_gpa)
>> +{
>> +	u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
>> +	unsigned char chr;
>> +
>> +	x2apic_enable();
>> +	hv_init(pgs_gpa);
>> +
>> +	for (;;) {
>> +		chr = READ_ONCE(*(unsigned char *)(test_pages + 4096 * 2 + vcpu_id));
>
> It would be nice to wrap this into a function, like set_expected_char,
> for ease of understanding the code.
>
>> +		if (chr)
>> +			GUEST_ASSERT(*(unsigned char *)test_pages == chr);
>> +		asm volatile("nop");
>> +	}
>> +}
>> +
>> +static inline u64 hypercall(u64 control, vm_vaddr_t arg1, vm_vaddr_t arg2)
>> +{
>> +	u64 hv_status;
>> +
>> +	asm volatile("mov %3, %%r8\n"
>> +		     "vmcall"
>> +		     : "=a" (hv_status),
>> +		       "+c" (control), "+d" (arg1)
>> +		     : "r" (arg2)
>> +		     : "cc", "memory", "r8", "r9", "r10", "r11");
>> +
>> +	return hv_status;
>> +}
>> +
>> +static inline void nop_loop(void)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < 10000000; i++)
>> +		asm volatile("nop");
>> +}
>> +
>> +static inline void sync_to_xmm(void *data)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < 8; i++)
>> +		write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i));
>> +}
>
> Nitpick: I see duplicated code, I complain ;-) - maybe put the above
> into some common file?

Gone now.

>> +
>> +static void set_expected_char(void *addr, unsigned char chr, int vcpu_id)
>> +{
>> +	asm volatile("mfence");
>
> I remember that Paolo once told me (I might not remember that correctly
> though) that on x86 the actual hardware barriers like mfence are not
> really needed, because the hardware already does memory accesses in
> order, unless fancy (e.g. non-WB) memory types are used.

Even if it can be dropped, we still need a compiler barrier, so I prefer
to keep the explicit 'mfence'/'lfence'/... -- especially in tests where
performance doesn't matter much.
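For reference, the distinction as a generic sketch (this is not code
from the patch; barrier() below just mirrors the usual kernel-style
definition):

	/*
	 * Compiler barrier: forbids the compiler from reordering or
	 * caching memory accesses across this point. It emits no
	 * instruction, so it does not order anything the CPU does.
	 */
	#define barrier() asm volatile("" ::: "memory")

	/*
	 * mfence is a real fence instruction and, with the "memory"
	 * clobber, doubles as a compiler barrier.
	 */
	static inline void mb(void)
	{
		asm volatile("mfence" ::: "memory");
	}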
>
>> +	*(unsigned char *)(addr + 2 * 4096 + vcpu_id) = chr;
>> +}
>> +
>> +static void sender_guest_code(void *hcall_page, void *test_pages, vm_vaddr_t pgs_gpa)
>> +{
>> +	struct hv_tlb_flush *flush = (struct hv_tlb_flush *)hcall_page;
>> +	struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)hcall_page;
>> +	int stage = 1, i;
>> +	u64 res;
>> +
>> +	hv_init(pgs_gpa);
>> +
>> +	/* "Slow" hypercalls */
>
> I hopefully understand it correctly, see my comments below, but it
> might be worthy to add something similar to my comments to the code, to
> make it easier for someone reading the code to understand it.
>
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>
> Here we set the expected char to 0, meaning that the workers will not
> assert if there is a mismatch.
>
>> +		GUEST_SYNC(stage++);
>
> Now there is a mismatch: the host swapped the pages for us.
>
>> +		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> +		flush->processor_mask = BIT(WORKER_VCPU_ID_1);
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, pgs_gpa, pgs_gpa + 4096);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>
> Now that we flushed the TLB, the guest should see the correct value.
>
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>
> Now we force the workers to check it.
>
> Btw, an idea: it might be nice to use more than two test pages, like,
> say, 100 test pages, each filled with a different value; memory is
> cheap, and this way there would be no way for something to cause a
> 'double error' which could hide the bug by chance.
>
> Another thing: it might be nice to wrap this into a macro/function to
> avoid *that* much duplication.

In the next version I still keep two pages and two workers for
simplicity, but I wrap all these pre- and post- guts into wrapper
functions.
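Roughly along these lines -- just a sketch of the idea, the helper
names here are made up and may differ in the next version:

	static inline void prepare_to_test(void *hcall_page, void *test_pages,
					   int *stage)
	{
		/* Reset hypercall input, disarm the workers' checks */
		memset(hcall_page, 0, 4096);
		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
		/* Drop to the host so it can swap the test pages */
		GUEST_SYNC((*stage)++);
	}

	static inline void post_test(void *test_pages, int i,
				     bool check_worker1, bool check_worker2)
	{
		/* Re-arm the workers' expectations after the flush */
		if (check_worker1)
			set_expected_char(test_pages, i % 2 ? 0x1 : 0x2,
					  WORKER_VCPU_ID_1);
		if (check_worker2)
			set_expected_char(test_pages, i % 2 ? 0x1 : 0x2,
					  WORKER_VCPU_ID_2);
		nop_loop();
	}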
>
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> +		flush->processor_mask = BIT(WORKER_VCPU_ID_1);
>> +		flush->gva_list[0] = (u64)test_pages;
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
>> +				(1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> +				pgs_gpa, pgs_gpa + 4096);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS;
>> +		flush->processor_mask = 0;
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, pgs_gpa, pgs_gpa + 4096);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS;
>> +		flush->gva_list[0] = (u64)test_pages;
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
>> +				(1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> +				pgs_gpa, pgs_gpa + 4096);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> +		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> +		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
>> +		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
>> +				(1 << HV_HYPERCALL_VARHEAD_OFFSET),
>> +				pgs_gpa, pgs_gpa + 4096);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> +		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> +		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
>> +		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> +		/* bank_contents and gva_list occupy the same space, thus [1] */
>> +		flush_ex->gva_list[1] = (u64)test_pages;
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
>> +				(1 << HV_HYPERCALL_VARHEAD_OFFSET) |
>> +				(1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> +				pgs_gpa, pgs_gpa + 4096);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> +		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> +		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
>> +			BIT_ULL(WORKER_VCPU_ID_1 / 64);
>> +		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
>> +		flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
>> +				(2 << HV_HYPERCALL_VARHEAD_OFFSET),
>> +				pgs_gpa, pgs_gpa + 4096);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> +		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> +		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
>> +			BIT_ULL(WORKER_VCPU_ID_2 / 64);
>> +		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
>> +		flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> +		/* bank_contents and gva_list occupy the same space, thus [2] */
>> +		flush_ex->gva_list[2] = (u64)test_pages;
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
>> +				(2 << HV_HYPERCALL_VARHEAD_OFFSET) |
>> +				(1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> +				pgs_gpa, pgs_gpa + 4096);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> +		flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
>> +				pgs_gpa, pgs_gpa + 4096);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> +		flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
>> +		flush_ex->gva_list[0] = (u64)test_pages;
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
>> +				(1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> +				pgs_gpa, pgs_gpa + 4096);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* "Fast" hypercalls */
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush->processor_mask = BIT(WORKER_VCPU_ID_1);
>> +		sync_to_xmm(&flush->processor_mask);
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
>> +				HV_HYPERCALL_FAST_BIT, 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush->processor_mask = BIT(WORKER_VCPU_ID_1);
>> +		flush->gva_list[0] = (u64)test_pages;
>> +		sync_to_xmm(&flush->processor_mask);
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | HV_HYPERCALL_FAST_BIT |
>> +				(1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> +				0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		sync_to_xmm(&flush->processor_mask);
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
>> +				HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush->gva_list[0] = (u64)test_pages;
>> +		sync_to_xmm(&flush->processor_mask);
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST | HV_HYPERCALL_FAST_BIT |
>> +				(1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0,
>> +				HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> +		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
>> +		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> +		sync_to_xmm(&flush_ex->hv_vp_set);
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | HV_HYPERCALL_FAST_BIT |
>> +				(1 << HV_HYPERCALL_VARHEAD_OFFSET),
>> +				0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> +		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
>> +		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> +		/* bank_contents and gva_list occupy the same space, thus [1] */
>> +		flush_ex->gva_list[1] = (u64)test_pages;
>> +		sync_to_xmm(&flush_ex->hv_vp_set);
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | HV_HYPERCALL_FAST_BIT |
>> +				(1 << HV_HYPERCALL_VARHEAD_OFFSET) |
>> +				(1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> +				0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> +		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
>> +			BIT_ULL(WORKER_VCPU_ID_1 / 64);
>> +		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
>> +		flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> +		sync_to_xmm(&flush_ex->hv_vp_set);
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | HV_HYPERCALL_FAST_BIT |
>> +				(2 << HV_HYPERCALL_VARHEAD_OFFSET),
>> +				0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> +		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
>> +			BIT_ULL(WORKER_VCPU_ID_2 / 64);
>> +		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
>> +		flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
>> +		/* bank_contents and gva_list occupy the same space, thus [2] */
>> +		flush_ex->gva_list[2] = (u64)test_pages;
>> +		sync_to_xmm(&flush_ex->hv_vp_set);
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | HV_HYPERCALL_FAST_BIT |
>> +				(2 << HV_HYPERCALL_VARHEAD_OFFSET) |
>> +				(1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> +				0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> +		flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
>> +		sync_to_xmm(&flush_ex->hv_vp_set);
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX | HV_HYPERCALL_FAST_BIT,
>> +				0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
>> +	for (i = 0; i < NTRY; i++) {
>> +		memset(hcall_page, 0, 4096);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, 0x0, WORKER_VCPU_ID_2);
>> +		GUEST_SYNC(stage++);
>> +		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
>> +		flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
>> +		flush_ex->gva_list[0] = (u64)test_pages;
>> +		sync_to_xmm(&flush_ex->hv_vp_set);
>> +		res = hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX | HV_HYPERCALL_FAST_BIT |
>> +				(1UL << HV_HYPERCALL_REP_COMP_OFFSET),
>> +				0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
>> +		GUEST_ASSERT((res & 0xffff) == 0);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_1);
>> +		set_expected_char(test_pages, i % 2 ? 0x1 : 0x2, WORKER_VCPU_ID_2);
>> +		nop_loop();
>> +	}
>> +
>> +	GUEST_DONE();
>> +}
>> +
>> +static void *vcpu_thread(void *arg)
>> +{
>> +	struct thread_params *params = (struct thread_params *)arg;
>> +	struct ucall uc;
>> +	int old;
>> +	int r;
>> +	unsigned int exit_reason;
>> +
>> +	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
>> +	TEST_ASSERT(r == 0,
>> +		    "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
>> +		    params->vcpu_id, r);
>> +
>> +	vcpu_run(params->vm, params->vcpu_id);
>> +	exit_reason = vcpu_state(params->vm, params->vcpu_id)->exit_reason;
>> +
>> +	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
>> +		    "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
>> +		    params->vcpu_id, exit_reason, exit_reason_str(exit_reason));
>> +
>> +	if (get_ucall(params->vm, params->vcpu_id, &uc) == UCALL_ABORT) {
>> +		TEST_ASSERT(false,
>> +			    "vCPU %u exited with error: %s.\n",
>> +			    params->vcpu_id, (const char *)uc.args[0]);
>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>> +static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id)
>> +{
>> +	void *retval;
>> +	int r;
>> +
>> +	r = pthread_cancel(thread);
>> +	TEST_ASSERT(r == 0,
>> +		    "pthread_cancel on vcpu_id=%d failed with errno=%d",
>> +		    vcpu_id, r);
>> +
>> +	r = pthread_join(thread, &retval);
>> +	TEST_ASSERT(r == 0,
>> +		    "pthread_join on vcpu_id=%d failed with errno=%d",
>> +		    vcpu_id, r);
>> +	TEST_ASSERT(retval == PTHREAD_CANCELED,
>> +		    "expected retval=%p, got %p", PTHREAD_CANCELED,
>> +		    retval);
>> +}
>> +
>> +int main(int argc, char *argv[])
>> +{
>> +	int r;
>> +	pthread_t threads[2];
>> +	struct thread_params params[2];
>> +	struct kvm_vm *vm;
>> +	struct kvm_run *run;
>> +	vm_vaddr_t hcall_page, test_pages;
>> +	struct ucall uc;
>> +	int stage = 1;
>> +
>> +	vm = vm_create_default(SENDER_VCPU_ID, 0, sender_guest_code);
>> +	params[0].vm = vm;
>> +	params[1].vm = vm;
>> +
>> +	/* Hypercall input/output */
>> +	hcall_page = vm_vaddr_alloc_pages(vm, 2);
>> +	memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
>> +
>> +	/*
>> +	 * Test pages: the first one is filled with '0x1's, the second with '0x2's
>> +	 * and the test will swap their mappings. The third page keeps the indication
>> +	 * about the current state of mappings.
>> +	 */
>> +	test_pages = vm_vaddr_alloc_pages(vm, 3);
>> +	memset(addr_gva2hva(vm, test_pages), 0x1, 4096);
>> +	memset(addr_gva2hva(vm, test_pages) + 4096, 0x2, 4096);
>> +	set_expected_char(addr_gva2hva(vm, test_pages), 0x0, WORKER_VCPU_ID_1);
>> +	set_expected_char(addr_gva2hva(vm, test_pages), 0x0, WORKER_VCPU_ID_2);
>> +
>> +	vm_vcpu_add_default(vm, WORKER_VCPU_ID_1, worker_code);
>> +	vcpu_args_set(vm, WORKER_VCPU_ID_1, 2, test_pages, addr_gva2gpa(vm, hcall_page));
>> +	vcpu_set_msr(vm, WORKER_VCPU_ID_1, HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1);
>> +	vcpu_set_hv_cpuid(vm, WORKER_VCPU_ID_1);
>> +
>> +	vm_vcpu_add_default(vm, WORKER_VCPU_ID_2, worker_code);
>> +	vcpu_args_set(vm, WORKER_VCPU_ID_2, 2, test_pages, addr_gva2gpa(vm, hcall_page));
>> +	vcpu_set_msr(vm, WORKER_VCPU_ID_2, HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2);
>> +	vcpu_set_hv_cpuid(vm, WORKER_VCPU_ID_2);
>> +
>> +	vcpu_args_set(vm, SENDER_VCPU_ID, 3, hcall_page, test_pages,
>> +		      addr_gva2gpa(vm, hcall_page));
>
> It seems that all worker vCPUs get a pointer to the hypercall page,
> which they don't need and which, if used, would create a race.

Dropped (actually, I've created a new 'test_data' structure which is
shared by workers and the sender).

>
>> +	vcpu_set_hv_cpuid(vm, SENDER_VCPU_ID);
>> +
>> +	params[0].vcpu_id = WORKER_VCPU_ID_1;
>> +	r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
>> +	TEST_ASSERT(r == 0,
>> +		    "pthread_create halter failed errno=%d", errno);
>> +
>> +	params[1].vcpu_id = WORKER_VCPU_ID_2;
>> +	r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
>> +	TEST_ASSERT(r == 0,
>> +		    "pthread_create halter failed errno=%d", errno);
>
> Also, the worker threads here don't halt; the message was not updated,
> I think.

Fixed!

>
>> +
>> +	run = vcpu_state(vm, SENDER_VCPU_ID);
>> +
>> +	while (true) {
>> +		r = _vcpu_run(vm, SENDER_VCPU_ID);
>> +		TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
>> +		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
>> +			    "unexpected exit reason: %u (%s)",
>> +			    run->exit_reason, exit_reason_str(run->exit_reason));
>> +
>> +		switch (get_ucall(vm, SENDER_VCPU_ID, &uc)) {
>> +		case UCALL_SYNC:
>> +			TEST_ASSERT(uc.args[1] == stage,
>> +				    "Unexpected stage: %ld (%d expected)\n",
>> +				    uc.args[1], stage);
>> +			break;
>> +		case UCALL_ABORT:
>> +			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
>> +				  __FILE__, uc.args[1]);
>> +			return 1;
>> +		case UCALL_DONE:
>> +			return 0;
>> +		}
>> +
>> +		/* Swap test pages */
>> +		if (stage % 2) {
>> +			__virt_pg_map(vm, test_pages, addr_gva2gpa(vm, test_pages) + 4096,
>> +				      X86_PAGE_SIZE_4K, true);
>> +			__virt_pg_map(vm, test_pages + 4096, addr_gva2gpa(vm, test_pages) - 4096,
>> +				      X86_PAGE_SIZE_4K, true);
>> +		} else {
>> +			__virt_pg_map(vm, test_pages, addr_gva2gpa(vm, test_pages) - 4096,
>> +				      X86_PAGE_SIZE_4K, true);
>> +			__virt_pg_map(vm, test_pages + 4096, addr_gva2gpa(vm, test_pages) + 4096,
>> +				      X86_PAGE_SIZE_4K, true);
>> +		}
>
> Another question: why is the host doing the swapping of the pages?
> Since !EPT/!NPT is, no doubt, not the goal of this test, why not let
> the guest vCPU (the sender) do the swapping? This should eliminate the
> VM exits to the host (which can even interfere with the TLB flush) and
> make it closer to the real-world usage.

This is actually a good idea. It required some APIs to be exported and
some trickery so that the guest can actually reach its PTEs, but I think
it's worth it, so the next version will be doing all the updates from
the guest itself.
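Conceptually something like this (a simplified sketch only: the helper
and the PFN mask name below are made up, and the real version
additionally needs the selftests' page walk APIs exported so the guest
can locate its own PTE):

	/*
	 * Guest-side swap: repoint the PTE mapping the test GVA between
	 * the two data pages. Deliberately no local invlpg -- making the
	 * worker vCPUs see the new mapping is exactly the job of the
	 * Hyper-V flush hypercall under test.
	 */
	static void guest_swap_test_page(u64 *pte, u64 data_gpa1, u64 data_gpa2)
	{
		u64 val = *pte;

		/* PTE_PFN_MASK stands in for the real PFN bit mask */
		if ((val & PTE_PFN_MASK) == data_gpa1)
			val = (val & ~PTE_PFN_MASK) | data_gpa2;
		else
			val = (val & ~PTE_PFN_MASK) | data_gpa1;

		*pte = val;
	}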
>
>> +
>> +		stage++;
>> +	}
>> +
>> +	cancel_join_vcpu_thread(threads[0], WORKER_VCPU_ID_1);
>> +	cancel_join_vcpu_thread(threads[1], WORKER_VCPU_ID_2);
>> +	kvm_vm_free(vm);
>> +
>> +	return 0;
>> +}

-- 
Vitaly