Re: [RFC v4 8/8] KVM: selftests: Add a self test for UCNA injection.

Jue Wang <juew@xxxxxxxxxx> · Fri, 20 May 2022 15:16:04 -0700

Thanks, David.

On Fri, May 20, 2022 at 2:08 PM David Matlack <dmatlack@xxxxxxxxxx> wrote:
>
> On Fri, May 20, 2022 at 10:37 AM Jue Wang <juew@xxxxxxxxxx> wrote:
> >
> > This patch add a self test that verifies user space can inject
> > UnCorrectable No Action required (UCNA) memory errors to the guest.
> >
> > Signed-off-by: Jue Wang <juew@xxxxxxxxxx>
> > ---
> >  tools/testing/selftests/kvm/.gitignore        |   1 +
> >  tools/testing/selftests/kvm/Makefile          |   1 +
> >  .../selftests/kvm/include/x86_64/apic.h       |   1 +
> >  .../selftests/kvm/include/x86_64/mce.h        |  25 ++
> >  .../selftests/kvm/include/x86_64/processor.h  |   1 +
> >  .../selftests/kvm/lib/x86_64/processor.c      |   2 +-
> >  .../kvm/x86_64/ucna_injection_test.c          | 287 ++++++++++++++++++
> >  7 files changed, 317 insertions(+), 1 deletion(-)
> >  create mode 100644 tools/testing/selftests/kvm/include/x86_64/mce.h
> >  create mode 100644 tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
> >
> > diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
> > index 0b0e4402bba6..a8eb9d3f082c 100644
> > --- a/tools/testing/selftests/kvm/.gitignore
> > +++ b/tools/testing/selftests/kvm/.gitignore
> > @@ -37,6 +37,7 @@
> >  /x86_64/tsc_scaling_sync
> >  /x86_64/sync_regs_test
> >  /x86_64/tsc_msrs_test
> > +/x86_64/ucna_injection_test
> >  /x86_64/userspace_io_test
> >  /x86_64/userspace_msr_exit_test
> >  /x86_64/vmx_apic_access_test
> > diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> > index 681b173aa87c..8d9858b54b98 100644
> > --- a/tools/testing/selftests/kvm/Makefile
> > +++ b/tools/testing/selftests/kvm/Makefile
> > @@ -66,6 +66,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
> >  TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
> >  TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
> >  TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
> > +TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test
> >  TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
> >  TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
> >  TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
> > diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h
> > index ac88557dcc9a..bed316fdecd5 100644
> > --- a/tools/testing/selftests/kvm/include/x86_64/apic.h
> > +++ b/tools/testing/selftests/kvm/include/x86_64/apic.h
> > @@ -35,6 +35,7 @@
> >  #define                APIC_SPIV_APIC_ENABLED          (1 << 8)
> >  #define APIC_IRR       0x200
> >  #define        APIC_ICR        0x300
> > +#define        APIC_LVTCMCI    0x2f0
> >  #define                APIC_DEST_SELF          0x40000
> >  #define                APIC_DEST_ALLINC        0x80000
> >  #define                APIC_DEST_ALLBUT        0xC0000
> > diff --git a/tools/testing/selftests/kvm/include/x86_64/mce.h b/tools/testing/selftests/kvm/include/x86_64/mce.h
> > new file mode 100644
> > index 000000000000..6119321f3f5d
> > --- /dev/null
> > +++ b/tools/testing/selftests/kvm/include/x86_64/mce.h
> > @@ -0,0 +1,25 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/*
> > + * tools/testing/selftests/kvm/include/x86_64/mce.h
> > + *
> > + * Copyright (C) 2022, Google LLC.
> > + */
> > +
> > +#ifndef SELFTEST_KVM_MCE_H
> > +#define SELFTEST_KVM_MCE_H
> > +
> > +#define MCG_CTL_P              BIT_ULL(8)   /* MCG_CTL register available */
> > +#define MCG_SER_P              BIT_ULL(24)  /* MCA recovery/new status bits */
> > +#define MCG_LMCE_P             BIT_ULL(27)  /* Local machine check supported */
> > +#define MCG_CMCI_P             BIT_ULL(10)  /* CMCI supported */
> > +#define KVM_MAX_MCE_BANKS 32
> > +#define MCG_CAP_BANKS_MASK 0xff       /* Bit 0-7 of the MCG_CAP register are #banks */
> > +#define MCI_STATUS_VAL (1ULL << 63)   /* valid error */
> > +#define MCI_STATUS_UC (1ULL << 61)    /* uncorrected error */
> > +#define MCI_STATUS_EN (1ULL << 60)    /* error enabled */
> > +#define MCI_STATUS_MISCV (1ULL << 59) /* misc error reg. valid */
> > +#define MCI_STATUS_ADDRV (1ULL << 58) /* addr reg. valid */
> > +#define MCM_ADDR_PHYS 2    /* physical address */
> > +#define MCI_CTL2_CMCI_EN               BIT_ULL(30)
> > +
> > +#endif /* SELFTEST_KVM_MCE_H */
> > diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
> > index d0d51adec76e..fa316c3b7725 100644
> > --- a/tools/testing/selftests/kvm/include/x86_64/processor.h
> > +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
> > @@ -481,6 +481,7 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
> >  void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
> >  struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
> >  void vm_xsave_req_perm(int bit);
> > +void vcpu_setup(struct kvm_vm *vm, int vcpuid);
> >
> >  enum x86_page_size {
> >         X86_PAGE_SIZE_4K = 0,
> > diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
> > index 33ea5e9955d9..bb1ef665cd10 100644
> > --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
> > +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
> > @@ -580,7 +580,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
> >         kvm_seg_fill_gdt_64bit(vm, segp);
> >  }
> >
> > -static void vcpu_setup(struct kvm_vm *vm, int vcpuid)
> > +void vcpu_setup(struct kvm_vm *vm, int vcpuid)
> >  {
> >         struct kvm_sregs sregs;
> >
> > diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
> > new file mode 100644
> > index 000000000000..104a9f116b23
> > --- /dev/null
> > +++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
> > @@ -0,0 +1,287 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * ucna_injection_test
> > + *
> > + * Copyright (C) 2022, Google LLC.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.
> > + *
> > + * Test that user space can inject UnCorrectable No Action required (UCNA)
> > + * memory errors to the guest.
> > + *
> > + * The test starts one vCPU with the MCG_CMCI_P enabled. It verifies that
> > + * proper UCNA errors can be injected to a vCPU with MCG_CMCI_P and
> > + * corresponding per-bank control register (MCI_CTL2) bit enabled.
> > + * The test also checks that the UCNA errors get recorded in the
> > + * Machine Check bank registers no matter the error signal interrupts get
> > + * delivered into the guest or not.
> > + *
> > + */
> > +
> > +#define _GNU_SOURCE /* for program_invocation_short_name */
> > +#include <pthread.h>
> > +#include <inttypes.h>
> > +#include <string.h>
> > +#include <time.h>
> > +
> > +#include "kvm_util_base.h"
> > +#include "kvm_util.h"
> > +#include "mce.h"
> > +#include "processor.h"
> > +#include "test_util.h"
> > +#include "apic.h"
> > +
> > +#define UCNA_VCPU_ID 0
> > +#define SYNC_FIRST_UCNA 9
> > +#define SYNC_SECOND_UCNA 10
> > +#define FIRST_UCNA_ADDR 0xdeadbeef
> > +#define SECOND_UCNA_ADDR 0xcafeb0ba
> > +
> > +/*
> > + * Vector for the CMCI interrupt.
> > + * Value is arbitrary. Any value in 0x20-0xFF should work:
> > + * https://wiki.osdev.org/Interrupt_Vector_Table
> > + */
> > +#define CMCI_VECTOR  0xa9
> > +
> > +#define UCNA_BANK  0x7 // IMC0 bank
> > +
> > +/*
> > + * Record states about the injected UCNA.
> > + * The variables started with the 'i_' prefixes are recorded in interrupt
> > + * handler. Variables without the 'i_' prefixes are recorded in guest main
> > + * execution thread.
> > + */
> > +static volatile uint64_t i_ucna_rcvd;
> > +static volatile uint64_t i_ucna_addr;
> > +static volatile uint64_t ucna_addr;
> > +static volatile uint64_t ucna_addr2;
> > +
> > +struct thread_params {
> > +       struct kvm_vm *vm;
> > +       uint32_t vcpu_id;
> > +};
> > +
> > +static void verify_apic_base_addr(void)
> > +{
> > +       uint64_t msr = rdmsr(MSR_IA32_APICBASE);
> > +       uint64_t base = GET_APIC_BASE(msr);
> > +
> > +       GUEST_ASSERT(base == APIC_DEFAULT_GPA);
> > +}
> > +
> > +static void guest_code(void)
> > +{
> > +       uint64_t ctl2;
> > +       verify_apic_base_addr();
> > +       xapic_enable();
> > +
> > +       /* Sets up the interrupt vector and enables per-bank CMCI sigaling. */
> > +       xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED);
> > +       ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
> > +       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
> > +
> > +       /* Enables interrupt in guest. */
> > +       asm volatile("sti");
> > +
> > +       /* Let user space inject the first UCNA */
> > +       GUEST_SYNC(SYNC_FIRST_UCNA);
> > +
> > +       ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
> > +
> > +       /* Disables the per-bank CMCI signaling. */
> > +       ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
> > +       wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN);
> > +
> > +       /* Let the user space inject the second UCNA */
> > +       GUEST_SYNC(SYNC_SECOND_UCNA);
> > +
> > +       ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
> > +       GUEST_DONE();
> > +}
> > +
> > +static void guest_cmci_handler(struct ex_regs *regs)
> > +{
> > +       i_ucna_rcvd++;
> > +       i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
> > +       xapic_write_reg(APIC_EOI, 0);
> > +}
> > +
> > +static void inject_ucna(struct kvm_vm *vm, uint32_t vcpu_id, uint64_t addr) {
> > +       /*
> > +        * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in
> > +        * the IA32_MCi_STATUS register.
> > +        * MSCOD=1 (BIT[16] - MscodDataRdErr).
> > +        * MCACOD=0x0090 (Memory controller error format, channel 0)
> > +        */
> > +       uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
> > +                         MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;
> > +       struct kvm_x86_mce mce = {};
> > +       mce.status = status;
> > +       mce.mcg_status = 0;
> > +       /*
> > +        * MCM_ADDR_PHYS indicates the reported address is a physical address.
> > +        * Lowest 6 bits is the recoverable address LSB, i.e., the injected MCE
> > +        * is at 4KB granularity.
> > +        */
> > +       mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
> > +       mce.addr = addr;
> > +       mce.bank = UCNA_BANK;
> > +
> > +       TEST_ASSERT(_vcpu_ioctl(vm, vcpu_id, KVM_X86_SET_MCE, &mce) != -1,
> > +                       "Inject UCNA");
> > +}
> > +
> > +static void *vcpu_thread(void *arg)
> > +{
> > +       struct thread_params *params = (struct thread_params *)arg;
> > +       struct ucall uc;
> > +       int old;
> > +       int r;
> > +       unsigned int exit_reason;
> > +
> > +       r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
> > +       TEST_ASSERT(r == 0,
> > +                   "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
> > +                   params->vcpu_id, r);
> > +
> > +       fprintf(stderr, "vCPU thread running vCPU %u\n", params->vcpu_id);
> > +       vcpu_run(params->vm, params->vcpu_id);
> > +
> > +       exit_reason = vcpu_state(params->vm, params->vcpu_id)->exit_reason;
> > +       TEST_ASSERT(exit_reason == KVM_EXIT_IO,
> > +                   "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
> > +                   params->vcpu_id, exit_reason, exit_reason_str(exit_reason));
> > +       TEST_ASSERT(get_ucall(params->vm, params->vcpu_id, &uc) == UCALL_SYNC,
> > +                   "Expect UCALL_SYNC\n");
> > +       TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
> > +
> > +       fprintf(stderr, "Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
> > +
> > +       inject_ucna(params->vm, params->vcpu_id, FIRST_UCNA_ADDR);
> > +       vcpu_run(params->vm, params->vcpu_id);
> > +
> > +       exit_reason = vcpu_state(params->vm, params->vcpu_id)->exit_reason;
> > +       TEST_ASSERT(exit_reason == KVM_EXIT_IO,
> > +                   "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
> > +                   params->vcpu_id, exit_reason, exit_reason_str(exit_reason));
> > +       TEST_ASSERT(get_ucall(params->vm, params->vcpu_id, &uc) == UCALL_SYNC,
> > +                   "Expect UCALL_SYNC\n");
> > +       TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
> > +
> > +       fprintf(stderr, "Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
> > +
> > +       inject_ucna(params->vm, params->vcpu_id, SECOND_UCNA_ADDR);
> > +       vcpu_run(params->vm, params->vcpu_id);
> > +
> > +       exit_reason = vcpu_state(params->vm, params->vcpu_id)->exit_reason;
> > +       TEST_ASSERT(exit_reason == KVM_EXIT_IO,
> > +                   "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
> > +                   params->vcpu_id, exit_reason, exit_reason_str(exit_reason));
> > +       if (get_ucall(params->vm, params->vcpu_id, &uc) == UCALL_ABORT) {
> > +               TEST_ASSERT(false,
> > +                           "vCPU %u exited with error: %s.\n",
> > +                           params->vcpu_id, (const char *)uc.args[0]);
> > +       }
> > +
> > +       return NULL;
> > +}
> > +
> > +static void setup_mce_cap(struct kvm_vm *vm, uint32_t vcpuid)
> > +{
> > +       uint64_t supported_mcg_caps = 0;
> > +       uint64_t mcg_caps = MCG_CMCI_P | MCG_CTL_P | MCG_SER_P | MCG_LMCE_P
> > +               | KVM_MAX_MCE_BANKS;
> > +
> > +       TEST_ASSERT(_kvm_ioctl(vm, KVM_X86_GET_MCE_CAP_SUPPORTED,
> > +                              &supported_mcg_caps) != -1,
> > +                   "KVM_GET_MCE_CAP_SUPPORTED");
> > +       fprintf(stderr, "KVM supported MCG_CAP: %#lx\n", supported_mcg_caps);
> > +
> > +       TEST_ASSERT(supported_mcg_caps & MCG_CMCI_P, "MCG_CMCI_P is not supported");
>
> Instead of failing the test on older kernels, mark it as skipped.
>
> if (!(supported_mcg_caps & MCG_CMCI_P)) {
>         print_skip(" MCG_CMCI_P is not supported");
>         exit(KSFT_SKIP);
> }
>
Thanks, updated.
> > +
> > +       mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK;
> > +       TEST_ASSERT(_vcpu_ioctl(vm, vcpuid, KVM_X86_SETUP_MCE, &mcg_caps) != -1,
> > +                   "KVM_X86_SETUP_MCE");
> > +}
> > +
> > +static void create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
> > +{
> > +       struct kvm_mp_state mp_state;
> > +       struct kvm_regs regs;
> > +       vm_vaddr_t stack_vaddr;
> > +       stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
> > +                                    DEFAULT_GUEST_STACK_VADDR_MIN);
> > +
> > +       vm_vcpu_add(vm, vcpuid);
> > +       setup_mce_cap(vm, vcpuid);
>
> Is some ordering requirement between KVM_X86_SETUP_MCE and other vCPU
> ioctls? I'm wondering why this function has to duplicate so much of
> the vCPU setup code.

Yes, KVM_X86_SETUP_MCE sets up the per vCPU mcg_cap and other MCE
related registers.

This has to be done before kvm_lapic_reset or
kvm_vcpu_after_set_cpuid because otherwise the kvm_apic_set_version
call will effectively set a value in APIC_LVR register that the number
of supported APIC_LVT* vectors is 5 instead of 6 (including CMCI).

In practice, this is how we would configure the per-vCPU
KVM_X86_SETUP_MCE in some other user space VMM impl (and the way it
has been done).
> > +
> > +       vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
> > +       vcpu_setup(vm, vcpuid);
> > +
> > +       /* Setup guest general purpose registers */
> > +       vcpu_regs_get(vm, vcpuid, &regs);
> > +       regs.rflags = regs.rflags | 0x2;
> > +       regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
> > +       regs.rip = (unsigned long) guest_code;
> > +       vcpu_regs_set(vm, vcpuid, &regs);
> > +
> > +       /* Setup the MP state */
> > +       mp_state.mp_state = 0;
> > +       vcpu_set_mp_state(vm, vcpuid, &mp_state);
> > +}
> > +
> > +int main(int argc, char *argv[])
> > +{
> > +       int r;
> > +       void *retval;
> > +       int run_secs = 3;
> > +       pthread_t threads[2];
> > +       struct thread_params params[2];
> > +       struct kvm_vm *vm;
> > +       uint64_t *p_i_ucna_rcvd, *p_i_ucna_addr;
> > +       uint64_t *p_ucna_addr, *p_ucna_addr2;
> > +
> > +       kvm_check_cap(KVM_CAP_MCE);
> > +
> > +       vm = vm_create_without_vcpus(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES);
> > +
> > +       create_vcpu_with_mce_cap(vm, UCNA_VCPU_ID, guest_code);
> > +
> > +       params[0].vm = vm;
> > +
> > +       vm_init_descriptor_tables(vm);
> > +       vcpu_init_descriptor_tables(vm, UCNA_VCPU_ID);
> > +       vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
> > +
> > +       virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
> > +
> > +       p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
> > +       p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
> > +       p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
> > +       p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
> > +
> > +       /* Start vCPU thread and wait for it to execute first HLT. */
>
> Stale comment from xapic_ipi_test.c
>
> > +       params[0].vcpu_id = UCNA_VCPU_ID;
> > +       r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
> > +       TEST_ASSERT(r == 0,
> > +                   "pthread_create vcpu thread failed errno=%d", errno);
> > +       fprintf(stderr, "vCPU thread started\n");
> > +
> > +       sleep(run_secs);
> > +
> > +       r = pthread_join(threads[0], &retval);
> > +       TEST_ASSERT(r == 0,
> > +                   "pthread_join on vcpu_id=%d failed with errno=%d",
> > +                   UCNA_VCPU_ID, r);
>
> Creating a here is unnecessary, e.g. you could just replace all this
> code with a call to vcpu_thread().

Good point, updated.
>
> > +
> > +       fprintf(stderr,
> > +               "Test successful after running for %d seconds.\n"
> > +               "UCNA CMCI interrupts received: %ld\n"
> > +               "Last UCNA address received via CMCI: %lx\n"
> > +               "First UCNA address in vCPU thread: %lx\n"
> > +               "Second UCNA address in vCPU thread: %lx\n",
> > +               run_secs, *p_i_ucna_rcvd, *p_i_ucna_addr, *p_ucna_addr, *p_ucna_addr2);
>
> Instead of printing out all these fields, can you make assertions
> about them? e.g. Assert that the expected number of interrupts were
> received, etc.

Updated.
>
> > +
> > +       kvm_vm_free(vm);
> > +}
> > --
> > 2.36.1.124.g0e6072fb45-goog
> >