Re: [PATCH 1/2] kvm/x86: Hyper-V based guest crash data handling

Peter Hornyack <peterhornyack@xxxxxxxxxx> · Fri, 12 Jun 2015 16:03:51 -0700

Hi Denis, Andrey, I have a few comments and questions. (re-sending in
plain-text mode, apologies for sending twice.)

On Thu, Jun 11, 2015 at 6:18 AM, Denis V. Lunev <den@xxxxxxxxxx> wrote:
> From: Andrey Smetanin <asmetanin@xxxxxxxxxxxxx>
>
> Windows 2012 guests can notify hypervisor about occurred guest crash
> (Windows bugcheck(BSOD)) by writing specific Hyper-V msrs. This patch does
> handling of this MSR's by KVM and sending notification to user space that
> allows to gather Windows guest crash dump by QEMU/LIBVIRT.
>
> The idea is to provide functionality equal to pvpanic device without
> QEMU guest agent for Windows.
>
> The idea is borrowed from Linux HyperV bus driver and validated against
> Windows 2k12.
>
> Signed-off-by: Andrey Smetanin <asmetanin@xxxxxxxxxxxxx>
> Signed-off-by: Denis V. Lunev <den@xxxxxxxxxx>
> CC: Gleb Natapov <gleb@xxxxxxxxxx>
> CC: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
>  arch/x86/include/uapi/asm/hyperv.h | 10 +++++
>  arch/x86/kvm/Makefile              |  2 +-
>  arch/x86/kvm/mshv.c                | 84 ++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/mshv.h                | 32 +++++++++++++++
>  arch/x86/kvm/x86.c                 | 25 ++++++++++++
>  include/linux/kvm_host.h           | 17 ++++++++
>  include/uapi/linux/kvm.h           | 11 +++++
>  7 files changed, 180 insertions(+), 1 deletion(-)
>  create mode 100644 arch/x86/kvm/mshv.c
>  create mode 100644 arch/x86/kvm/mshv.h
>
> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> index ce6068d..25f3064 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -199,6 +199,16 @@
>  #define HV_X64_MSR_STIMER3_CONFIG              0x400000B6
>  #define HV_X64_MSR_STIMER3_COUNT               0x400000B7
>
> +
> +/* Hypev-V guest crash notification MSR's */
> +#define HV_X64_MSR_CRASH_P0                    0x40000100
> +#define HV_X64_MSR_CRASH_P1                    0x40000101
> +#define HV_X64_MSR_CRASH_P2                    0x40000102
> +#define HV_X64_MSR_CRASH_P3                    0x40000103
> +#define HV_X64_MSR_CRASH_P4                    0x40000104
> +#define HV_X64_MSR_CRASH_CTL                   0x40000105
> +#define HV_CRASH_CTL_CRASH_NOTIFY              (1ULL << 63)
> +
>  #define HV_X64_MSR_HYPERCALL_ENABLE            0x00000001
>  #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT        12
>  #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index 16e8f96..b1ec24d 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -12,7 +12,7 @@ kvm-y                 += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
>  kvm-$(CONFIG_KVM_ASYNC_PF)     += $(KVM)/async_pf.o
>
>  kvm-y                  += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
> -                          i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
> +                          i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mshv.o
>  kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)    += assigned-dev.o iommu.o
>  kvm-intel-y            += vmx.o
>  kvm-amd-y              += svm.o
> diff --git a/arch/x86/kvm/mshv.c b/arch/x86/kvm/mshv.c
> new file mode 100644
> index 0000000..ad367c44
> --- /dev/null
> +++ b/arch/x86/kvm/mshv.c
> @@ -0,0 +1,84 @@
> +/*
> + * KVM Microsoft Hyper-V extended paravirtualization
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + * Copyright (C) 2015 Andrey Smetanin <asmetanin@xxxxxxxxxxxxx>
> + *
> + * Authors: Andrey Smetanin asmetanin@xxxxxxxxxxxxx
> + */
> +
> +#include <linux/kvm_host.h>
> +#include "mshv.h"
> +
> +int kvm_mshv_ctx_create(struct kvm *kvm)
> +{
> +       struct kvm_mshv_ctx *ctx;
> +
> +       ctx = kzalloc(sizeof(struct kvm_mshv_ctx), GFP_KERNEL);
> +       if (!ctx)
> +               return -ENOMEM;
> +
> +       ctx->kvm = kvm;
> +       atomic_set(&ctx->crash_pending, 0);
> +       kvm->mshv_ctx = ctx;
> +       return 0;
> +}
> +
> +void kvm_mshv_ctx_destroy(struct kvm *kvm)
> +{
> +       kfree(kvm->mshv_ctx);
> +}
> +
> +int kvm_mshv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> +{
> +       struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> +
> +       atomic_set(&ctx->crash_pending, 1);

Can you explain why crash_pending is needed?

Do you know what the Windows guest behavior is here? From my reading
of the Hyper-V TLFS 4.0, the guest will read HV_X64_MSR_CRASH_CTL at
some point (not necessarily at the time of the crash, potentially at
boot time?) to determine the crash actions that the hypervisor
supports. I'm not sure why kvm needs to remember any state on reads
from HV_X64_MSR_CRASH_CTL.

 > +
> +       /* Response that KVM ready to receive crash data */
> +       *pdata = HV_CRASH_CTL_CRASH_NOTIFY;

The TLFS says that CrashNotify is the only current action that is
supported on a guest crash, but other actions may be supported in the
future. I suggest defining something like
HV_X64_MSR_CRASH_CTL_CONTENTS (see below), which will only have
HV_CRASH_CTL_CRASH_NOTIFY set for now but may have other bits set in
the future, and then setting *pdata = HV_X64_MSR_CRASH_CTL_CONTENTS
here.

> +       return 0;
> +}
> +
> +int kvm_mshv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> +{
> +       struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> +
> +       if (atomic_dec_and_test(&ctx->crash_pending)) {

The TLFS says: "To invoke a supported hypervisor guest crash action, a
child partition writes to the HV_X64_MSR_CRASH_CTL register,
specifying the desired action." However, this implementation doesn't
actually check the data that's written to this register. The condition
here should check for "data & HV_CRASH_CTL_CRASH_NOTIFY", right?

I'm still not clear on the need for crash_pending here... maybe there
are concurrency reasons that I haven't thought through.

> +               pr_debug("vcpu %p 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx",
> +                        vcpu, ctx->crash_p0, ctx->crash_p1, ctx->crash_p2,
> +                        ctx->crash_p3, ctx->crash_p4);
> +
> +               /* Crash data almost gathered so notify user space */
> +               kvm_make_request(KVM_REQ_MSHV_CRASH, vcpu);
> +       }
> +
> +       return 0;
> +}
> +
> +int kvm_mshv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> +{
> +       struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> +
> +       switch (msr) {
> +       case HV_X64_MSR_CRASH_P0:
> +               ctx->crash_p0 = data;
> +               return 0;
> +       case HV_X64_MSR_CRASH_P1:
> +               ctx->crash_p1 = data;
> +               return 0;
> +       case HV_X64_MSR_CRASH_P2:
> +               ctx->crash_p2 = data;
> +               return 0;
> +       case HV_X64_MSR_CRASH_P3:
> +               ctx->crash_p3 = data;
> +               return 0;
> +       case HV_X64_MSR_CRASH_P4:
> +               ctx->crash_p4 = data;
> +               return 0;
> +       default:
> +               return -EINVAL;
> +       }
> +}
> diff --git a/arch/x86/kvm/mshv.h b/arch/x86/kvm/mshv.h
> new file mode 100644
> index 0000000..ce8e7fa
> --- /dev/null
> +++ b/arch/x86/kvm/mshv.h
> @@ -0,0 +1,32 @@
> +/*
> + * KVM Microsoft Hyper-V extended paravirtualization
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + * Copyright (C) 2015 Andrey Smetanin <asmetanin@xxxxxxxxxxxxx>
> + *
> + * Authors: Andrey Smetanin asmetanin@xxxxxxxxxxxxx
> + */
> +
> +#ifndef __ARCH_X86_KVM_MSHV_H__
> +#define __ARCH_X86_KVM_MSHV_H__
> +

I suggest here:

#define HV_X64_MSR_CRASH_CTL_CONTENTS  \
        (HV_CRASH_CTL_CRASH_NOTIFY)

To allow for more crash actions to be added in the future.

> +static inline struct kvm_mshv_ctx *kvm_get_mshv_ctx(struct kvm *vm)
> +{
> +       return vm->mshv_ctx;
> +}
> +
> +static inline struct kvm_mshv_ctx *kvm_vcpu_get_mshv_ctx(struct kvm_vcpu *vcpu)
> +{
> +       return vcpu->kvm->mshv_ctx;
> +}
> +
> +int kvm_mshv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
> +int kvm_mshv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 data);
> +int kvm_mshv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 msr, u64 data);
> +
> +int kvm_mshv_ctx_create(struct kvm *kvm);
> +void kvm_mshv_ctx_destroy(struct kvm *kvm);
> +
> +#endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index ea306ad..388b58f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -28,6 +28,7 @@
>  #include "x86.h"
>  #include "cpuid.h"
>  #include "assigned-dev.h"
> +#include "mshv.h"
>
>  #include <linux/clocksource.h>
>  #include <linux/interrupt.h>
> @@ -2338,6 +2339,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>                 } else
>                         return set_msr_hyperv(vcpu, msr, data);
>                 break;
> +       case HV_X64_MSR_CRASH_CTL:
> +               return kvm_mshv_msr_set_crash_ctl(vcpu, msr, data);
> +       case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
> +               return kvm_mshv_msr_set_crash_data(vcpu, msr, data);
>         case MSR_IA32_BBL_CR_CTL3:
>                 /* Drop writes to this legacy MSR -- see rdmsr
>                  * counterpart for further detail.
> @@ -2650,6 +2655,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>                 } else
>                         return get_msr_hyperv(vcpu, msr, pdata);
>                 break;
> +       case HV_X64_MSR_CRASH_CTL:
> +               return kvm_mshv_msr_get_crash_ctl(vcpu, msr, pdata);

Have the Windows guests that you've tested ever tried to read from the
data registers HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4? Is there
any harm in implementing them for reads? The TLFS doesn't seem to say
if reads are permitted or not. Not terribly important, but might be
nice to have.

>         case MSR_IA32_BBL_CR_CTL3:
>                 /* This legacy MSR exists but isn't fully documented in current
>                  * silicon.  It is however accessed by winxp in very narrow
> @@ -6280,6 +6287,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>                         vcpu_scan_ioapic(vcpu);
>                 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
>                         kvm_vcpu_reload_apic_access_page(vcpu);
> +               if (kvm_check_request(KVM_REQ_MSHV_CRASH, vcpu)) {
> +                       struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> +
> +                       vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
> +                       vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
> +                       vcpu->run->system_event.flags = 0;
> +                       vcpu->run->system_event.crash.p0 = ctx->crash_p0;
> +                       vcpu->run->system_event.crash.p1 = ctx->crash_p1;
> +                       vcpu->run->system_event.crash.p2 = ctx->crash_p2;
> +                       vcpu->run->system_event.crash.p3 = ctx->crash_p3;
> +                       vcpu->run->system_event.crash.p4 = ctx->crash_p4;
> +                       r = 0;
> +                       goto out;
> +               }
>         }
>
>         if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> @@ -7418,6 +7439,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>         if (type)
>                 return -EINVAL;
>
> +       if (kvm_mshv_ctx_create(kvm))
> +               return -ENOMEM;
> +
>         INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
>         INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
>         INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
> @@ -7484,6 +7508,7 @@ void kvm_arch_sync_events(struct kvm *kvm)
>
>  void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
> +       kvm_mshv_ctx_destroy(kvm);
>         if (current->mm == kvm->mm) {
>                 /*
>                  * Free memory regions allocated on behalf of userspace,
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index ad45054..83bd7bf 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -134,6 +134,7 @@ static inline bool is_error_page(struct page *page)
>  #define KVM_REQ_ENABLE_IBS        23
>  #define KVM_REQ_DISABLE_IBS       24
>  #define KVM_REQ_APIC_PAGE_RELOAD  25
> +#define KVM_REQ_MSHV_CRASH        26
>
>  #define KVM_USERSPACE_IRQ_SOURCE_ID            0
>  #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID       1
> @@ -343,6 +344,21 @@ struct kvm_memslots {
>         int used_slots;
>  };
>
> +/*
> + * Ms hyperv paravirt context
> + */
> +struct kvm_mshv_ctx {
> +       struct kvm      *kvm;
> +       atomic_t        crash_pending;
> +
> +       /* Guest crash related parameters */
> +       u64             crash_p0;
> +       u64             crash_p1;
> +       u64             crash_p2;
> +       u64             crash_p3;
> +       u64             crash_p4;
> +};
> +
>  struct kvm {
>         spinlock_t mmu_lock;
>         struct mutex slots_lock;
> @@ -395,6 +411,7 @@ struct kvm {
>  #endif
>         long tlbs_dirty;
>         struct list_head devices;
> +       struct kvm_mshv_ctx *mshv_ctx;
>  };
>
>  #define kvm_err(fmt, ...) \
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 4b60056..12f481b 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -317,8 +317,19 @@ struct kvm_run {
>                 struct {
>  #define KVM_SYSTEM_EVENT_SHUTDOWN       1
>  #define KVM_SYSTEM_EVENT_RESET          2
> +#define KVM_SYSTEM_EVENT_CRASH          3
>                         __u32 type;
>                         __u64 flags;
> +                       union {
> +                               struct {
> +                                       /* Guest crash related parameters */
> +                                       __u64 p0;
> +                                       __u64 p1;
> +                                       __u64 p2;
> +                                       __u64 p3;
> +                                       __u64 p4;
> +                               } crash;
> +                       };
>                 } system_event;
>                 /* KVM_EXIT_S390_STSI */
>                 struct {
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Thanks,
Pete
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html