Re: [PATCH 2/2] KVM: nVMX: trace nested VM-Enter failures detected by H/W

Jim Mattson <jmattson@xxxxxxxxxx> · Mon, 11 Mar 2019 14:42:06 -0700

On Fri, Mar 8, 2019 at 3:16 PM Sean Christopherson
<sean.j.christopherson@xxxxxxxxx> wrote:
>
> Use the recently added tracepoint for logging nested VM-Enter failures
> instead of spamming the kernel log when hardware detects a consistency
> check failure.  Take the opportunity to print the name of the error code
> instead of dumping the raw hex number, but limit the symbol table to
> error codes that can reasonably be encountered by KVM.
>
> Add an equivalent tracepoint in nested_vmx_check_vmentry_hw() so that
> "invalid control field" errors aren't suppressed when nested early
> checks are enabled.
>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> ---
>  arch/x86/include/asm/vmx.h | 14 ++++++++++++++
>  arch/x86/kvm/trace.h       |  9 ++++++---
>  arch/x86/kvm/vmx/nested.c  | 14 +++++++++-----
>  3 files changed, 29 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 4e4133e86484..038ab715307f 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -575,6 +575,20 @@ enum vm_instruction_error_number {
>         VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28,
>  };
>
> +/*
> + * VM-instruction errors that can be encountered on VM-Enter, used to trace
> + * nested VM-Enter failures reported by hardware.  Errors unique to VM-Enter
> + * from a SMI Transfer Monitor are not included as things have gone seriously
> + * sideways if we get one of those...
> + */
> +#define VMX_VMENTER_INSTRUCTION_ERRORS \
> +       { VMXERR_VMLAUNCH_NONCLEAR_VMCS,                "VMLAUNCH_NONCLEAR_VMCS" }, \
> +       { VMXERR_VMRESUME_NONLAUNCHED_VMCS,             "VMRESUME_NONLAUNCHED_VMCS" }, \
> +       { VMXERR_VMRESUME_AFTER_VMXOFF,                 "VMRESUME_AFTER_VMXOFF" }, \
> +       { VMXERR_ENTRY_INVALID_CONTROL_FIELD,           "VMENTRY_INVALID_CONTROL_FIELD" }, \
> +       { VMXERR_ENTRY_INVALID_HOST_STATE_FIELD,        "VMENTRY_INVALID_HOST_STATE_FIELD" }, \
> +       { VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS,        "VMENTRY_EVENTS_BLOCKED_BY_MOV_SS" }
> +
>  enum vmx_l1d_flush_state {
>         VMENTER_L1D_FLUSH_AUTO,
>         VMENTER_L1D_FLUSH_NEVER,
> diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
> index 166bea7e2e92..06eff8978810 100644
> --- a/arch/x86/kvm/trace.h
> +++ b/arch/x86/kvm/trace.h
> @@ -1467,18 +1467,21 @@ TRACE_EVENT(kvm_hv_send_ipi_ex,
>   * Tracepoint for failed nested VMX VM-Enter.
>   */
>  TRACE_EVENT(kvm_nested_vmenter_failed,
> -       TP_PROTO(const char *msg),
> -       TP_ARGS(msg),
> +       TP_PROTO(const char *msg, u32 err),
> +       TP_ARGS(msg, err),
>
>         TP_STRUCT__entry(
>                 __field(const char *, msg)
> +               __field(u32, err)
>         ),
>
>         TP_fast_assign(
>                 __entry->msg = msg;
> +               __entry->err = err;
>         ),
>
> -       TP_printk("%s", __entry->msg)
> +       TP_printk("%s%s", __entry->msg, !__entry->err ? "" :
> +               __print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS))
>  );
>
>  #endif /* _TRACE_KVM_H */
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 34cdbe86e99b..1a0ae908ac1a 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -23,7 +23,7 @@ module_param(nested_early_check, bool, S_IRUGO);
>  ({                                                                     \
>         bool failed = (consistency_check);                              \
>         if (failed)                                                     \
> -               trace_kvm_nested_vmenter_failed(#consistency_check);    \
> +               trace_kvm_nested_vmenter_failed(#consistency_check, 0); \
>         failed;                                                         \
>  })
>
> @@ -2719,6 +2719,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
>         struct vcpu_vmx *vmx = to_vmx(vcpu);
>         unsigned long cr3, cr4;
>         bool vm_fail;
> +       u32 vmxerr;
>
>         if (!nested_early_check)
>                 return 0;
> @@ -2790,8 +2791,10 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
>                 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
>
>         if (vm_fail) {
> -               WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
> -                            VMXERR_ENTRY_INVALID_CONTROL_FIELD);
> +               vmxerr = vmcs_read32(VM_INSTRUCTION_ERROR);
> +               trace_kvm_nested_vmenter_failed(
> +                       "early hardware check VM-instruction error: ", vmxerr);
> +               WARN_ON_ONCE(vmxerr != VMXERR_ENTRY_INVALID_CONTROL_FIELD);
>                 return 1;
>         }
>
> @@ -5061,8 +5064,9 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
>                 return false;
>
>         if (unlikely(vmx->fail)) {
> -               pr_info_ratelimited("%s failed vm entry %x\n", __func__,
> -                                   vmcs_read32(VM_INSTRUCTION_ERROR));

I *love* the tracing, but I don't think we want to turn it on for
production. Can we keep the pr_info_ratelimited for when tracing is
disabled?

> +               trace_kvm_nested_vmenter_failed(
> +                       "hardware VM-instruction error: ",
> +                       vmcs_read32(VM_INSTRUCTION_ERROR));
>                 return true;
>         }
>
> --
> 2.21.0
>