Re: [RFC/RFT PATCH] KVM: nVMX: fixes to nested virt interrupt injection

2017-07-24 22:20 GMT+08:00 Paolo Bonzini <pbonzini@xxxxxxxxxx>:
> There are three issues in nested_vmx_check_exception:
>
> 1) it is not taking PFEC_MATCH/PFEC_MASK into account, as reported
> by Wanpeng Li;
>
> 2) it should rebuild the interruption info and exit qualification fields
> from scratch, as reported by Jim Mattson, because the values from the
> L2->L0 vmexit may be invalid (e.g. if an emulated instruction causes
> a page fault, the EPT misconfig's exit qualification is incorrect).
>
> 3) CR2 and DR6 should not be written for exception intercept vmexits
> (CR2 only for AMD).
>
> This patch fixes the first two and adds a comment about the last,
> outlining the fix.
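
For 1), the mask/match check that decides whether a #PF should go to L1 is,
as far as I can tell, the helper below (my own sketch from reading the
vmcs12 fields, not text quoted from this patch):

    static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
                                                u16 error_code)
    {
            bool inequality, bit;

            bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
            inequality =
                    (error_code & vmcs12->page_fault_error_code_mask) !=
                     vmcs12->page_fault_error_code_match;

            /* Bit 14 set: exit only on a PFEC match; clear: exit only on a mismatch. */
            return inequality ^ bit;
    }
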
>
> Cc: Jim Mattson <jmattson@xxxxxxxxxx>
> Cc: Wanpeng Li <wanpeng.li@xxxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
>         Wanpeng, can you test this on the testcases you had for commit
>         d4912215d103 ("KVM: nVMX: Fix exception injection", 2017-06-05)?
>         Also, do you have a testcase for PFEC matching?

I didn't observe any issues when testing the latest version of your
patch in kvm/queue.

Regards,
Wanpeng Li

>
>  arch/x86/kvm/svm.c | 10 ++++++++
>  arch/x86/kvm/vmx.c | 71 +++++++++++++++++++++++++++++++++++++++++++-----------
>  2 files changed, 67 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 4d8141e533c3..1107626938cc 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -2430,6 +2430,16 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
>         svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
>         svm->vmcb->control.exit_code_hi = 0;
>         svm->vmcb->control.exit_info_1 = error_code;
> +
> +       /*
> +        * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
> +        * The fix is to add the ancillary datum (CR2 or DR6) to structs
> +        * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
> +        * written only when inject_pending_event runs (DR6 would be written here
> +        * too).  This should be conditional on a new capability---if the
> +        * capability is disabled, kvm_multiple_exception would write the
> +        * ancillary information to CR2 or DR6, for backwards ABI-compatibility.
> +        */
>         if (svm->vcpu.arch.exception.nested_apf)
>                 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
>         else
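
If I read the FIXME right, the eventual fix would carry the ancillary
datum alongside the queued exception and only apply it at injection
time; purely as an illustration (the names below are mine, not from any
patch), something like:

    /* Illustrative sketch of the FIXME's idea, not code from this series. */
    struct kvm_queued_exception_sketch {
            u8 nr;                  /* exception vector */
            bool has_error_code;
            u32 error_code;
            bool has_payload;       /* ancillary datum is valid */
            unsigned long payload;  /* CR2 for #PF, DR6 for #DB */
    };

with CR2/DR6 written back only when the exception is actually injected,
and a new capability so that old userspace keeps the current
write-immediately behaviour.
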
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index d04092f821b6..b520614f9d46 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -927,6 +927,10 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
>  static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
>  static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
>  static int alloc_identity_pagetable(struct kvm *kvm);
> +static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
> +static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
> +static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
> +                                           u16 error_code);
>
>  static DEFINE_PER_CPU(struct vmcs *, vmxarea);
>  static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
> @@ -2432,28 +2436,67 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
>   * KVM wants to inject page-faults which it got to the guest. This function
>   * checks whether in a nested guest, we need to inject them to L1 or L2.
>   */
> +static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
> +                                              unsigned long exit_qual)
> +{
> +       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +       unsigned int nr = vcpu->arch.exception.nr;
> +       u32 intr_info = nr | INTR_INFO_VALID_MASK;
> +
> +       if (vcpu->arch.exception.has_error_code) {
> +               vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code);
> +               intr_info |= INTR_INFO_DELIVER_CODE_MASK;
> +       }
> +
> +       if (kvm_exception_is_soft(nr))
> +               intr_info |= INTR_TYPE_SOFT_EXCEPTION;
> +       else
> +               intr_info |= INTR_TYPE_HARD_EXCEPTION;
> +
> +       if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
> +           vmx_get_nmi_mask(vcpu))
> +               intr_info |= INTR_INFO_UNBLOCK_NMI;
> +
> +       nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
> +}
> +
>  static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
>  {
>         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>         unsigned int nr = vcpu->arch.exception.nr;
>
> -       if (!((vmcs12->exception_bitmap & (1u << nr)) ||
> -               (nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
> -               return 0;
> +       if (nr == PF_VECTOR) {
> +               if (vcpu->arch.exception.nested_apf) {
> +                       nested_vmx_inject_exception_vmexit(vcpu,
> +                                                          vcpu->arch.apf.nested_apf_token);
> +                       return 1;
> +               }
> +               /*
> +                * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
> +                * The fix is to add the ancillary datum (CR2 or DR6) to structs
> +                * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
> +                * can be written only when inject_pending_event runs.  This should be
> +                * conditional on a new capability---if the capability is disabled,
> +                * kvm_multiple_exception would write the ancillary information to
> +                * CR2 or DR6, for backwards ABI-compatibility.
> +                */
> +               if (nested_vmx_is_page_fault_vmexit(vmcs12,
> +                                                   vcpu->arch.exception.error_code)) {
> +                       nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2);
> +                       return 1;
> +               }
> +       } else {
> +               unsigned long exit_qual = 0;
> +               if (nr == DB_VECTOR)
> +                       exit_qual = vcpu->arch.dr6;
>
> -       if (vcpu->arch.exception.nested_apf) {
> -               vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code);
> -               nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
> -                       PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
> -                       INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
> -                       vcpu->arch.apf.nested_apf_token);
> -               return 1;
> +               if (vmcs12->exception_bitmap & (1u << nr)) {
> +                       nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
> +                       return 1;
> +               }
>         }
>
> -       nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
> -                         vmcs_read32(VM_EXIT_INTR_INFO),
> -                         vmcs_readl(EXIT_QUALIFICATION));
> -       return 1;
> +       return 0;
>  }
>
>  static void vmx_queue_exception(struct kvm_vcpu *vcpu)
> --
> 1.8.3.1
>
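
On the vmx.c FIXME: for completeness, the "write the ancillary datum at
injection time" step could then look roughly like this (again only a
sketch with made-up names, tying into the struct sketch above):

    /* Sketch: apply the deferred CR2/DR6 only when L0 actually injects. */
    static void deliver_exception_payload_sketch(struct kvm_vcpu *vcpu,
                                                 struct kvm_queued_exception_sketch *ex)
    {
            if (!ex->has_payload)
                    return;

            if (ex->nr == PF_VECTOR)
                    vcpu->arch.cr2 = ex->payload;   /* CR2 only for #PF */
            else if (ex->nr == DB_VECTOR)
                    vcpu->arch.dr6 = ex->payload;   /* DR6 only for #DB */
    }
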


