2017-07-24 22:20 GMT+08:00 Paolo Bonzini <pbonzini@xxxxxxxxxx>: > There are three issues in nested_vmx_check_exception: > > 1) it is not taking PFEC_MATCH/PFEC_MASK into account, as reported > by Wanpeng Li; > > 2) it should rebuild the interruption info and exit qualification fields > from scratch, as reported by Jim Mattson, because the values from the > L2->L0 vmexit may be invalid (e.g. if an emulated instruction causes > a page fault, the EPT misconfig's exit qualification is incorrect). > > 3) CR2 and DR6 should not be written for exception intercept vmexits > (CR2 only for AMD). > > This patch fixes the first two and adds a comment about the last, > outlining the fix. > > Cc: Jim Mattson <jmattson@xxxxxxxxxx> > Cc: Wanpeng Li <wanpeng.li@xxxxxxxxxxx> > Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> > --- > Wanpeng, can you test this on the testcases you had for commit I didn't observe any issue when testing the latest version of your patch in kvm/queue. Regards, Wanpeng Li > d4912215d103 ("KVM: nVMX: Fix exception injection", 2017-06-05)? > Also, do you have a testcase for PFEC matching? > > arch/x86/kvm/svm.c | 10 ++++++++ > arch/x86/kvm/vmx.c | 71 +++++++++++++++++++++++++++++++++++++++++++----------- > 2 files changed, 67 insertions(+), 14 deletions(-) > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index 4d8141e533c3..1107626938cc 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -2430,6 +2430,16 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, > svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; > svm->vmcb->control.exit_code_hi = 0; > svm->vmcb->control.exit_info_1 = error_code; > + > + /* > + * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception. > + * The fix is to add the ancillary datum (CR2 or DR6) to structs > + * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be > + * written only when inject_pending_event runs (DR6 would be written here > + * too. 
This should be conditional on a new capability---if the > + * capability is disabled, kvm_multiple_exception would write the > + * ancillary information to CR2 or DR6, for backwards ABI-compatibility. > + */ > if (svm->vcpu.arch.exception.nested_apf) > svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; > else > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index d04092f821b6..b520614f9d46 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -927,6 +927,10 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, > static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); > static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); > static int alloc_identity_pagetable(struct kvm *kvm); > +static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); > +static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); > +static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, > + u16 error_code); > > static DEFINE_PER_CPU(struct vmcs *, vmxarea); > static DEFINE_PER_CPU(struct vmcs *, current_vmcs); > @@ -2432,28 +2436,67 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) > * KVM wants to inject page-faults which it got to the guest. This function > * checks whether in a nested guest, we need to inject them to L1 or L2. 
> */ > +static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, > + unsigned long exit_qual) > +{ > + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); > + unsigned int nr = vcpu->arch.exception.nr; > + u32 intr_info = nr | INTR_INFO_VALID_MASK; > + > + if (vcpu->arch.exception.has_error_code) { > + vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code); > + intr_info |= INTR_INFO_DELIVER_CODE_MASK; > + } > + > + if (kvm_exception_is_soft(nr)) > + intr_info |= INTR_TYPE_SOFT_EXCEPTION; > + else > + intr_info |= INTR_TYPE_HARD_EXCEPTION; > + > + if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) && > + vmx_get_nmi_mask(vcpu)) > + intr_info |= INTR_INFO_UNBLOCK_NMI; > + > + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual); > +} > + > static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) > { > struct vmcs12 *vmcs12 = get_vmcs12(vcpu); > unsigned int nr = vcpu->arch.exception.nr; > > - if (!((vmcs12->exception_bitmap & (1u << nr)) || > - (nr == PF_VECTOR && vcpu->arch.exception.nested_apf))) > - return 0; > + if (nr == PF_VECTOR) { > + if (vcpu->arch.exception.nested_apf) { > + nested_vmx_inject_exception_vmexit(vcpu, > + vcpu->arch.apf.nested_apf_token); > + return 1; > + } > + /* > + * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception. > + * The fix is to add the ancillary datum (CR2 or DR6) to structs > + * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 > + * can be written only when inject_pending_event runs. This should be > + * conditional on a new capability---if the capability is disabled, > + * kvm_multiple_exception would write the ancillary information to > + * CR2 or DR6, for backwards ABI-compatibility. 
> + */ > + if (nested_vmx_is_page_fault_vmexit(vmcs12, > + vcpu->arch.exception.error_code)) { > + nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2); > + return 1; > + } > + } else { > + unsigned long exit_qual = 0; > + if (nr == DB_VECTOR) > + exit_qual = vcpu->arch.dr6; > > - if (vcpu->arch.exception.nested_apf) { > - vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code); > - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, > - PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | > - INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, > - vcpu->arch.apf.nested_apf_token); > - return 1; > + if (vmcs12->exception_bitmap & (1u << nr)) { > + nested_vmx_inject_exception_vmexit(vcpu, exit_qual); > + return 1; > + } > } > > - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, > - vmcs_read32(VM_EXIT_INTR_INFO), > - vmcs_readl(EXIT_QUALIFICATION)); > - return 1; > + return 0; > } > > static void vmx_queue_exception(struct kvm_vcpu *vcpu) > -- > 1.8.3.1 >