Wincy Van wrote on 2015-01-20:
> If a vcpu has an interrupt while in vmx non-root mode, we kick that vcpu
> to inject the interrupt in a timely manner. With posted interrupt
> processing, the kick is not needed, and interrupts are fully taken care
> of by hardware.
>
> In nested vmx, this feature saves many more vmexits than it does in
> non-nested vmx.
>
> This patch uses L0's POSTED_INTR_NV to avoid unexpected interrupts if
> L1's vector is different from L0's. If the vcpu is in hardware non-root
> mode, we use a physical IPI to deliver the posted interrupt; otherwise we
> deliver the interrupt to L1 and kick that vcpu out of nested non-root mode.
>
> Signed-off-by: Wincy Van <fanwenyi0529@xxxxxxxxx>
> ---
>  arch/x86/kvm/vmx.c |  136 ++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 files changed, 132 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index ea56e9f..cda9133 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -215,6 +215,7 @@ struct __packed vmcs12 {
>         u64 tsc_offset;
>         u64 virtual_apic_page_addr;
>         u64 apic_access_addr;
> +       u64 posted_intr_desc_addr;
>         u64 ept_pointer;
>         u64 eoi_exit_bitmap0;
>         u64 eoi_exit_bitmap1;
> @@ -334,6 +335,7 @@ struct __packed vmcs12 {
>         u32 vmx_preemption_timer_value;
>         u32 padding32[7]; /* room for future expansion */
>         u16 virtual_processor_id;
> +       u16 posted_intr_nv;
>         u16 guest_es_selector;
>         u16 guest_cs_selector;
>         u16 guest_ss_selector;
> @@ -387,6 +389,7 @@ struct nested_vmx {
>         /* The host-usable pointer to the above */
>         struct page *current_vmcs12_page;
>         struct vmcs12 *current_vmcs12;
> +       spinlock_t vmcs12_lock;
>         struct vmcs *current_shadow_vmcs;
>         /*
>          * Indicates if the shadow vmcs must be updated with the
> @@ -406,6 +409,8 @@ struct nested_vmx {
>          */
>         struct page *apic_access_page;
>         struct page *virtual_apic_page;
> +       struct page *pi_desc_page;
> +       struct pi_desc *pi_desc;
>         u64 msr_ia32_feature_control;
>
>         struct hrtimer preemption_timer;
> @@ -621,6 +626,7 @@ static int max_shadow_read_write_fields =
>
>  static const unsigned short vmcs_field_to_offset_table[] = {
>         FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
> +       FIELD(POSTED_INTR_NV, posted_intr_nv),
>         FIELD(GUEST_ES_SELECTOR, guest_es_selector),
>         FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
>         FIELD(GUEST_SS_SELECTOR, guest_ss_selector),
> @@ -646,6 +652,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
>         FIELD64(TSC_OFFSET, tsc_offset),
>         FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
>         FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
> +       FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
>         FIELD64(EPT_POINTER, ept_pointer),
>         FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
>         FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
> @@ -798,6 +805,7 @@ static void kvm_cpu_vmxon(u64 addr);
>  static void kvm_cpu_vmxoff(void);
>  static bool vmx_mpx_supported(void);
>  static bool vmx_xsaves_supported(void);
> +static int vmx_vm_has_apicv(struct kvm *kvm);
>  static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
>  static void vmx_set_segment(struct kvm_vcpu *vcpu,
>                             struct kvm_segment *var, int seg);
> @@ -1159,6 +1167,11 @@ static inline bool nested_cpu_has_vid(struct vmcs12 *vmcs12)
>         return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
>  }
>
> +static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
> +{
> +       return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
> +}
> +
>  static inline bool is_exception(u32 intr_info)
>  {
>         return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
> @@ -2362,6 +2375,9 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
>         vmx->nested.nested_vmx_pinbased_ctls_high |=
>                 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
>                 PIN_BASED_VMX_PREEMPTION_TIMER;
> +       if (vmx_vm_has_apicv(vmx->vcpu.kvm))
> +               vmx->nested.nested_vmx_pinbased_ctls_high |=
> +                       PIN_BASED_POSTED_INTR;
>
>         /* exit controls */
>         rdmsr(MSR_IA32_VMX_EXIT_CTLS,
> @@ -4267,6 +4283,46 @@ static int vmx_vm_has_apicv(struct kvm *kvm)
>         return enable_apicv && irqchip_in_kernel(kvm);
>  }
>
> +static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
> +                                               int vector)
> +{
> +       int r = 0;
> +       struct vmcs12 *vmcs12;
> +
> +       /*
> +        * Since posted intr delivery is async,
> +        * we must aquire a spin-lock to avoid
> +        * the race of vmcs12.
> +        */
> +       spin_lock(&to_vmx(vcpu)->nested.vmcs12_lock);
> +       vmcs12 = get_vmcs12(vcpu);
> +       if (!is_guest_mode(vcpu) || !vmcs12) {
> +               r = -1;
> +               goto out;
> +       }
> +       if (vector == vmcs12->posted_intr_nv &&
> +           nested_cpu_has_posted_intr(vmcs12)) {
> +               if (vcpu->mode == IN_GUEST_MODE)
> +                       apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
> +                               POSTED_INTR_VECTOR);
> +               else {
> +                       r = -1;
> +                       goto out;
> +               }
> +
> +               /*
> +                * if posted intr is done by hardware, the
> +                * corresponding eoi was sent to L0. Thus
> +                * we should send eoi to L1 manually.
> +                */
> +               kvm_apic_set_eoi_accelerated(vcpu,
> +                       vmcs12->posted_intr_nv);

Why is this necessary? As your comment says, it is done by hardware, not by L1, so why should L1 be aware of it?

Best regards,
Yang
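
[Editor's note] A small standalone sketch, in plain userspace C, of the decision vmx_deliver_nested_posted_interrupt() makes in the quoted patch: post via a physical notification IPI only when the vector matches L1's posted_intr_nv and the target vcpu is currently running in guest mode, otherwise return -1 so the caller falls back to the ordinary inject-and-kick path. The helpers, the vector values, and the printed messages are hypothetical stand-ins, not KVM code.

#include <stdbool.h>
#include <stdio.h>

#define POSTED_INTR_VECTOR 0xf2         /* illustrative value for L0's notification vector */

/* Stand-in for apic->send_IPI_mask(get_cpu_mask(cpu), POSTED_INTR_VECTOR). */
static void send_notification_ipi(int cpu, int vector)
{
        printf("physical IPI 0x%x -> cpu %d, hardware processes the posting\n",
               vector, cpu);
}

/* Stand-in for the normal software path: inject through L1 and kick the vcpu. */
static void inject_and_kick(int vector)
{
        printf("fall back: deliver 0x%x to L1 and kick the vcpu out of guest mode\n",
               vector);
}

/*
 * Mirrors the decision in vmx_deliver_nested_posted_interrupt(): hardware
 * posting is only possible when the vector is L1's notification vector and
 * the target vcpu is currently executing in (nested) non-root mode.
 */
static int deliver_nested_posted_interrupt(int vector, int posted_intr_nv,
                                           bool in_guest_mode, int cpu)
{
        if (vector == posted_intr_nv && in_guest_mode) {
                send_notification_ipi(cpu, POSTED_INTR_VECTOR);
                return 0;
        }
        return -1;      /* caller uses the software path */
}

int main(void)
{
        if (deliver_nested_posted_interrupt(0x30, 0x30, true, 1) < 0)
                inject_and_kick(0x30);
        if (deliver_nested_posted_interrupt(0x30, 0x30, false, 1) < 0)
                inject_and_kick(0x30);
        return 0;
}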
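
[Editor's note] For readers unfamiliar with the structure that the new posted_intr_desc_addr field and nested.pi_desc pointer refer to, here is a minimal userspace sketch of the posted-interrupt descriptor layout as specified in the Intel SDM: a 64-byte aligned block holding a 256-bit request bitmap (PIR) and an outstanding-notification (ON) bit. The helper names are made up for this sketch; the kernel uses locked bit operations rather than the plain stores shown here.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct pi_desc {
        uint32_t pir[8];   /* posted-interrupt requests, one bit per vector 0-255 */
        uint32_t control;  /* bit 0: outstanding notification (ON) */
        uint32_t rsvd[7];
} __attribute__((aligned(64)));

/* Record a pending vector in the PIR. */
static void pi_set_pir(struct pi_desc *pi, unsigned int vector)
{
        pi->pir[vector >> 5] |= 1u << (vector & 31);
}

/* Return the old ON value so a notification IPI is sent at most once. */
static int pi_test_and_set_on(struct pi_desc *pi)
{
        int old = pi->control & 1;
        pi->control |= 1;
        return old;
}

int main(void)
{
        struct pi_desc pi = {0};

        assert(sizeof(pi) == 64);
        pi_set_pir(&pi, 0x30);
        if (!pi_test_and_set_on(&pi))
                printf("ON was clear: send the notification vector\n");
        return 0;
}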