Gleb Natapov wrote on 2013-02-04: > On Mon, Feb 04, 2013 at 05:05:14PM +0800, Yang Zhang wrote: >> From: Yang Zhang <yang.z.zhang@xxxxxxxxx> >> >> Posted Interrupt allows APIC interrupts to inject into guest directly >> without any vmexit. >> >> - When delivering a interrupt to guest, if target vcpu is running, >> update Posted-interrupt requests bitmap and send a notification event >> to the vcpu. Then the vcpu will handle this interrupt automatically, >> without any software involvemnt. >> - If target vcpu is not running or there already a notification event >> pending in the vcpu, do nothing. The interrupt will be handled by >> next vm entry >> Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx> >> --- >> arch/x86/include/asm/entry_arch.h | 1 + >> arch/x86/include/asm/hw_irq.h | 1 + >> arch/x86/include/asm/irq_vectors.h | 4 + >> arch/x86/include/asm/kvm_host.h | 3 + arch/x86/include/asm/vmx.h >> | 4 + arch/x86/kernel/entry_64.S | 5 + >> arch/x86/kernel/irq.c | 19 ++++ >> arch/x86/kernel/irqinit.c | 4 + arch/x86/kvm/lapic.c >> | 15 +++- arch/x86/kvm/lapic.h | 1 + >> arch/x86/kvm/svm.c | 6 ++ arch/x86/kvm/vmx.c >> | 164 +++++++++++++++++++++++++++++++----- >> arch/x86/kvm/x86.c | 4 + include/linux/kvm_host.h >> | 1 + 14 files changed, 208 insertions(+), 24 deletions(-) >> diff --git a/arch/x86/include/asm/entry_arch.h >> b/arch/x86/include/asm/entry_arch.h index 40afa00..7b0a29e 100644 --- >> a/arch/x86/include/asm/entry_arch.h +++ >> b/arch/x86/include/asm/entry_arch.h @@ -18,6 +18,7 @@ >> BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) >> #endif >> >> BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) >> +BUILD_INTERRUPT(posted_intr_ipi, POSTED_INTR_VECTOR) > Missing CONFIG_HAVE_KVM ifdef. Have you verified that this patch > compiles with KVM support disabled? Also give it a name that will > associate it with KVM. Yes, but it seems it is selected by x86 by default, and it is always enabled when building the kernel. I will remove the select in Kconfig and try again.
>> >> /* >> * every pentium local APIC has two 'local interrupts', with a >> diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h >> index eb92a6e..ee61af3 100644 >> --- a/arch/x86/include/asm/hw_irq.h >> +++ b/arch/x86/include/asm/hw_irq.h >> @@ -28,6 +28,7 @@ >> /* Interrupt handlers registered during init_IRQ */ extern void >> apic_timer_interrupt(void); extern void x86_platform_ipi(void); >> +extern void posted_intr_ipi(void); extern void error_interrupt(void); >> extern void irq_work_interrupt(void); >> diff --git a/arch/x86/include/asm/irq_vectors.h >> b/arch/x86/include/asm/irq_vectors.h index 1508e51..6421a63 100644 --- >> a/arch/x86/include/asm/irq_vectors.h +++ >> b/arch/x86/include/asm/irq_vectors.h @@ -102,6 +102,10 @@ >> */ >> #define X86_PLATFORM_IPI_VECTOR 0xf7 >> +#ifdef CONFIG_HAVE_KVM >> +#define POSTED_INTR_VECTOR 0xf2 >> +#endif >> + >> /* >> * IRQ work vector: >> */ >> diff --git a/arch/x86/include/asm/kvm_host.h >> b/arch/x86/include/asm/kvm_host.h index b8388e9..bab1c0a 100644 --- >> a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h >> @@ -704,6 +704,9 @@ struct kvm_x86_ops { >> void (*hwapic_isr_update)(struct kvm *kvm, int isr); >> void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); >> void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); >> + bool (*send_notification_event)(struct kvm_vcpu *vcpu, >> + int vector, int *result); >> + bool (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); >> int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); >> int (*get_tdp_level)(void); >> u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); >> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h >> index 694586c..f5ec72c 100644 >> --- a/arch/x86/include/asm/vmx.h >> +++ b/arch/x86/include/asm/vmx.h >> @@ -153,6 +153,7 @@ >> #define PIN_BASED_EXT_INTR_MASK 0x00000001 >> #define PIN_BASED_NMI_EXITING 0x00000008 >> #define PIN_BASED_VIRTUAL_NMIS 0x00000020 
>> +#define PIN_BASED_POSTED_INTR 0x00000080 >> >> #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 #define >> VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 @@ -175,6 +176,7 @@ >> /* VMCS Encodings */ enum vmcs_field { VIRTUAL_PROCESSOR_ID >> = 0x00000000, + POSTED_INTR_NV = 0x00000002, >> GUEST_ES_SELECTOR = 0x00000800, GUEST_CS_SELECTOR >> = 0x00000802, GUEST_SS_SELECTOR = 0x00000804, >> @@ -209,6 +211,8 @@ enum vmcs_field { VIRTUAL_APIC_PAGE_ADDR_HIGH >> = 0x00002013, APIC_ACCESS_ADDR = 0x00002014, >> APIC_ACCESS_ADDR_HIGH = 0x00002015, >> + POSTED_INTR_DESC_ADDR = 0x00002016, >> + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, >> EPT_POINTER = 0x0000201a, >> EPT_POINTER_HIGH = 0x0000201b, >> EOI_EXIT_BITMAP0 = 0x0000201c, >> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S >> index 70641af..c6c47a3 100644 >> --- a/arch/x86/kernel/entry_64.S >> +++ b/arch/x86/kernel/entry_64.S >> @@ -1177,6 +1177,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \ >> apicinterrupt X86_PLATFORM_IPI_VECTOR \ >> x86_platform_ipi smp_x86_platform_ipi >> +#ifdef CONFIG_HAVE_KVM >> +apicinterrupt POSTED_INTR_VECTOR \ >> + posted_intr_ipi smp_posted_intr_ipi >> +#endif >> + >> apicinterrupt THRESHOLD_APIC_VECTOR \ >> threshold_interrupt smp_threshold_interrupt >> apicinterrupt THERMAL_APIC_VECTOR \ >> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c >> index e4595f1..3551cf2 100644 >> --- a/arch/x86/kernel/irq.c >> +++ b/arch/x86/kernel/irq.c >> @@ -228,6 +228,25 @@ void smp_x86_platform_ipi(struct pt_regs *regs) >> set_irq_regs(old_regs); >> } >> +/* + * Handler for POSTED_INTERRUPT_VECTOR. + */ #ifdef >> CONFIG_HAVE_KVM +void smp_posted_intr_ipi(struct pt_regs *regs) +{ >> + struct pt_regs *old_regs = set_irq_regs(regs); + + ack_APIC_irq(); + >> + irq_enter(); + + exit_idle(); + + irq_exit(); + >> + set_irq_regs(old_regs); +} + + > One blank line is enough. 
>> EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); >> >> #ifdef CONFIG_HOTPLUG_CPU >> diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c >> index 6e03b0d..f90c5ae 100644 >> --- a/arch/x86/kernel/irqinit.c >> +++ b/arch/x86/kernel/irqinit.c >> @@ -205,6 +205,10 @@ static void __init apic_intr_init(void) >> >> /* IPI for X86 platform specific use */ >> alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); >> +#ifdef CONFIG_HAVE_KVM >> + /* IPI for posted interrupt use */ >> + alloc_intr_gate(POSTED_INTR_VECTOR, posted_intr_ipi); >> +#endif >> >> /* IPI vectors for APIC spurious and error interrupts */ >> alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c >> index 02b51dd..df6b6a3 100644 >> --- a/arch/x86/kvm/lapic.c >> +++ b/arch/x86/kvm/lapic.c >> @@ -379,6 +379,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic > *apic) >> if (!apic->irr_pending) >> return -1; >> + kvm_x86_ops->sync_pir_to_irr(apic->vcpu); >> result = apic_search_irr(apic); >> ASSERT(result == -1 || result >= 16); >> @@ -685,6 +686,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int > delivery_mode, >> { >> int result = 0; >> struct kvm_vcpu *vcpu = apic->vcpu; >> + bool send = false; >> >> switch (delivery_mode) { >> case APIC_DM_LOWEST: >> @@ -700,7 +702,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int > delivery_mode, >> } else >> apic_clear_vector(vector, apic->regs + APIC_TMR); >> - result = !apic_test_and_set_irr(vector, apic); >> + if (kvm_x86_ops->vm_has_apicv(vcpu->kvm)) > Just call send_notification_event() and do the check inside. And call it > deliver_posted_interrupt() or something. It does more than just sends > notification event. Actually it may not send it at all. The code logic is different with or without apicv. So even if we put the check inside the callee, we would still need to check it in the caller. I think the current solution is clearer.
>> + send = kvm_x86_ops->send_notification_event(vcpu, >> + vector, &result); >> + else >> + result = !apic_test_and_set_irr(vector, apic); >> + >> trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, >> trig_mode, vector, !result); >> if (!result) { >> @@ -710,8 +717,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int > delivery_mode, >> break; >> } >> - kvm_make_request(KVM_REQ_EVENT, vcpu); >> - kvm_vcpu_kick(vcpu); >> + if (!send) { >> + kvm_make_request(KVM_REQ_EVENT, vcpu); >> + kvm_vcpu_kick(vcpu); >> + } >> break; >> >> case APIC_DM_REMRD: >> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h >> index 1676d34..632111f 100644 >> --- a/arch/x86/kvm/lapic.h >> +++ b/arch/x86/kvm/lapic.h >> @@ -46,6 +46,7 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu); >> void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); >> u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); >> void kvm_apic_set_version(struct kvm_vcpu *vcpu); >> +void kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned int *pir); >> >> int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); >> int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); >> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c >> index a7d60d7..37f961d 100644 >> --- a/arch/x86/kvm/svm.c >> +++ b/arch/x86/kvm/svm.c >> @@ -3591,6 +3591,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm, > int isr) >> return; >> } >> +static bool svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) >> +{ >> + return false; >> +} >> + >> static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm >> *svm = to_svm(vcpu); @@ -4319,6 +4324,7 @@ static struct kvm_x86_ops >> svm_x86_ops = { .vm_has_apicv = svm_vm_has_apicv, .load_eoi_exitmap >> = svm_load_eoi_exitmap, .hwapic_isr_update = svm_hwapic_isr_update, >> + .sync_pir_to_irr = svm_sync_pir_to_irr, >> >> .set_tss_addr = svm_set_tss_addr, >> .get_tdp_level = get_npt_level, >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index e826d29..d2b02f2 
100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -84,8 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO); >> static bool __read_mostly fasteoi = 1; >> module_param(fasteoi, bool, S_IRUGO); >> -static bool __read_mostly enable_apicv_reg_vid = 1; >> -module_param(enable_apicv_reg_vid, bool, S_IRUGO); >> +static bool __read_mostly enable_apicv = 1; >> +module_param(enable_apicv, bool, S_IRUGO); >> >> /* >> * If nested=1, nested virtualization is supported, i.e., guests may use >> @@ -370,6 +370,41 @@ struct nested_vmx { >> struct page *apic_access_page; >> }; >> +#define POSTED_INTR_ON 0 >> +/* Posted-Interrupt Descriptor */ >> +struct pi_desc { >> + u32 pir[8]; /* Posted interrupt requested */ >> + union { >> + struct { >> + u8 on:1, >> + rsvd:7; >> + } control; >> + u32 rsvd[8]; >> + } u; >> +} __aligned(64); >> + >> +static bool pi_test_on(struct pi_desc *pi_desc) >> +{ >> + return test_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->u.control); >> +} >> + >> +static bool pi_test_and_set_on(struct pi_desc *pi_desc) >> +{ >> + return test_and_set_bit(POSTED_INTR_ON, >> + (unsigned long *)&pi_desc->u.control); >> +} >> + >> +static bool pi_test_and_clear_on(struct pi_desc *pi_desc) >> +{ >> + return test_and_clear_bit(POSTED_INTR_ON, >> + (unsigned long *)&pi_desc->u.control); >> +} >> + >> +static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) >> +{ >> + return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); >> +} >> + >> struct vcpu_vmx { >> struct kvm_vcpu vcpu; >> unsigned long host_rsp; >> @@ -434,6 +469,9 @@ struct vcpu_vmx { >> >> bool rdtscp_enabled; >> + /* Posted interrupt descriptor */ >> + struct pi_desc *pi; >> + > You haven't answered on my previous review why are you trying save 46 > bytes here. Sorry. I cannot get your point. It's just a pointer and only takes 8 bytes. 
>> /* Support for a guest hypervisor (nested VMX) */ >> struct nested_vmx nested; >> }; >> @@ -788,6 +826,18 @@ static inline bool > cpu_has_vmx_virtual_intr_delivery(void) >> SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; >> } >> +static inline bool cpu_has_vmx_posted_intr(void) >> +{ >> + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; >> +} >> + >> +static inline bool cpu_has_vmx_apicv(void) >> +{ >> + return cpu_has_vmx_apic_register_virt() && >> + cpu_has_vmx_virtual_intr_delivery() && >> + cpu_has_vmx_posted_intr(); >> +} >> + >> static inline bool cpu_has_vmx_flexpriority(void) >> { >> return cpu_has_vmx_tpr_shadow() && >> @@ -2535,12 +2585,6 @@ static __init int setup_vmcs_config(struct > vmcs_config *vmcs_conf) >> u32 _vmexit_control = 0; >> u32 _vmentry_control = 0; >> - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; >> - opt = PIN_BASED_VIRTUAL_NMIS; >> - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, >> - &_pin_based_exec_control) < 0) >> - return -EIO; >> - >> min = CPU_BASED_HLT_EXITING | >> #ifdef CONFIG_X86_64 >> CPU_BASED_CR8_LOAD_EXITING | >> @@ -2617,6 +2661,17 @@ static __init int setup_vmcs_config(struct > vmcs_config *vmcs_conf) >> &_vmexit_control) < 0) >> return -EIO; >> + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; >> + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR; >> + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, >> + &_pin_based_exec_control) < 0) >> + return -EIO; >> + >> + if (!(_cpu_based_2nd_exec_control & >> + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) || >> + !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) >> + _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; >> + >> min = 0; opt = VM_ENTRY_LOAD_IA32_PAT; if (adjust_vmx_controls(min, >> opt, MSR_IA32_VMX_ENTRY_CTLS, @@ -2795,11 +2850,10 @@ static __init >> int hardware_setup(void) if (!cpu_has_vmx_ple()) ple_gap = 0; >> - if (!cpu_has_vmx_apic_register_virt() || >> - !cpu_has_vmx_virtual_intr_delivery()) >> - 
enable_apicv_reg_vid = 0; >> + if (!cpu_has_vmx_apicv()) >> + enable_apicv = 0; >> >> - if (enable_apicv_reg_vid) >> + if (enable_apicv) >> kvm_x86_ops->update_cr8_intercept = NULL; >> else >> kvm_x86_ops->hwapic_irr_update = NULL; >> @@ -3868,6 +3922,61 @@ static void > vmx_disable_intercept_msr_write_x2apic(u32 msr) >> msr, MSR_TYPE_W); >> } >> +static int vmx_vm_has_apicv(struct kvm *kvm) >> +{ >> + return enable_apicv && irqchip_in_kernel(kvm); >> +} >> + >> +static bool vmx_send_notification_event(struct kvm_vcpu *vcpu, >> + int vector, int *result) >> +{ >> + struct vcpu_vmx *vmx = to_vmx(vcpu); >> + >> + *result = !pi_test_and_set_pir(vector, vmx->pi); > The problem here is that interrupt may still be pending in IRR so > eventually it will be coalesced, but we report it as delivered here. I > do not see solution for this yet. Yes, that is true, and it may result in the interrupt being lost. But even on real hardware, an interrupt can also be lost in some cases: for example, the CPU doesn't enable interrupts in time, or there is a higher-priority interrupt pending in the IRR. And since there is already an interrupt pending in the IRR, the interrupt will still be handled. >> + if (!pi_test_and_set_on(vmx->pi) && (vcpu->mode == IN_GUEST_MODE)) { >> + kvm_make_request(KVM_REQ_PENDING_PIR, vcpu); > Why not set KVM_REQ_EVENT here? What this intermediate event is needed > for? See the answer below. >> + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), >> + POSTED_INTR_VECTOR); >> + if (!pi_test_on(vmx->pi)) > Isn't it too optimistic of you to expect IPI to be delivered and > processed by remote CPU by this point? I have collected some data on my box, and it shows that about 5 percent of posted interrupts have already been handled by the time this check is reached. How about adding an unlikely() here? It also means that checking the request is unnecessary 5% of the time. And checking KVM_REQ_EVENT is more costly, so I use a lighter-weight request to do it.
>> + clear_bit(KVM_REQ_PENDING_PIR, &vcpu->requests) ; >> + return true; >> + } >> + return false; >> +} >> + >> +static bool vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) >> +{ >> + struct vcpu_vmx *vmx = to_vmx(vcpu); >> + struct kvm_lapic *apic = vcpu->arch.apic; >> + unsigned int i, old, new, ret_val, irr_offset, pir_val; >> + bool make_request = false; >> + >> + if (!vmx_vm_has_apicv(vcpu->kvm) || !pi_test_and_clear_on(vmx->pi)) >> + return false; >> + >> + for (i = 0; i <= 7; i++) { >> + pir_val = xchg(&vmx->pi->pir[i], 0); >> + if (pir_val) { >> + irr_offset = APIC_IRR + i * 0x10; >> + do { >> + old = kvm_apic_get_reg(apic, irr_offset); >> + new = old | pir_val; >> + ret_val = cmpxchg((u32 *)(apic->regs + >> + irr_offset), old, new); >> + } while (unlikely(ret_val != old)); >> + make_request = true; >> + } >> + } >> + >> + return make_request; >> +} >> + >> +static void free_pi(struct vcpu_vmx *vmx) >> +{ >> + if (vmx_vm_has_apicv(vmx->vcpu.kvm)) >> + kfree(vmx->pi); >> +} >> + >> /* >> * Set up the vmcs's constant host-state fields, i.e., host-state fields that >> * will not change in the lifetime of the guest. 
>> @@ -3928,6 +4037,15 @@ static void set_cr4_guest_host_mask(struct > vcpu_vmx *vmx) >> vmcs_writel(CR4_GUEST_HOST_MASK, >> ~vmx->vcpu.arch.cr4_guest_owned_bits); } >> +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) >> +{ >> + u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; >> + >> + if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) >> + pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; >> + return pin_based_exec_ctrl; >> +} >> + >> static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control >> = vmcs_config.cpu_based_exec_ctrl; @@ -3945,11 +4063,6 @@ static u32 >> vmx_exec_control(struct vcpu_vmx *vmx) return exec_control; } >> -static int vmx_vm_has_apicv(struct kvm *kvm) >> -{ >> - return enable_apicv_reg_vid && irqchip_in_kernel(kvm); >> -} >> - >> static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { u32 >> exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; @@ -4005,8 +4118,7 >> @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) >> vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ >> >> /* Control */ >> - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, >> - vmcs_config.pin_based_exec_ctrl); >> + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); >> >> vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, > vmx_exec_control(vmx)); >> >> @@ -4015,13 +4127,17 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) >> vmx_secondary_exec_control(vmx)); >> } >> - if (enable_apicv_reg_vid) { >> + if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { >> vmcs_write64(EOI_EXIT_BITMAP0, 0); >> vmcs_write64(EOI_EXIT_BITMAP1, 0); >> vmcs_write64(EOI_EXIT_BITMAP2, 0); >> vmcs_write64(EOI_EXIT_BITMAP3, 0); >> >> vmcs_write16(GUEST_INTR_STATUS, 0); >> + >> + vmx->pi = kzalloc(sizeof(struct pi_desc), GFP_KERNEL); >> + vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); >> + vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((vmx->pi))); >> } >> >> if (ple_gap) { @@ -4171,6 +4287,9 @@ static int vmx_vcpu_reset(struct >> kvm_vcpu *vcpu) vmcs_write64(APIC_ACCESS_ADDR, >> 
page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); >> + if (vmx_vm_has_apicv(vcpu->kvm)) >> + memset(vmx->pi, 0, sizeof(struct pi_desc)); >> + >> if (vmx->vpid != 0) >> vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); >> @@ -6746,6 +6865,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) >> >> free_vpid(vmx); free_nested(vmx); + free_pi(vmx); >> free_loaded_vmcs(vmx->loaded_vmcs); kfree(vmx->guest_msrs); >> kvm_vcpu_uninit(vcpu); @@ -7647,6 +7767,8 @@ static struct >> kvm_x86_ops vmx_x86_ops = { .load_eoi_exitmap = vmx_load_eoi_exitmap, >> .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = >> vmx_hwapic_isr_update, >> + .sync_pir_to_irr = vmx_sync_pir_to_irr, >> + .send_notification_event = vmx_send_notification_event, >> >> .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, @@ >> -7750,7 +7872,7 @@ static int __init vmx_init(void) >> memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, >> PAGE_SIZE); >> - if (enable_apicv_reg_vid) { >> + if (enable_apicv) { >> for (msr = 0x800; msr <= 0x8ff; msr++) >> vmx_disable_intercept_msr_read_x2apic(msr); >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index 9f25d70..6e1e6e7 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -2681,6 +2681,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) >> static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, >> struct kvm_lapic_state *s) { + kvm_x86_ops->sync_pir_to_irr(vcpu); >> memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); >> >> return 0; @@ -5698,6 +5699,9 @@ static int vcpu_enter_guest(struct >> kvm_vcpu *vcpu) kvm_deliver_pmi(vcpu); if >> (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu)) >> update_eoi_exitmap(vcpu); >> + if (kvm_check_request(KVM_REQ_PENDING_PIR, vcpu)) >> + if (kvm_x86_ops->sync_pir_to_irr(vcpu)) >> + kvm_make_request(KVM_REQ_EVENT, vcpu); >> } >> >> if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { >> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h >> index 
0350e0d..a410819 100644 >> --- a/include/linux/kvm_host.h >> +++ b/include/linux/kvm_host.h >> @@ -124,6 +124,7 @@ static inline bool is_error_page(struct page *page) >> #define KVM_REQ_MCLOCK_INPROGRESS 20 >> #define KVM_REQ_EPR_EXIT 21 >> #define KVM_REQ_EOIBITMAP 22 >> +#define KVM_REQ_PENDING_PIR 23 >> >> #define KVM_USERSPACE_IRQ_SOURCE_ID 0 >> #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 >> -- >> 1.7.1 > > -- > Gleb. Best regards, Yang -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html