Re: [PATCH v2 2/2] KVM: VMX: Add Posted Interrupt supporting

On Mon, Feb 04, 2013 at 05:05:14PM +0800, Yang Zhang wrote:
> From: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> 
> Posted Interrupt allows APIC interrupts to be injected into the guest
> directly, without any vmexit.
> 
> - When delivering an interrupt to the guest, if the target vcpu is
>   running, update the Posted-Interrupt Requests bitmap and send a
>   notification event to the vcpu. The vcpu will then handle the
>   interrupt automatically, without any software involvement.
> 
> - If the target vcpu is not running, or a notification event is already
>   pending for the vcpu, do nothing. The interrupt will be handled at the
>   next vm entry.
> 
> Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> ---
>  arch/x86/include/asm/entry_arch.h  |    1 +
>  arch/x86/include/asm/hw_irq.h      |    1 +
>  arch/x86/include/asm/irq_vectors.h |    4 +
>  arch/x86/include/asm/kvm_host.h    |    3 +
>  arch/x86/include/asm/vmx.h         |    4 +
>  arch/x86/kernel/entry_64.S         |    5 +
>  arch/x86/kernel/irq.c              |   19 ++++
>  arch/x86/kernel/irqinit.c          |    4 +
>  arch/x86/kvm/lapic.c               |   15 +++-
>  arch/x86/kvm/lapic.h               |    1 +
>  arch/x86/kvm/svm.c                 |    6 ++
>  arch/x86/kvm/vmx.c                 |  164 +++++++++++++++++++++++++++++++-----
>  arch/x86/kvm/x86.c                 |    4 +
>  include/linux/kvm_host.h           |    1 +
>  14 files changed, 208 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
> index 40afa00..7b0a29e 100644
> --- a/arch/x86/include/asm/entry_arch.h
> +++ b/arch/x86/include/asm/entry_arch.h
> @@ -18,6 +18,7 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
>  #endif
>  
>  BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
> +BUILD_INTERRUPT(posted_intr_ipi, POSTED_INTR_VECTOR)
Missing CONFIG_HAVE_KVM ifdef. Have you verified that this patch
compiles with KVM support disabled? Also give it a name that will
associate it with KVM.
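
Something along these lines is what I have in mind; kvm_posted_intr_ipi is
only a suggested name (with matching renames in hw_irq.h, entry_64.S,
irq.c and irqinit.c):

#ifdef CONFIG_HAVE_KVM
BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
#endif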

>  
>  /*
>   * every pentium local APIC has two 'local interrupts', with a
> diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
> index eb92a6e..ee61af3 100644
> --- a/arch/x86/include/asm/hw_irq.h
> +++ b/arch/x86/include/asm/hw_irq.h
> @@ -28,6 +28,7 @@
>  /* Interrupt handlers registered during init_IRQ */
>  extern void apic_timer_interrupt(void);
>  extern void x86_platform_ipi(void);
> +extern void posted_intr_ipi(void);
>  extern void error_interrupt(void);
>  extern void irq_work_interrupt(void);
>  
> diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
> index 1508e51..6421a63 100644
> --- a/arch/x86/include/asm/irq_vectors.h
> +++ b/arch/x86/include/asm/irq_vectors.h
> @@ -102,6 +102,10 @@
>   */
>  #define X86_PLATFORM_IPI_VECTOR		0xf7
>  
> +#ifdef CONFIG_HAVE_KVM
> +#define POSTED_INTR_VECTOR		0xf2
> +#endif
> +
>  /*
>   * IRQ work vector:
>   */
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b8388e9..bab1c0a 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -704,6 +704,9 @@ struct kvm_x86_ops {
>  	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
>  	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
>  	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
> +	bool (*send_notification_event)(struct kvm_vcpu *vcpu,
> +					int vector, int *result);
> +	bool (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
>  	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
>  	int (*get_tdp_level)(void);
>  	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 694586c..f5ec72c 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -153,6 +153,7 @@
>  #define PIN_BASED_EXT_INTR_MASK                 0x00000001
>  #define PIN_BASED_NMI_EXITING                   0x00000008
>  #define PIN_BASED_VIRTUAL_NMIS                  0x00000020
> +#define PIN_BASED_POSTED_INTR                   0x00000080
>  
>  #define VM_EXIT_SAVE_DEBUG_CONTROLS             0x00000002
>  #define VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200
> @@ -175,6 +176,7 @@
>  /* VMCS Encodings */
>  enum vmcs_field {
>  	VIRTUAL_PROCESSOR_ID            = 0x00000000,
> +	POSTED_INTR_NV                  = 0x00000002,
>  	GUEST_ES_SELECTOR               = 0x00000800,
>  	GUEST_CS_SELECTOR               = 0x00000802,
>  	GUEST_SS_SELECTOR               = 0x00000804,
> @@ -209,6 +211,8 @@ enum vmcs_field {
>  	VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
>  	APIC_ACCESS_ADDR		= 0x00002014,
>  	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
> +	POSTED_INTR_DESC_ADDR           = 0x00002016,
> +	POSTED_INTR_DESC_ADDR_HIGH      = 0x00002017,
>  	EPT_POINTER                     = 0x0000201a,
>  	EPT_POINTER_HIGH                = 0x0000201b,
>  	EOI_EXIT_BITMAP0                = 0x0000201c,
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index 70641af..c6c47a3 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -1177,6 +1177,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \
>  apicinterrupt X86_PLATFORM_IPI_VECTOR \
>  	x86_platform_ipi smp_x86_platform_ipi
>  
> +#ifdef CONFIG_HAVE_KVM
> +apicinterrupt POSTED_INTR_VECTOR \
> +	posted_intr_ipi smp_posted_intr_ipi
> +#endif
> +
>  apicinterrupt THRESHOLD_APIC_VECTOR \
>  	threshold_interrupt smp_threshold_interrupt
>  apicinterrupt THERMAL_APIC_VECTOR \
> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
> index e4595f1..3551cf2 100644
> --- a/arch/x86/kernel/irq.c
> +++ b/arch/x86/kernel/irq.c
> @@ -228,6 +228,25 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
>  	set_irq_regs(old_regs);
>  }
>  
> +/*
> + * Handler for POSTED_INTERRUPT_VECTOR.
> + */
#ifdef CONFIG_HAVE_KVM
> +void smp_posted_intr_ipi(struct pt_regs *regs)
> +{
> +	struct pt_regs *old_regs = set_irq_regs(regs);
> +
> +	ack_APIC_irq();
> +
> +	irq_enter();
> +
> +	exit_idle();
> +
> +	irq_exit();
> +
> +	set_irq_regs(old_regs);
> +}
> +
> +
One blank line is enough.

>  EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
>  
>  #ifdef CONFIG_HOTPLUG_CPU
> diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
> index 6e03b0d..f90c5ae 100644
> --- a/arch/x86/kernel/irqinit.c
> +++ b/arch/x86/kernel/irqinit.c
> @@ -205,6 +205,10 @@ static void __init apic_intr_init(void)
>  
>  	/* IPI for X86 platform specific use */
>  	alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
> +#ifdef CONFIG_HAVE_KVM
> +	/* IPI for posted interrupt use */
> +	alloc_intr_gate(POSTED_INTR_VECTOR, posted_intr_ipi);
> +#endif
>  
>  	/* IPI vectors for APIC spurious and error interrupts */
>  	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 02b51dd..df6b6a3 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -379,6 +379,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
>  	if (!apic->irr_pending)
>  		return -1;
>  
> +	kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
>  	result = apic_search_irr(apic);
>  	ASSERT(result == -1 || result >= 16);
>  
> @@ -685,6 +686,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
>  {
>  	int result = 0;
>  	struct kvm_vcpu *vcpu = apic->vcpu;
> +	bool send = false;
>  
>  	switch (delivery_mode) {
>  	case APIC_DM_LOWEST:
> @@ -700,7 +702,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
>  		} else
>  			apic_clear_vector(vector, apic->regs + APIC_TMR);
>  
> -		result = !apic_test_and_set_irr(vector, apic);
> +		if (kvm_x86_ops->vm_has_apicv(vcpu->kvm))
Just call send_notification_event() unconditionally and do the
vm_has_apicv() check inside it. And call it deliver_posted_interrupt() or
something; it does more than just send a notification event, and actually
it may not send one at all.
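
Roughly like this at the call site, completely untested
(deliver_posted_interrupt() is only a suggested name; see the vmx-side
sketch further down for what the hook itself would then look like):

		/*
		 * The hook does the vm_has_apicv() check itself and, when it
		 * cannot post the interrupt, takes care of waking the vcpu,
		 * so no apicv special casing is needed here.
		 */
		send = kvm_x86_ops->deliver_posted_interrupt(vcpu, vector,
							     &result);
		if (!send)
			result = !apic_test_and_set_irr(vector, apic);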

> +			send = kvm_x86_ops->send_notification_event(vcpu,
> +						vector, &result);
> +		else
> +			result = !apic_test_and_set_irr(vector, apic);
> +
>  		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
>  					  trig_mode, vector, !result);
>  		if (!result) {
> @@ -710,8 +717,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
>  			break;
>  		}
>  
> -		kvm_make_request(KVM_REQ_EVENT, vcpu);
> -		kvm_vcpu_kick(vcpu);
> +		if (!send) {
> +			kvm_make_request(KVM_REQ_EVENT, vcpu);
> +			kvm_vcpu_kick(vcpu);
> +		}
>  		break;
>  
>  	case APIC_DM_REMRD:
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 1676d34..632111f 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -46,6 +46,7 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
>  void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
>  u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
>  void kvm_apic_set_version(struct kvm_vcpu *vcpu);
> +void kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned int *pir);
>  
>  int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
>  int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index a7d60d7..37f961d 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3591,6 +3591,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
>  	return;
>  }
>  
> +static bool svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
> +{
> +	return false;
> +}
> +
>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> @@ -4319,6 +4324,7 @@ static struct kvm_x86_ops svm_x86_ops = {
>  	.vm_has_apicv = svm_vm_has_apicv,
>  	.load_eoi_exitmap = svm_load_eoi_exitmap,
>  	.hwapic_isr_update = svm_hwapic_isr_update,
> +	.sync_pir_to_irr = svm_sync_pir_to_irr,
>  
>  	.set_tss_addr = svm_set_tss_addr,
>  	.get_tdp_level = get_npt_level,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index e826d29..d2b02f2 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -84,8 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO);
>  static bool __read_mostly fasteoi = 1;
>  module_param(fasteoi, bool, S_IRUGO);
>  
> -static bool __read_mostly enable_apicv_reg_vid = 1;
> -module_param(enable_apicv_reg_vid, bool, S_IRUGO);
> +static bool __read_mostly enable_apicv = 1;
> +module_param(enable_apicv, bool, S_IRUGO);
>  
>  /*
>   * If nested=1, nested virtualization is supported, i.e., guests may use
> @@ -370,6 +370,41 @@ struct nested_vmx {
>  	struct page *apic_access_page;
>  };
>  
> +#define POSTED_INTR_ON  0
> +/* Posted-Interrupt Descriptor */
> +struct pi_desc {
> +	u32 pir[8];     /* Posted interrupt requested */
> +	union {
> +		struct {
> +			u8  on:1,
> +			    rsvd:7;
> +		} control;
> +		u32 rsvd[8];
> +	} u;
> +} __aligned(64);
> +
> +static bool pi_test_on(struct pi_desc *pi_desc)
> +{
> +	return test_bit(POSTED_INTR_ON,	(unsigned long *)&pi_desc->u.control);
> +}
> +
> +static bool pi_test_and_set_on(struct pi_desc *pi_desc)
> +{
> +	return test_and_set_bit(POSTED_INTR_ON,
> +			(unsigned long *)&pi_desc->u.control);
> +}
> +
> +static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
> +{
> +	return test_and_clear_bit(POSTED_INTR_ON,
> +			(unsigned long *)&pi_desc->u.control);
> +}
> +
> +static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
> +{
> +	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
> +}
> +
>  struct vcpu_vmx {
>  	struct kvm_vcpu       vcpu;
>  	unsigned long         host_rsp;
> @@ -434,6 +469,9 @@ struct vcpu_vmx {
>  
>  	bool rdtscp_enabled;
>  
> +	/* Posted interrupt descriptor */
> +	struct pi_desc *pi;
> +
You haven't answered the question from my previous review: why are you
trying to save 46 bytes here?
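
If there is no real reason for the separate allocation, just embed the
descriptor in vcpu_vmx and drop the kzalloc()/kfree() entirely; a sketch,
assuming nothing else relies on the pointer (and keeping the 64 byte
alignment requirement in mind):

	/* in struct vcpu_vmx */
	struct pi_desc pi_desc;

	/* in vmx_vcpu_setup() */
	vmcs_write64(POSTED_INTR_DESC_ADDR, __pa(&vmx->pi_desc));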

>  	/* Support for a guest hypervisor (nested VMX) */
>  	struct nested_vmx nested;
>  };
> @@ -788,6 +826,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
>  		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
>  }
>  
> +static inline bool cpu_has_vmx_posted_intr(void)
> +{
> +	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
> +}
> +
> +static inline bool cpu_has_vmx_apicv(void)
> +{
> +	return cpu_has_vmx_apic_register_virt() &&
> +		cpu_has_vmx_virtual_intr_delivery() &&
> +		cpu_has_vmx_posted_intr();
> +}
> +
>  static inline bool cpu_has_vmx_flexpriority(void)
>  {
>  	return cpu_has_vmx_tpr_shadow() &&
> @@ -2535,12 +2585,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
>  	u32 _vmexit_control = 0;
>  	u32 _vmentry_control = 0;
>  
> -	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
> -	opt = PIN_BASED_VIRTUAL_NMIS;
> -	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
> -				&_pin_based_exec_control) < 0)
> -		return -EIO;
> -
>  	min = CPU_BASED_HLT_EXITING |
>  #ifdef CONFIG_X86_64
>  	      CPU_BASED_CR8_LOAD_EXITING |
> @@ -2617,6 +2661,17 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
>  				&_vmexit_control) < 0)
>  		return -EIO;
>  
> +	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
> +	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
> +	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
> +				&_pin_based_exec_control) < 0)
> +		return -EIO;
> +
> +	if (!(_cpu_based_2nd_exec_control &
> +		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
> +		!(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
> +		_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
> +
>  	min = 0;
>  	opt = VM_ENTRY_LOAD_IA32_PAT;
>  	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
> @@ -2795,11 +2850,10 @@ static __init int hardware_setup(void)
>  	if (!cpu_has_vmx_ple())
>  		ple_gap = 0;
>  
> -	if (!cpu_has_vmx_apic_register_virt() ||
> -				!cpu_has_vmx_virtual_intr_delivery())
> -		enable_apicv_reg_vid = 0;
> +	if (!cpu_has_vmx_apicv())
> +		enable_apicv = 0;
>  
> -	if (enable_apicv_reg_vid)
> +	if (enable_apicv)
>  		kvm_x86_ops->update_cr8_intercept = NULL;
>  	else
>  		kvm_x86_ops->hwapic_irr_update = NULL;
> @@ -3868,6 +3922,61 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
>  			msr, MSR_TYPE_W);
>  }
>  
> +static int vmx_vm_has_apicv(struct kvm *kvm)
> +{
> +	return enable_apicv && irqchip_in_kernel(kvm);
> +}
> +
> +static bool vmx_send_notification_event(struct kvm_vcpu *vcpu,
> +		int vector, int *result)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +	*result = !pi_test_and_set_pir(vector, vmx->pi);
The problem here is that the interrupt may still be pending in the IRR, so
it will eventually be coalesced, but we report it as delivered here. I do
not see a solution for this yet.

> +	if (!pi_test_and_set_on(vmx->pi) && (vcpu->mode == IN_GUEST_MODE)) {
> +		kvm_make_request(KVM_REQ_PENDING_PIR, vcpu);
Why not set KVM_REQ_EVENT here? What is this intermediate event needed
for?

> +		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
> +				    POSTED_INTR_VECTOR);
> +		if (!pi_test_on(vmx->pi))
Isn't it too optimistic to expect the IPI to have been delivered and
processed by the remote CPU by this point? See the sketch below, after the
end of this function, for what I have in mind instead.

> +			clear_bit(KVM_REQ_PENDING_PIR, &vcpu->requests)	;
> +		return true;
> +	}
> +	return false;
> +}
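
Combining the points above (check moved inside, plain KVM_REQ_EVENT as the
safety net, no attempt to guess whether the remote CPU has already handled
the notification), something like this is what I would expect, completely
untested:

static bool vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu,
					 int vector, int *result)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/* the vm_has_apicv() check from lapic.c moves in here */
	if (!vmx_vm_has_apicv(vcpu->kvm))
		return false;

	*result = !pi_test_and_set_pir(vector, vmx->pi);
	if (!pi_test_and_set_on(vmx->pi) && (vcpu->mode == IN_GUEST_MODE)) {
		/*
		 * Ordinary KVM_REQ_EVENT covers the case where the vcpu
		 * exits before the notification arrives; do not try to
		 * clear it again based on a racy pi_test_on().
		 */
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
				    POSTED_INTR_VECTOR);
	} else {
		/*
		 * Not running, or a notification is already pending: make
		 * sure PIR is looked at on the next entry.
		 */
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
	}
	return true;
}

As far as I can tell that also makes KVM_REQ_PENDING_PIR and the new hunk
in vcpu_enter_guest() unnecessary, since KVM_REQ_EVENT processing already
ends up in sync_pir_to_irr() via apic_find_highest_irr().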
> +
> +static bool vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	struct kvm_lapic *apic = vcpu->arch.apic;
> +	unsigned int i, old, new, ret_val, irr_offset, pir_val;
> +	bool make_request = false;
> +
> +	if (!vmx_vm_has_apicv(vcpu->kvm) || !pi_test_and_clear_on(vmx->pi))
> +		return false;
> +
> +	for (i = 0; i <= 7; i++) {
> +		pir_val = xchg(&vmx->pi->pir[i], 0);
> +		if (pir_val) {
> +			irr_offset = APIC_IRR + i * 0x10;
> +			do {
> +				old = kvm_apic_get_reg(apic, irr_offset);
> +				new = old | pir_val;
> +				ret_val = cmpxchg((u32 *)(apic->regs +
> +						irr_offset), old, new);
> +			} while (unlikely(ret_val != old));
> +			make_request = true;
> +		}
> +	}
> +
> +	return make_request;
> +}
> +
> +static void free_pi(struct vcpu_vmx *vmx)
> +{
> +	if (vmx_vm_has_apicv(vmx->vcpu.kvm))
> +		kfree(vmx->pi);
> +}
> +
>  /*
>   * Set up the vmcs's constant host-state fields, i.e., host-state fields that
>   * will not change in the lifetime of the guest.
> @@ -3928,6 +4037,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
>  	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
>  }
>  
> +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
> +{
> +	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
> +
> +	if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
> +		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
> +	return pin_based_exec_ctrl;
> +}
> +
>  static u32 vmx_exec_control(struct vcpu_vmx *vmx)
>  {
>  	u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
> @@ -3945,11 +4063,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
>  	return exec_control;
>  }
>  
> -static int vmx_vm_has_apicv(struct kvm *kvm)
> -{
> -	return enable_apicv_reg_vid && irqchip_in_kernel(kvm);
> -}
> -
>  static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
>  {
>  	u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
> @@ -4005,8 +4118,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
>  
>  	/* Control */
> -	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
> -		vmcs_config.pin_based_exec_ctrl);
> +	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
>  
>  	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
>  
> @@ -4015,13 +4127,17 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  				vmx_secondary_exec_control(vmx));
>  	}
>  
> -	if (enable_apicv_reg_vid) {
> +	if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
>  		vmcs_write64(EOI_EXIT_BITMAP0, 0);
>  		vmcs_write64(EOI_EXIT_BITMAP1, 0);
>  		vmcs_write64(EOI_EXIT_BITMAP2, 0);
>  		vmcs_write64(EOI_EXIT_BITMAP3, 0);
>  
>  		vmcs_write16(GUEST_INTR_STATUS, 0);
> +
> +		vmx->pi = kzalloc(sizeof(struct pi_desc), GFP_KERNEL);
> +		vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
> +		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((vmx->pi)));
>  	}
>  
>  	if (ple_gap) {
> @@ -4171,6 +4287,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
>  		vmcs_write64(APIC_ACCESS_ADDR,
>  			     page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
>  
> +	if (vmx_vm_has_apicv(vcpu->kvm))
> +		memset(vmx->pi, 0, sizeof(struct pi_desc));
> +
>  	if (vmx->vpid != 0)
>  		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
>  
> @@ -6746,6 +6865,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
>  
>  	free_vpid(vmx);
>  	free_nested(vmx);
> +	free_pi(vmx);
>  	free_loaded_vmcs(vmx->loaded_vmcs);
>  	kfree(vmx->guest_msrs);
>  	kvm_vcpu_uninit(vcpu);
> @@ -7647,6 +7767,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
>  	.load_eoi_exitmap = vmx_load_eoi_exitmap,
>  	.hwapic_irr_update = vmx_hwapic_irr_update,
>  	.hwapic_isr_update = vmx_hwapic_isr_update,
> +	.sync_pir_to_irr = vmx_sync_pir_to_irr,
> +	.send_notification_event = vmx_send_notification_event,
>  
>  	.set_tss_addr = vmx_set_tss_addr,
>  	.get_tdp_level = get_ept_level,
> @@ -7750,7 +7872,7 @@ static int __init vmx_init(void)
>  	memcpy(vmx_msr_bitmap_longmode_x2apic,
>  			vmx_msr_bitmap_longmode, PAGE_SIZE);
>  
> -	if (enable_apicv_reg_vid) {
> +	if (enable_apicv) {
>  		for (msr = 0x800; msr <= 0x8ff; msr++)
>  			vmx_disable_intercept_msr_read_x2apic(msr);
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 9f25d70..6e1e6e7 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2681,6 +2681,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
>  				    struct kvm_lapic_state *s)
>  {
> +	kvm_x86_ops->sync_pir_to_irr(vcpu);
>  	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
>  
>  	return 0;
> @@ -5698,6 +5699,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  			kvm_deliver_pmi(vcpu);
>  		if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu))
>  			update_eoi_exitmap(vcpu);
> +		if (kvm_check_request(KVM_REQ_PENDING_PIR, vcpu))
> +			if (kvm_x86_ops->sync_pir_to_irr(vcpu))
> +				kvm_make_request(KVM_REQ_EVENT, vcpu);
>  	}
>  
>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 0350e0d..a410819 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -124,6 +124,7 @@ static inline bool is_error_page(struct page *page)
>  #define KVM_REQ_MCLOCK_INPROGRESS 20
>  #define KVM_REQ_EPR_EXIT          21
>  #define KVM_REQ_EOIBITMAP         22
> +#define KVM_REQ_PENDING_PIR	  23
>  
>  #define KVM_USERSPACE_IRQ_SOURCE_ID		0
>  #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
> -- 
> 1.7.1

--
			Gleb.

