Re: [PATCH v2 3/6] x86, apicv: add virtual interrupt delivery support

Gleb Natapov <gleb@xxxxxxxxxx> · Sun, 25 Nov 2012 10:53:18 +0200



On Fri, Nov 23, 2012 at 11:46:30AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2012-11-22:
> > On Wed, Nov 21, 2012 at 04:09:36PM +0800, Yang Zhang wrote:
> >> Virtual interrupt delivery avoids KVM to inject vAPIC interrupts
> >> manually, which is fully taken care of by the hardware. This needs
> >> some special awareness into existing interrupt injection path:
> >> 
> >> - for pending interrupt, instead of direct injection, we may need
> >>   update architecture specific indicators before resuming to guest.
> >> - A pending interrupt, which is masked by ISR, should be also
> >>   considered in above update action, since hardware will decide
> >>   when to inject it at right time. Current has_interrupt and
> >>   get_interrupt only returns a valid vector from injection p.o.v.
> >> Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> >> Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>
> >> ---
> >>  arch/x86/include/asm/kvm_host.h |    4 + arch/x86/include/asm/vmx.h   
> >>    |   11 ++++ arch/x86/kvm/irq.c              |   44 ++++++++++++++
> >>  arch/x86/kvm/lapic.c            |   44 +++++++++++++-
> >>  arch/x86/kvm/lapic.h            |   13 ++++ arch/x86/kvm/svm.c        
> >>       |    6 ++ arch/x86/kvm/vmx.c              |  125
> >>  ++++++++++++++++++++++++++++++++++++++- arch/x86/kvm/x86.c            
> >>   |   16 +++++- virt/kvm/ioapic.c               |    1 + 9 files
> >>  changed, 260 insertions(+), 4 deletions(-)
> >> diff --git a/arch/x86/include/asm/kvm_host.h
> >> b/arch/x86/include/asm/kvm_host.h index b2e11f4..8e07a86 100644 ---
> >> a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h
> >> @@ -682,6 +682,10 @@ struct kvm_x86_ops {
> >>  	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
> >>  	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
> >>  	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
> >> +	int (*has_virtual_interrupt_delivery)(struct kvm_vcpu *vcpu);
> >> +	void (*update_irq)(struct kvm_vcpu *vcpu);
> >> +	void (*set_eoi_exitmap)(struct kvm_vcpu *vcpu, int vector,
> >> +			int need_eoi, int global);
> >>  	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
> >>  	int (*get_tdp_level)(void);
> >>  	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> >> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> >> index 21101b6..1003341 100644
> >> --- a/arch/x86/include/asm/vmx.h
> >> +++ b/arch/x86/include/asm/vmx.h
> >> @@ -62,6 +62,7 @@
> >>  #define EXIT_REASON_MCE_DURING_VMENTRY  41 #define
> >>  EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_APIC_ACCESS    
> >>      44 +#define EXIT_REASON_EOI_INDUCED         45 #define
> >>  EXIT_REASON_EPT_VIOLATION       48 #define EXIT_REASON_EPT_MISCONFIG  
> >>      49 #define EXIT_REASON_WBINVD              54 @@ -143,6 +144,7 @@
> >>  #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040 #define
> >>  SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080 #define
> >>  SECONDARY_EXEC_APIC_REGISTER_VIRT       0x00000100 +#define
> >>  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY    0x00000200 #define
> >>  SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400 #define
> >>  SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
> >> @@ -180,6 +182,7 @@ enum vmcs_field {
> >>  	GUEST_GS_SELECTOR               = 0x0000080a, 	GUEST_LDTR_SELECTOR   
> >>           = 0x0000080c, 	GUEST_TR_SELECTOR               = 0x0000080e,
> >>  +	GUEST_INTR_STATUS               = 0x00000810, 	HOST_ES_SELECTOR     
> >>            = 0x00000c00, 	HOST_CS_SELECTOR                = 0x00000c02,
> >>  	HOST_SS_SELECTOR                = 0x00000c04, @@ -207,6 +210,14 @@
> >>  enum vmcs_field { 	APIC_ACCESS_ADDR_HIGH		= 0x00002015, 	EPT_POINTER  
> >>                    = 0x0000201a, 	EPT_POINTER_HIGH                =
> >>  0x0000201b,
> >> +	EOI_EXIT_BITMAP0                = 0x0000201c,
> >> +	EOI_EXIT_BITMAP0_HIGH           = 0x0000201d,
> >> +	EOI_EXIT_BITMAP1                = 0x0000201e,
> >> +	EOI_EXIT_BITMAP1_HIGH           = 0x0000201f,
> >> +	EOI_EXIT_BITMAP2                = 0x00002020,
> >> +	EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
> >> +	EOI_EXIT_BITMAP3                = 0x00002022,
> >> +	EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
> >>  	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
> >>  	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
> >>  	VMCS_LINK_POINTER               = 0x00002800,
> >> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> >> index 7e06ba1..c7356a3 100644
> >> --- a/arch/x86/kvm/irq.c
> >> +++ b/arch/x86/kvm/irq.c
> >> @@ -60,6 +60,29 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
> >>  EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
> >>  
> >>  /*
> >> + * check if there is pending interrupt without
> >> + * intack. This _apicv version is used when hardware
> >> + * supports APIC virtualization with virtual interrupt
> >> + * delivery support. In such case, KVM is not required
> >> + * to poll pending APIC interrupt, and thus this
> >> + * interface is used to poll pending interupts from
> >> + * non-APIC source.
> >> + */
> >> +int kvm_cpu_has_extint(struct kvm_vcpu *v)
> >> +{
> >> +	struct kvm_pic *s;
> >> +
> >> +	if (!irqchip_in_kernel(v->kvm))
> >> +		return v->arch.interrupt.pending;
> >> +
> > This does not belong here. If !irqchip_in_kernel() the function will not
> > > be called. Hmm, actually with !irqchip_in_kernel() the kernel will oops in
> > > kvm_apic_vid_enabled() since it dereferences vcpu->arch.apic without
> > > checking if it is NULL.
> 
> Right. Will remove it in next version and add the check in kvm_apic_vid_enabled.
>  
> > 
> >> +	if (kvm_apic_accept_pic_intr(v)) {
> >> +		s = pic_irqchip(v->kvm);	/* PIC */
> >> +		return s->output;
> >> +	} else
> >> +		return 0;
> > This is code duplication from kvm_cpu_has_interrupt(). Write common
> > function and call it from kvm_cpu_has_interrupt(), but even that is
> > not needed, see below.
> 
> > Why is it not needed?
Because if you change kvm_cpu_has_interrupt() like I described below, the
code path that uses this function will not be needed.

>  
> >> +}
> >> +
> >> +/*
> >>   * Read pending interrupt vector and intack.
> >>   */
> >>  int kvm_cpu_get_interrupt(struct kvm_vcpu *v) @@ -82,6 +105,27 @@ int
> >>  kvm_cpu_get_interrupt(struct kvm_vcpu *v) }
> >>  EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
> >> +/*
> >> + * Read pending interrupt vector and intack.
> >> + * Similar to kvm_cpu_has_interrupt_apicv, to get
> >> + * interrupts from non-APIC sources.
> >> + */
> >> +int kvm_cpu_get_extint(struct kvm_vcpu *v)
> >> +{
> >> +	struct kvm_pic *s;
> >> +	int vector = -1;
> >> +
> >> +	if (!irqchip_in_kernel(v->kvm))
> >> +		return v->arch.interrupt.nr;
> > Same as above.
> > 
> >> +
> >> +	if (kvm_apic_accept_pic_intr(v)) {
> >> +		s = pic_irqchip(v->kvm);
> >> +		s->output = 0;		/* PIC */
> >> +		vector = kvm_pic_read_irq(v->kvm);
> > Ditto about code duplication.
> > 
> >> +	}
> >> +	return vector;
> >> +}
> >> +
> >>  void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
> >>  {
> >>  	kvm_inject_apic_timer_irqs(vcpu);
> >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> >> index a63ffdc..af48361 100644
> >> --- a/arch/x86/kvm/lapic.c
> >> +++ b/arch/x86/kvm/lapic.c
> >> @@ -643,6 +643,12 @@ out:
> >>  	return ret;
> >>  }
> >> +void kvm_set_eoi_exitmap(struct kvm_vcpu *vcpu, int vector,
> >> +		int need_eoi, int global)
> >> +{
> >> +	kvm_x86_ops->set_eoi_exitmap(vcpu, vector, need_eoi, global);
> >> +}
> >> +
> >>  /*
> >>   * Add a pending IRQ into lapic.
> >>   * Return 1 if successfully added and 0 if discarded.
> >> @@ -664,8 +670,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int
> > delivery_mode,
> >>  		if (trig_mode) {
> >>  			apic_debug("level trig mode for vector %d", vector);
> >>  			apic_set_vector(vector, apic->regs + APIC_TMR);
> >> -		} else
> >> +			kvm_set_eoi_exitmap(vcpu, vector, 1, 0);
> >> +		} else {
> >>  			apic_clear_vector(vector, apic->regs + APIC_TMR);
> >> +			kvm_set_eoi_exitmap(vcpu, vector, 0, 0);
> > Why not use APIC_TMR directly instead of kvm_set_eoi_exitmap() logic?
> 
> Good idea. It seems more reasonable. 
> 
> >> +		}
> >> 
> >>  		result = !apic_test_and_set_irr(vector, apic);
> >>  		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, @@ -769,6
> >>  +778,26 @@ static int apic_set_eoi(struct kvm_lapic *apic) 	return
> >>  vector; }
> >> +/*
> >> + * this interface assumes a trap-like exit, which has already finished
> >> + * desired side effect including vISR and vPPR update.
> >> + */
> >> +void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
> >> +{
> >> +	struct kvm_lapic *apic = vcpu->arch.apic;
> >> +	int trigger_mode;
> >> +
> >> +	if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR))
> >> +		trigger_mode = IOAPIC_LEVEL_TRIG;
> >> +	else
> >> +		trigger_mode = IOAPIC_EDGE_TRIG;
> >> +
> >> +	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
> >> +		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
> >> +	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
> > More code duplication. Why not call apic_set_eoi() and skip isr/ppr
> > logic there if vid is enabled, or put the logic in common function and
> > call from both places.
> 
> Ok, will change it in next patch.
> 
> >> +}
> >> +EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
> >> +
> >>  static void apic_send_ipi(struct kvm_lapic *apic) { 	u32 icr_low =
> >>  kvm_apic_get_reg(apic, APIC_ICR); @@ -1510,6 +1539,8 @@ int
> >>  kvm_create_lapic(struct kvm_vcpu *vcpu) 	kvm_lapic_reset(vcpu);
> >>  	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
> >> +	if (kvm_x86_ops->has_virtual_interrupt_delivery(vcpu))
> >> +		apic->vid_enabled = true;
> > What do you have vid_enabled for. This is global, not per apic, state.
> When injecting an interrupt into the guest, we need this to check whether vid is enabled. If not, we use the old way to handle the interrupt.
> I think putting it in apic is reasonable. Though all vcpus use the same configuration, the APICv feature is per vcpu too.
> 
How is APICv per vcpu? It is global. Just call has_virtual_interrupt_delivery(vcpu)
instead of the vid_enabled thing.

> > 
> >>  	return 0; nomem_free_apic: 	kfree(apic); @@ -1533,6 +1564,17 @@ int
> >>  kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) 	return highest_irr; }
> >> +int kvm_apic_get_highest_irr(struct kvm_vcpu *vcpu)
> >> +{
> >> +	struct kvm_lapic *apic = vcpu->arch.apic;
> >> +
> >> +	if (!apic || !apic_enabled(apic))
> >> +		return -1;
> >> +
> >> +	return apic_find_highest_irr(apic);
> >> +}
> >> +EXPORT_SYMBOL_GPL(kvm_apic_get_highest_irr);
> >> +
> >>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
> >>  {
> >>  	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
> >> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> >> index c42f111..2503a64 100644
> >> --- a/arch/x86/kvm/lapic.h
> >> +++ b/arch/x86/kvm/lapic.h
> >> @@ -20,6 +20,7 @@ struct kvm_lapic {
> >>  	u32 divide_count; 	struct kvm_vcpu *vcpu; 	bool irr_pending; +	bool
> >>  vid_enabled; 	/* Number of bits set in ISR. */ 	s16 isr_count; 	/* The
> >>  highest vector set in ISR; if -1 - invalid, must scan ISR. */ @@ -39,6
> >>  +40,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu); int
> >>  kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); int
> >>  kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int
> >>  kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
> >> +int kvm_cpu_has_extint(struct kvm_vcpu *v);
> >> +int kvm_cpu_get_extint(struct kvm_vcpu *v);
> >> +int kvm_apic_get_highest_irr(struct kvm_vcpu *vcpu);
> >>  void kvm_lapic_reset(struct kvm_vcpu *vcpu); u64
> >>  kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void
> >>  kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); @@ -50,6
> >>  +54,8 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu); int
> >>  kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int
> >>  kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); int
> >>  kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
> >> +void kvm_set_eoi_exitmap(struct kvm_vcpu *vcpu, int vector,
> >> +		int need_eoi, int global);
> >>  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
> >>  
> >>  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
> >> @@ -65,6 +71,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu
> > *vcpu);
> >>  void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
> >>  
> >>  int kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
> >> +void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
> >> 
> >>  void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
> >>  void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
> >> @@ -81,6 +88,12 @@ static inline bool
> > kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
> >>  	return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
> >>  }
> >> +static inline bool kvm_apic_vid_enabled(struct kvm_vcpu *vcpu)
> >> +{
> >> +	struct kvm_lapic *apic = vcpu->arch.apic;
> >> +	return apic->vid_enabled;
> >> +}
> > vcpu->arch.apic can be NULL from where this is called.
> > 
> >> +
> >>  int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
> >>  void kvm_lapic_init(void);
> >> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> >> index d017df3..b290aba 100644
> >> --- a/arch/x86/kvm/svm.c
> >> +++ b/arch/x86/kvm/svm.c
> >> @@ -3564,6 +3564,11 @@ static void update_cr8_intercept(struct kvm_vcpu
> > *vcpu, int tpr, int irr)
> >>  		set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
> >>  }
> >> +static int svm_has_virtual_interrupt_delivery(struct kvm_vcpu *vcpu)
> >> +{
> >> +	return 0;
> >> +}
> >> +
> >>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { 	struct vcpu_svm
> >>  *svm = to_svm(vcpu); @@ -4283,6 +4288,7 @@ static struct kvm_x86_ops
> >>  svm_x86_ops = { 	.enable_nmi_window = enable_nmi_window,
> >>  	.enable_irq_window = enable_irq_window, 	.update_cr8_intercept =
> >>  update_cr8_intercept,
> >> +	.has_virtual_interrupt_delivery = svm_has_virtual_interrupt_delivery,
> >> 
> >>  	.set_tss_addr = svm_set_tss_addr,
> >>  	.get_tdp_level = get_npt_level,
> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >> index e9287aa..c0d74ce 100644
> >> --- a/arch/x86/kvm/vmx.c
> >> +++ b/arch/x86/kvm/vmx.c
> >> @@ -86,6 +86,9 @@ module_param(fasteoi, bool, S_IRUGO);
> >>  static bool __read_mostly enable_apicv_reg = 0;
> >>  module_param(enable_apicv_reg, bool, S_IRUGO);
> >> +static bool __read_mostly enable_apicv_vid = 0;
> >> +module_param(enable_apicv_vid, bool, S_IRUGO);
> >> +
> >>  /*
> >>   * If nested=1, nested virtualization is supported, i.e., guests may use
> >>   * VMX and be a hypervisor for its own guests. If nested=0, guests may not
> >> @@ -432,6 +435,10 @@ struct vcpu_vmx {
> >> 
> >>  	bool rdtscp_enabled;
> >> +	u8 eoi_exitmap_changed;
> >> +	u64 eoi_exit_bitmap[4];
> >> +	u64 eoi_exit_bitmap_global[4];
> >> +
> >>  	/* Support for a guest hypervisor (nested VMX) */
> >>  	struct nested_vmx nested;
> >>  };
> >> @@ -770,6 +777,12 @@ static inline bool
> > cpu_has_vmx_apic_register_virt(void)
> >>  		SECONDARY_EXEC_APIC_REGISTER_VIRT;
> >>  }
> >> +static inline bool cpu_has_vmx_virtual_intr_delivery(void)
> >> +{
> >> +	return vmcs_config.cpu_based_2nd_exec_ctrl &
> >> +		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
> >> +}
> >> +
> >>  static inline bool cpu_has_vmx_flexpriority(void)
> >>  {
> >>  	return cpu_has_vmx_tpr_shadow() &&
> >> @@ -2480,7 +2493,8 @@ static __init int setup_vmcs_config(struct
> > vmcs_config *vmcs_conf)
> >>  			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
> >>  			SECONDARY_EXEC_RDTSCP |
> >>  			SECONDARY_EXEC_ENABLE_INVPCID |
> >> -			SECONDARY_EXEC_APIC_REGISTER_VIRT;
> >> +			SECONDARY_EXEC_APIC_REGISTER_VIRT |
> >> +			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
> >>  		if (adjust_vmx_controls(min2, opt2,
> >>  					MSR_IA32_VMX_PROCBASED_CTLS2,
> >>  					&_cpu_based_2nd_exec_control) < 0)
> >> @@ -2494,7 +2508,8 @@ static __init int setup_vmcs_config(struct
> >> vmcs_config *vmcs_conf)
> >> 
> >>  	if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
> >>  		_cpu_based_2nd_exec_control &= ~(
> >> -				SECONDARY_EXEC_APIC_REGISTER_VIRT);
> >> +				SECONDARY_EXEC_APIC_REGISTER_VIRT |
> >> +				SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
> >> 
> >>  	if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { 		/*
> >>  CR3 accesses and invlpg don't need to cause VM Exits when EPT @@
> >>  -2696,6 +2711,9 @@ static __init int hardware_setup(void) 	if
> >>  (!cpu_has_vmx_apic_register_virt()) 		enable_apicv_reg = 0;
> >> +	if (!cpu_has_vmx_virtual_intr_delivery())
> >> +		enable_apicv_vid = 0;
> >> +
> >>  	if (nested)
> >>  		nested_vmx_setup_ctls_msrs();
> >> @@ -3811,6 +3829,8 @@ static u32 vmx_secondary_exec_control(struct
> > vcpu_vmx *vmx)
> >>  		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
> >>  	if (!enable_apicv_reg)
> >>  		exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
> >> +	if (!enable_apicv_vid)
> >> +		exec_control &= ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
> >>  	return exec_control;
> >>  }
> >> @@ -3855,6 +3875,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
> >>  				vmx_secondary_exec_control(vmx));
> >>  	}
> >> +	if (enable_apicv_vid) {
> >> +		vmcs_write64(EOI_EXIT_BITMAP0, 0);
> >> +		vmcs_write64(EOI_EXIT_BITMAP1, 0);
> >> +		vmcs_write64(EOI_EXIT_BITMAP2, 0);
> >> +		vmcs_write64(EOI_EXIT_BITMAP3, 0);
> >> +
> >> +		vmcs_write16(GUEST_INTR_STATUS, 0);
> >> +	}
> >> +
> >>  	if (ple_gap) {
> >>  		vmcs_write32(PLE_GAP, ple_gap);
> >>  		vmcs_write32(PLE_WINDOW, ple_window);
> >> @@ -4770,6 +4799,16 @@ static int handle_apic_access(struct kvm_vcpu
> > *vcpu)
> >>  	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
> >>  }
> >> +static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
> >> +{
> >> +	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> >> +	int vector = exit_qualification & 0xff;
> >> +
> >> +	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
> >> +	kvm_apic_set_eoi_accelerated(vcpu, vector);
> >> +	return 1;
> >> +}
> >> +
> >>  static int handle_apic_write(struct kvm_vcpu *vcpu)
> >>  {
> >>  	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> >> @@ -5719,6 +5758,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct
> > kvm_vcpu *vcpu) = {
> >>  	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
> >>  	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
> >>  	[EXIT_REASON_APIC_WRITE]              = handle_apic_write,
> >>  +	[EXIT_REASON_EOI_INDUCED]             = handle_apic_eoi_induced,
> >>  	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
> >>  	[EXIT_REASON_XSETBV]                  = handle_xsetbv,
> >>  	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
> >> @@ -6049,6 +6089,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> >> 
> >>  static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
> >>  {
> >> +    /* no need for tpr_threshold update if APIC virtual
> >> +     * interrupt delivery is enabled */
> >> +	if (!enable_apicv_vid)
> >> +		return ;
> > 
> > Just set kvm_x86_ops->update_cr8_intercept to NULL if !enable_apicv_vid
> > and the function will not be called.
> 
> Sure.
>  
> >> +
> >>  	if (irr == -1 || tpr < irr) {
> >>  		vmcs_write32(TPR_THRESHOLD, 0);
> >>  		return;
> >> @@ -6057,6 +6102,79 @@ static void update_cr8_intercept(struct kvm_vcpu
> > *vcpu, int tpr, int irr)
> >>  	vmcs_write32(TPR_THRESHOLD, irr);
> >>  }
> >> +static int vmx_has_virtual_interrupt_delivery(struct kvm_vcpu *vcpu)
> >> +{
> >> +	return irqchip_in_kernel(vcpu->kvm) && enable_apicv_vid;
> >> +}
> >> +
> >> +static void vmx_update_irq(struct kvm_vcpu *vcpu)
> >> +{
> >> +	u16 status;
> >> +	u8 old;
> >> +	int vector;
> >> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> >> +
> >> +	if (!enable_apicv_vid)
> >> +		return ;
> > Ditto. Set kvm_x86_ops->update_irq to a function that does nothing if
> > !enable_apicv_vid. BTW you do not set this callback in SVM code and call
> > it unconditionally.
> > 
> >> +
> >> +	vector = kvm_apic_get_highest_irr(vcpu);
> >> +	if (vector == -1)
> >> +		return;
> >> +
> >> +	status = vmcs_read16(GUEST_INTR_STATUS);
> >> +	old = (u8)status & 0xff;
> >> +	if ((u8)vector != old) {
> >> +		status &= ~0xff;
> >> +		status |= (u8)vector;
> >> +		vmcs_write16(GUEST_INTR_STATUS, status);
> >> +	}
> > > Please write RVI accessor functions.
> Sure.
> 
> >> +
> >> +	if (vmx->eoi_exitmap_changed) {
> >> +#define UPDATE_EOI_EXITMAP(v, e) {				\
> >> +	if ((v)->eoi_exitmap_changed & (1 << (e)))	\
> >> +		vmcs_write64(EOI_EXIT_BITMAP##e,		\
> >> +		(v)->eoi_exit_bitmap[e] | (v)->eoi_exit_bitmap_global[e]); }
> > Inline function would do. But why calculate this on each entry? We want
> > EOI exits only for level IOAPIC interrupts and edge IOAPIC interrupt
> > with registered notifiers. This configuration rarely changes.
> 
> eoi_exitmap_changed is used to track whether the trig mode has changed. As you said, it changes rarely, so this code will seldom be executed.
> 
But the code still checks whether the bitmap was changed during each interrupt
injection. Recalculate the bitmap when a notifier is added/removed or the ioapic
configuration changes. Use a request bit to reload the new bitmap.

> > 
> > 
> >> +
> >> +		UPDATE_EOI_EXITMAP(vmx, 0);
> >> +		UPDATE_EOI_EXITMAP(vmx, 1);
> >> +		UPDATE_EOI_EXITMAP(vmx, 2);
> >> +		UPDATE_EOI_EXITMAP(vmx, 3);
> >> +		vmx->eoi_exitmap_changed = 0;
> >> +	}
> >> +}
> >> +
> >> +static void vmx_set_eoi_exitmap(struct kvm_vcpu *vcpu,
> >> +				int vector,
> >> +				int need_eoi, int global)
> >> +{
> >> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> >> +	int index, offset, changed;
> >> +	unsigned long *eoi_exitmap;
> >> +
> >> +	if (!enable_apicv_vid)
> >> +		return ;
> >> +
> >> +	if (WARN_ONCE((vector < 0) || (vector > 255),
> >> +		"KVM VMX: vector (%d) out of range\n", vector))
> >> +		return;
> >> +
> >> +	index = vector >> 6;
> >> +	offset = vector & 63;
> >> +	if (global)
> >> +		eoi_exitmap =
> >> +		    (unsigned long *)&vmx->eoi_exit_bitmap_global[index];
> >> +	else
> >> +		eoi_exitmap = (unsigned long *)&vmx->eoi_exit_bitmap[index];
> >> +
> >> +	if (need_eoi)
> >> +		changed = !test_and_set_bit(offset, eoi_exitmap);
> >> +	else
> >> +		changed = test_and_clear_bit(offset, eoi_exitmap);
> >> +
> >> +	if (changed)
> >> +		vmx->eoi_exitmap_changed |= 1 << index;
> >> +}
> >> +
> >>  static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) { 	u32
> >>  exit_intr_info; @@ -7320,6 +7438,9 @@ static struct kvm_x86_ops
> >>  vmx_x86_ops = { 	.enable_nmi_window = enable_nmi_window,
> >>  	.enable_irq_window = enable_irq_window, 	.update_cr8_intercept =
> >>  update_cr8_intercept,
> >> +	.has_virtual_interrupt_delivery = vmx_has_virtual_interrupt_delivery,
> >> +	.update_irq = vmx_update_irq,
> > You need to initialize this one in svm.c too.
> > 
> >> +	.set_eoi_exitmap = vmx_set_eoi_exitmap,
> >> 
> >>  	.set_tss_addr = vmx_set_tss_addr,
> >>  	.get_tdp_level = get_ept_level,
> >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >> index 4f76417..8b8de3b 100644
> >> --- a/arch/x86/kvm/x86.c
> >> +++ b/arch/x86/kvm/x86.c
> >> @@ -5190,6 +5190,13 @@ static void inject_pending_event(struct kvm_vcpu
> > *vcpu)
> >>  			vcpu->arch.nmi_injected = true;
> >>  			kvm_x86_ops->set_nmi(vcpu);
> >>  		}
> >> +	} else if (kvm_apic_vid_enabled(vcpu)) {
> >> +		if (kvm_cpu_has_extint(vcpu) &&
> >> +		    kvm_x86_ops->interrupt_allowed(vcpu)) {
> >> +			kvm_queue_interrupt(vcpu,
> >> +				kvm_cpu_get_extint(vcpu), false);
> >> +			kvm_x86_ops->set_irq(vcpu);
> >> +		}
> > Drop all this and modify kvm_cpu_has_interrupt()/kvm_cpu_get_interrupt()
> > to consider apic interrupts only if vid is enabled then the if below
> > will just work.
> Ok.
> 
> > 
> >>  	} else if (kvm_cpu_has_interrupt(vcpu)) {
> >>  		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
> >>  			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
> >> @@ -5289,12 +5296,19 @@ static int vcpu_enter_guest(struct kvm_vcpu
> > *vcpu)
> >>  	}
> >>  
> >>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> >> +		/* update archtecture specific hints for APIC
> >> +		 * virtual interrupt delivery */
> >> +		kvm_x86_ops->update_irq(vcpu);
> >> +
> >>  		inject_pending_event(vcpu);
> >>  
> >>  		/* enable NMI/IRQ window open exits if needed */
> >>  		if (vcpu->arch.nmi_pending)
> >>  			kvm_x86_ops->enable_nmi_window(vcpu);
> >> -		else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
> >> +		else if (kvm_apic_vid_enabled(vcpu)) {
> >> +			if (kvm_cpu_has_extint(vcpu))
> >> +				kvm_x86_ops->enable_irq_window(vcpu);
> > > Same as above. With proper kvm_cpu_has_interrupt() implementation this
> > > if is not needed.
> > 
> >> +		} else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
> >>  			kvm_x86_ops->enable_irq_window(vcpu);
> >>  
> >>  		if (kvm_lapic_enabled(vcpu)) {
> >> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> >> index 166c450..898aa62 100644
> >> --- a/virt/kvm/ioapic.c
> >> +++ b/virt/kvm/ioapic.c
> >> @@ -186,6 +186,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
> > irq)
> >>  		/* need to read apic_id from apic regiest since 		 * it can be
> >>  rewritten */ 		irqe.dest_id = ioapic->kvm->bsp_vcpu_id;
> >>  +		kvm_set_eoi_exitmap(ioapic->kvm->vcpus[0], irqe.vector, 1, 1); 	}
> >>  #endif 	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
> >> --
> >> 1.7.1
> > 
> > --
> > 			Gleb.
> 
> 
> Best regards,
> Yang
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html