RE: [PATCH v3 3/4] x86, apicv: add virtual interrupt delivery support

"Zhang, Yang Z" <yang.z.zhang@xxxxxxxxx> · Wed, 5 Dec 2012 01:55:17 +0000



Gleb Natapov wrote on 2012-12-04:
> On Tue, Dec 04, 2012 at 06:39:50AM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2012-12-03:
>>> On Mon, Dec 03, 2012 at 03:01:03PM +0800, Yang Zhang wrote:
>>>> Virtual interrupt delivery avoids KVM to inject vAPIC interrupts
>>>> manually, which is fully taken care of by the hardware. This needs
>>>> some special awareness into existing interrupr injection path:
>>>> 
>>>> - for pending interrupt, instead of direct injection, we may need
>>>>   update architecture specific indicators before resuming to guest. -
>>>>   A pending interrupt, which is masked by ISR, should be also
>>>>   considered in above update action, since hardware will decide when
>>>>   to inject it at right time. Current has_interrupt and get_interrupt
>>>>   only returns a valid vector from injection p.o.v.
>>> Most of my previous comments still apply.
>>> 
>>>> +void kvm_set_eoi_exitmap(struct kvm_vcpu *vcpu, int vector,
>>>> +		int trig_mode, int always_set)
>>>> +{
>>>> +	if (kvm_x86_ops->set_eoi_exitmap)
>>>> +		kvm_x86_ops->set_eoi_exitmap(vcpu, vector,
>>>> +					trig_mode, always_set);
>>>> +}
>>>> +
>>>>  /*
>>>>   * Add a pending IRQ into lapic.
>>>>   * Return 1 if successfully added and 0 if discarded.
>>>> @@ -661,6 +669,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic,
> int
>>> delivery_mode,
>>>>  		if (unlikely(!apic_enabled(apic)))
>>>>  			break;
>>>> +		kvm_set_eoi_exitmap(vcpu, vector, trig_mode, 0);
>>> As I said in the last review rebuild the bitmap when ioapic or irq
>>> notifier configuration changes, user request bit to notify vcpus to
>>> reload the bitmap.
>> It is too complicated. When program ioapic entry, we cannot get the target vcpu
> easily. We need to read destination format register and logical destination
> register to find out target vcpu if using logical mode. Also, we must trap every
> modification to the two registers to update eoi bitmap.
> No need to check target vcpu. Enable exit on all vcpus for the vector
This is wrong. As we known, modern OS uses per VCPU vector. We cannot ensure all vectors have same trigger mode. And what's worse, the vector in another vcpu is used to handle high frequency interrupts(like 10G NIC), then it will hurt performance.

> programmed into ioapic. Which two registers? All accesses to ioapic are
> trapped and reconfiguration is rare.
In logical mode, the destination VCPU is depend on each CPU's destination format register and logical destination register. So we must also trap the two registers.
And if it uses lowest priority delivery mode, the PPR need to be trapped too. Since PPR will change on each interrupt injection, the cost should be higher than current approach.

>> For irq notifier, only PIT is special which is edge trigger but need an
>> EOI notifier. So, just treat it specially. And TMR can cover others.
>> 
> We shouldn't assume that. If another notifier will be added it will be
> easy to forget to update apicv code to exclude another vector too.
At this point, guest is not running(in device initializing), we cannot not know the vector. As you mentioned, the best point is when guest program ioapic entry. But it also is impossible to get the vector(see above).
I can give some comments on the function to remind the caller to update eoi bitmap when the interrupt is edge and they still want to get EOI vmexit.

>>> 
>>>>  		if (trig_mode) {
>>>>  			apic_debug("level trig mode for vector %d", vector);
>>>>  			apic_set_vector(vector, apic->regs + APIC_TMR);
>>>> @@ -740,6 +749,19 @@ int kvm_apic_compare_prio(struct kvm_vcpu
> *vcpu1,
>>> struct kvm_vcpu *vcpu2)
>>>>  	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
>>>>  }
>>>> +static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
>>>> +{ +	if (!(kvm_apic_get_reg(apic, APIC_SPIV) &
>>>> APIC_SPIV_DIRECTED_EOI) && +	   
>>>> kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { +		int
>>>> trigger_mode; +		if (apic_test_vector(vector, apic->regs + APIC_TMR))
>>>> +			trigger_mode = IOAPIC_LEVEL_TRIG; +		else +			trigger_mode =
>>>> IOAPIC_EDGE_TRIG; +		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector,
>>>> trigger_mode); +	} +} +
>>>>  static int apic_set_eoi(struct kvm_lapic *apic) { 	int vector =
>>>>  apic_find_highest_isr(apic); @@ -756,19 +778,24 @@ static int
>>>>  apic_set_eoi(struct kvm_lapic *apic) 	apic_clear_isr(vector, apic);
>>>>  	apic_update_ppr(apic);
>>>> -	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
>>>> && -	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { -		int
>>>> trigger_mode; -		if (apic_test_vector(vector, apic->regs + APIC_TMR))
>>>> -			trigger_mode = IOAPIC_LEVEL_TRIG; -		else -			trigger_mode =
>>>> IOAPIC_EDGE_TRIG; -		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector,
>>>> trigger_mode); -	} +	kvm_ioapic_send_eoi(apic, vector);
>>>>  	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
>>>>  	return vector;
>>>>  }
>>>> +/*
>>>> + * this interface assumes a trap-like exit, which has already finished
>>>> + * desired side effect including vISR and vPPR update.
>>>> + */
>>>> +void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
>>>> +{
>>>> +	struct kvm_lapic *apic = vcpu->arch.apic;
>>>> +
>>> trace_kvm_eoi()
>> Ok.
>> 
>>>> +	kvm_ioapic_send_eoi(apic, vector);
>>>> +	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
>>>> +
>>>>  static void apic_send_ipi(struct kvm_lapic *apic) { 	u32 icr_low =
>>>>  kvm_apic_get_reg(apic, APIC_ICR); @@ -1533,6 +1560,17 @@ int
>>>>  kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) 	return highest_irr; }
>>>> +int kvm_apic_get_highest_irr(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +	struct kvm_lapic *apic = vcpu->arch.apic;
>>>> +
>>>> +	if (!apic || !apic_enabled(apic))
>>> Use kvm_vcpu_has_lapic() instead of checking arch.apic directly.
>> Ok.
>> 
>>> 
>>>> +		return -1;
>>>> +
>>>> +	return apic_find_highest_irr(apic);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(kvm_apic_get_highest_irr);
>>>> +
>>>>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
>>>>  {
>>>>  	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
>>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>>>> index c42f111..749661a 100644
>>>> --- a/arch/x86/kvm/lapic.h
>>>> +++ b/arch/x86/kvm/lapic.h
>>>> @@ -39,6 +39,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu);
>>>>  int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
>>>>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
>>>>  int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
>>>> +int kvm_cpu_has_extint(struct kvm_vcpu *v);
>>>> +int kvm_cpu_get_extint(struct kvm_vcpu *v);
>>>> +int kvm_apic_get_highest_irr(struct kvm_vcpu *vcpu);
>>>>  void kvm_lapic_reset(struct kvm_vcpu *vcpu); u64
>>>>  kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void
>>>>  kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); @@ -50,6
>>>>  +53,8 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu); int
>>>>  kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int
>>>>  kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); int
>>>>  kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
>>>> +void kvm_set_eoi_exitmap(struct kvm_vcpu *vcpu, int vector,
>>>> +		int need_eoi, int global);
>>>>  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
>>>>  
>>>>  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic
> *src,
>>>> @@ -65,6 +70,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu
>>> *vcpu);
>>>>  void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
>>>>  
>>>>  int kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
>>>> +void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
>>>> 
>>>>  void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t
>>>>  vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>>> index dcb7952..8f0903b 100644
>>>> --- a/arch/x86/kvm/svm.c
>>>> +++ b/arch/x86/kvm/svm.c
>>>> @@ -3573,6 +3573,22 @@ static void update_cr8_intercept(struct
> kvm_vcpu
>>> *vcpu, int tpr, int irr)
>>>>  		set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
>>>>  }
>>>> +static int svm_has_virtual_interrupt_delivery(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static void svm_update_irq(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +	return ;
>>>> +}
>>>> +
>>>> +static void svm_set_eoi_exitmap(struct kvm_vcpu *vcpu, int vector,
>>>> +				int trig_mode, int always_set)
>>>> +{
>>>> +	return ;
>>>> +}
>>>> +
>>>>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { 	struct vcpu_svm
>>>>  *svm = to_svm(vcpu); @@ -4292,6 +4308,9 @@ static struct kvm_x86_ops
>>>>  svm_x86_ops = { 	.enable_nmi_window = enable_nmi_window,
>>>>  	.enable_irq_window = enable_irq_window, 	.update_cr8_intercept =
>>>>  update_cr8_intercept,
>>>> +	.has_virtual_interrupt_delivery = svm_has_virtual_interrupt_delivery,
>>>> +	.update_irq = svm_update_irq;
>>>> +	.set_eoi_exitmap = svm_set_eoi_exitmap;
>>>> 
>>>>  	.set_tss_addr = svm_set_tss_addr,
>>>>  	.get_tdp_level = get_npt_level,
>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>>> index 6a5f651..909ce90 100644
>>>> --- a/arch/x86/kvm/vmx.c
>>>> +++ b/arch/x86/kvm/vmx.c
>>>> @@ -86,6 +86,9 @@ module_param(fasteoi, bool, S_IRUGO);
>>>>  static bool __read_mostly enable_apicv_reg;
>>>>  module_param(enable_apicv_reg, bool, S_IRUGO);
>>>> +static bool __read_mostly enable_apicv_vid;
>>>> +module_param(enable_apicv_vid, bool, S_IRUGO);
>>>> +
>>>>  /*
>>>>   * If nested=1, nested virtualization is supported, i.e., guests may use
>>>>   * VMX and be a hypervisor for its own guests. If nested=0, guests may
> not
>>>> @@ -432,6 +435,9 @@ struct vcpu_vmx {
>>>> 
>>>>  	bool rdtscp_enabled;
>>>> +	u8 eoi_exitmap_changed;
>>>> +	u32 eoi_exit_bitmap[8];
>>>> +
>>>>  	/* Support for a guest hypervisor (nested VMX) */
>>>>  	struct nested_vmx nested;
>>>>  };
>>>> @@ -770,6 +776,12 @@ static inline bool
>>> cpu_has_vmx_apic_register_virt(void)
>>>>  		SECONDARY_EXEC_APIC_REGISTER_VIRT;
>>>>  }
>>>> +static inline bool cpu_has_vmx_virtual_intr_delivery(void)
>>>> +{
>>>> +	return vmcs_config.cpu_based_2nd_exec_ctrl &
>>>> +		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
>>>> +}
>>>> +
>>>>  static inline bool cpu_has_vmx_flexpriority(void)
>>>>  {
>>>>  	return cpu_has_vmx_tpr_shadow() &&
>>>> @@ -2508,7 +2520,8 @@ static __init int setup_vmcs_config(struct
>>> vmcs_config *vmcs_conf)
>>>>  			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
>>>>  			SECONDARY_EXEC_RDTSCP |
>>>>  			SECONDARY_EXEC_ENABLE_INVPCID |
>>>> -			SECONDARY_EXEC_APIC_REGISTER_VIRT;
>>>> +			SECONDARY_EXEC_APIC_REGISTER_VIRT |
>>>> +			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
>>>>  		if (adjust_vmx_controls(min2, opt2,
>>>>  					MSR_IA32_VMX_PROCBASED_CTLS2,
>>>>  					&_cpu_based_2nd_exec_control) < 0)
>>>> @@ -2522,7 +2535,8 @@ static __init int setup_vmcs_config(struct
>>>> vmcs_config *vmcs_conf)
>>>> 
>>>>  	if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
>>>>  		_cpu_based_2nd_exec_control &= ~(
>>>> -				SECONDARY_EXEC_APIC_REGISTER_VIRT);
>>>> +				SECONDARY_EXEC_APIC_REGISTER_VIRT |
>>>> +				SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
>>>> 
>>>>  	if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { 		/*
>>>>  CR3 accesses and invlpg don't need to cause VM Exits when EPT @@
>>>>  -2724,6 +2738,14 @@ static __init int hardware_setup(void) 	if
>>>>  (!cpu_has_vmx_apic_register_virt()) 		enable_apicv_reg = 0;
>>>> +	if (!cpu_has_vmx_virtual_intr_delivery())
>>>> +		enable_apicv_vid = 0;
>>>> +
>>>> +	if (!enable_apicv_vid) {
>>>> +		kvm_x86_ops->update_irq = NULL;
>>> Why setting it to NULL? Either drop this since vmx_update_irq() checks
>>> enable_apicv_vid or better set it to function that does nothing and
>>> drop enable_apicv_vid check in vmx_update_irq(). Since
>>> kvm_x86_ops->update_irq will never be NULL you can drop the check
>>> before calling it.
>> Sure.
>> 
>>>> +		kvm_x86_ops->update_cr8_intercept = NULL;
>>> Why? It should be other way around: if apicv is enabled set
>>> update_cr8_intercept callback to NULL.
>> Yes, this is wrong.
> Please test the patches with vid disabled and Windows guests. This bug
> should have prevented it from working.
> 
>> 
>>>> +	}
>>>> +
>>>>  	if (nested)
>>>>  		nested_vmx_setup_ctls_msrs();
>>>> @@ -3839,6 +3861,8 @@ static u32 vmx_secondary_exec_control(struct
>>> vcpu_vmx *vmx)
>>>>  		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
>>>>  	if (!enable_apicv_reg)
>>>>  		exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
>>>> +	if (!enable_apicv_vid)
>>>> +		exec_control &= ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
>>>>  	return exec_control;
>>>>  }
>>>> @@ -3883,6 +3907,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx
> *vmx)
>>>>  				vmx_secondary_exec_control(vmx));
>>>>  	}
>>>> +	if (enable_apicv_vid) {
>>>> +		vmcs_write64(EOI_EXIT_BITMAP0, 0);
>>>> +		vmcs_write64(EOI_EXIT_BITMAP1, 0);
>>>> +		vmcs_write64(EOI_EXIT_BITMAP2, 0);
>>>> +		vmcs_write64(EOI_EXIT_BITMAP3, 0);
>>>> +
>>>> +		vmcs_write16(GUEST_INTR_STATUS, 0);
>>>> +	}
>>>> +
>>>>  	if (ple_gap) {
>>>>  		vmcs_write32(PLE_GAP, ple_gap);
>>>>  		vmcs_write32(PLE_WINDOW, ple_window);
>>>> @@ -4806,6 +4839,16 @@ static int handle_apic_access(struct kvm_vcpu
>>> *vcpu)
>>>>  	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
>>>>  }
>>>> +static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
>>>> +	int vector = exit_qualification & 0xff;
>>>> +
>>>> +	/* EOI-induced VM exit is trap-like and thus no need to adjust IP */
>>>> +	kvm_apic_set_eoi_accelerated(vcpu, vector);
>>>> +	return 1;
>>>> +}
>>>> +
>>>>  static int handle_apic_write(struct kvm_vcpu *vcpu)
>>>>  {
>>>>  	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
>>>> @@ -5755,6 +5798,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct
>>> kvm_vcpu *vcpu) = {
>>>>  	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
>>>>  	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
>>>>  	[EXIT_REASON_APIC_WRITE]              = handle_apic_write,
>>>>  +	[EXIT_REASON_EOI_INDUCED]             = handle_apic_eoi_induced,
>>>>  	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
>>>>  	[EXIT_REASON_XSETBV]                  = handle_xsetbv,
>>>>  	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
>>>> @@ -6096,6 +6140,11 @@ static int vmx_handle_exit(struct kvm_vcpu
>>>> *vcpu)
>>>> 
>>>>  static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
>>>>  {
>>>> +	/* no need for tpr_threshold update if APIC virtual
>>>> +	 * interrupt delivery is enabled */
>>>> +	if (!enable_apicv_vid)
>>>> +		return ;
>>>> +
>>> Since you (will) set ->update_cr8_intercept callback to NULL if vid
>>> is enabled this function will never be called with !enable_apicv_vid,
>>> so this check can be dropped.
>> Ok.
>> 
>>>>  	if (irr == -1 || tpr < irr) {
>>>>  		vmcs_write32(TPR_THRESHOLD, 0);
>>>>  		return;
>>>> @@ -6104,6 +6153,90 @@ static void update_cr8_intercept(struct
> kvm_vcpu
>>> *vcpu, int tpr, int irr)
>>>>  	vmcs_write32(TPR_THRESHOLD, irr);
>>>>  }
>>>> +static int vmx_has_virtual_interrupt_delivery(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +	return irqchip_in_kernel(vcpu->kvm) && enable_apicv_vid;
>>>> +}
>>>> +
>>>> +static void vmx_update_eoi_exitmap(struct vcpu_vmx *vmx, int index)
>>>> +{
>>>> +	int tmr;
>>>> +	tmr = kvm_apic_get_reg(vmx->vcpu.arch.apic,
>>>> +			APIC_TMR + 0x10 * index);
>>>> +	vmcs_write32(EOI_EXIT_BITMAP0 + index,
>>>> +			vmx->eoi_exit_bitmap[index] | tmr);
>>>> +}
>>>> +
>>>> +static void vmx_update_rvi(int vector)
>>>> +{
>>>> +	u16 status;
>>>> +	u8 old;
>>>> +
>>>> +	status = vmcs_read16(GUEST_INTR_STATUS);
>>>> +	old = (u8)status & 0xff;
>>>> +	if ((u8)vector != old) {
>>>> +		status &= ~0xff;
>>>> +		status |= (u8)vector;
>>>> +		vmcs_write16(GUEST_INTR_STATUS, status);
>>>> +	}
>>>> +}
>>>> +
>>>> +static void vmx_update_irq(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +	int vector;
>>>> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
>>>> +
>>>> +	if (!enable_apicv_vid)
>>>> +		return ;
>>>> +
>>>> +	vector = kvm_apic_get_highest_irr(vcpu);
>>>> +	if (vector == -1)
>>>> +		return;
>>>> +
>>>> +	vmx_update_rvi(vector);
>>>> +
>>>> +	if (vmx->eoi_exitmap_changed) {
>>>> +		int index;
>>>> +		for_each_set_bit(index,
>>>> +				(unsigned long *)(&vmx->eoi_exitmap_changed), 8)
>>>> +			vmx_update_eoi_exitmap(vmx, index);
>>>> +		vmx->eoi_exitmap_changed = 0;
>>>> +	}
>>>> +}
>>>> +
>>>> +static void vmx_set_eoi_exitmap(struct kvm_vcpu *vcpu,
>>>> +				int vector, int trig_mode,
>>>> +				int always_set)
>>>> +{
>>>> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
>>>> +	int index, offset, changed;
>>>> +	struct kvm_lapic *apic;
>>>> +
>>>> +	if (!enable_apicv_vid)
>>>> +		return ;
>>>> +
>>>> +	if (WARN_ONCE((vector < 0) || (vector > 255),
>>>> +		"KVM VMX: vector (%d) out of range\n", vector))
>>>> +		return;
>>>> +
>>>> +	apic = vcpu->arch.apic;
>>>> +	index = vector >> 5;
>>>> +	offset = vector & 31;
>>>> +
>>>> +	if (always_set)
>>>> +		changed = !test_and_set_bit(offset,
>>>> +				(unsigned long *)&vmx->eoi_exit_bitmap);
>>>> +	else if (trig_mode)
>>>> +		changed = !test_bit(offset,
>>>> +				apic->regs + APIC_TMR + index * 0x10);
>>>> +	else
>>>> +		changed = test_bit(offset,
>>>> +				apic->regs + APIC_TMR + index * 0x10);
>>>> +
>>>> +	if (changed)
>>>> +		vmx->eoi_exitmap_changed |= 1 << index;
>>>> +}
>>>> +
>>>>  static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) { 	u32
>>>>  exit_intr_info; @@ -7364,6 +7497,9 @@ static struct kvm_x86_ops
>>>>  vmx_x86_ops = { 	.enable_nmi_window = enable_nmi_window,
>>>>  	.enable_irq_window = enable_irq_window, 	.update_cr8_intercept =
>>>>  update_cr8_intercept,
>>>> +	.has_virtual_interrupt_delivery = vmx_has_virtual_interrupt_delivery,
>>>> +	.update_irq = vmx_update_irq,
>>>> +	.set_eoi_exitmap = vmx_set_eoi_exitmap,
>>>> 
>>>>  	.set_tss_addr = vmx_set_tss_addr,
>>>>  	.get_tdp_level = get_ept_level,
>>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index
>>>> b0b8abe..02fe194 100644 --- a/arch/x86/kvm/x86.c +++
>>>> b/arch/x86/kvm/x86.c @@ -164,6 +164,14 @@ static int
>>>> emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
>>>> 
>>>>  static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);
>>>> +static inline bool kvm_apic_vid_enabled(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +	if (kvm_x86_ops->has_virtual_interrupt_delivery)
>>> This callback is never NULL.
>> Ok.
>> 
>>>> +		return kvm_x86_ops->has_virtual_interrupt_delivery(vcpu);
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>>  static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
>>>>  {
>>>>  	int i;
>>>> @@ -5533,12 +5541,20 @@ static void inject_pending_event(struct
> kvm_vcpu
>>> *vcpu)
>>>>  			vcpu->arch.nmi_injected = true;
>>>>  			kvm_x86_ops->set_nmi(vcpu);
>>>>  		}
>>>> -	} else if (kvm_cpu_has_interrupt(vcpu)) {
>>>> -		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
>>>> -			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
>>>> -					    false);
>>>> +	} else if (kvm_cpu_has_interrupt(vcpu) &&
>>>> +			kvm_x86_ops->interrupt_allowed(vcpu)) {
>>>> +		int vector = -1;
>>>> +
>>>> +		if (kvm_apic_vid_enabled(vcpu))
>>>> +			vector = kvm_cpu_get_extint(vcpu);
>>>> +		else
>>>> +			vector = kvm_cpu_get_interrupt(vcpu);
>>>> +
>>>> +		if (vector != -1) {
>>>> +			kvm_queue_interrupt(vcpu, vector, false);
>>>>  			kvm_x86_ops->set_irq(vcpu);
>>>>  		}
>>> If vid is enabled kvm_cpu_has_interrupt() should return true only if there
>>> is extint interrupt. Similarly kvm_cpu_get_interrupt() will only return
>>> extint if vid is enabled. This basically moves kvm_apic_vid_enabled()
>>> logic deeper into kvm_cpu_(has|get)_interrupt() functions instead
>>> of changing interrupt injection logic here and in vcpu_enter_guest()
>>> bellow. We still need kvm_cpu_has_interrupt() variant that always checks
>>> both extint and apic for use in kvm_arch_vcpu_runnable() though.
>> As you mentioned, we still need to checks both extint and apic interrupt in
> some case. So how to do this? Introduce another argument to indicate
> whether check both? Yes, we need to check both in
> kvm_arch_vcpu_runnable(). Another argument is good option. We can have
> two functions: kvm_cpu_has_injectable_interrupt() for use in irq
> injection path and kvm_cpu_has_interrupt() for use in
> kvm_arch_vcpu_runnable(). They will call common one with additional
> argument to avoid code duplication.
Ok. will follow this way.

>> 
>>>> +
>>>>  	}
>>>>  }
>>>> @@ -5657,12 +5673,20 @@ static int vcpu_enter_guest(struct kvm_vcpu
>>> *vcpu)
>>>>  	}
>>>>  
>>>>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
>>>> +		/* update archtecture specific hints for APIC
>>>> +		 * virtual interrupt delivery */
>>>> +		if (kvm_x86_ops->update_irq)
>>>> +			kvm_x86_ops->update_irq(vcpu);
>>>> +
>>> 
>>> I do not see why this have to be here instead of inside if
>>> (kvm_lapic_enabled(vcpu)){} near update_cr8_intercept() a couple of
>>> lines bellow. If you move it there you can drop apic enable check in
>>> kvm_apic_get_highest_irr().
>> Yes, it seems ok to move it.
>> 
>>>>  		inject_pending_event(vcpu);
>>>>  
>>>>  		/* enable NMI/IRQ window open exits if needed */
>>>>  		if (vcpu->arch.nmi_pending)
>>>>  			kvm_x86_ops->enable_nmi_window(vcpu);
>>>> -		else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
>>>> +		else if (kvm_apic_vid_enabled(vcpu)) {
>>>> +			if (kvm_cpu_has_extint(vcpu))
>>>> +				kvm_x86_ops->enable_irq_window(vcpu);
>>>> +		} else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
>>>>  			kvm_x86_ops->enable_irq_window(vcpu);
>>>>  
>>>>  		if (kvm_lapic_enabled(vcpu)) {
>>>> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
>>>> index 166c450..898aa62 100644
>>>> --- a/virt/kvm/ioapic.c
>>>> +++ b/virt/kvm/ioapic.c
>>>> @@ -186,6 +186,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
>>> irq)
>>>>  		/* need to read apic_id from apic regiest since 		 * it can be
>>>>  rewritten */ 		irqe.dest_id = ioapic->kvm->bsp_vcpu_id;
>>>>  +		kvm_set_eoi_exitmap(ioapic->kvm->vcpus[0], irqe.vector, 1, 1); 	}
>>>>  #endif 	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
>>>> --
>>>> 1.7.1
>>> 
>>> --
>>> 			Gleb.
>> 
>> 
>> Best regards,
>> Yang
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>> the body of a message to majordomo@xxxxxxxxxxxxxxx
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> --
> 			Gleb.


Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html