On Mon, Apr 01, 2013 at 11:32:31AM +0800, Yang Zhang wrote: > From: Yang Zhang <yang.z.zhang@xxxxxxxxx> > > Detect the posted interrupt feature. If it exists, then set it in vmcs_config. > > Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx> > --- > arch/x86/include/asm/vmx.h | 4 ++ > arch/x86/kvm/vmx.c | 87 ++++++++++++++++++++++++++++++++++---------- > 2 files changed, 71 insertions(+), 20 deletions(-) > > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > index fc1c313..6f07f19 100644 > --- a/arch/x86/include/asm/vmx.h > +++ b/arch/x86/include/asm/vmx.h > @@ -71,6 +71,7 @@ > #define PIN_BASED_NMI_EXITING 0x00000008 > #define PIN_BASED_VIRTUAL_NMIS 0x00000020 > #define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 > +#define PIN_BASED_POSTED_INTR 0x00000080 > > #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 > > @@ -102,6 +103,7 @@ > /* VMCS Encodings */ > enum vmcs_field { > VIRTUAL_PROCESSOR_ID = 0x00000000, > + POSTED_INTR_NV = 0x00000002, > GUEST_ES_SELECTOR = 0x00000800, > GUEST_CS_SELECTOR = 0x00000802, > GUEST_SS_SELECTOR = 0x00000804, > @@ -136,6 +138,8 @@ enum vmcs_field { > VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, > APIC_ACCESS_ADDR = 0x00002014, > APIC_ACCESS_ADDR_HIGH = 0x00002015, > + POSTED_INTR_DESC_ADDR = 0x00002016, > + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, > EPT_POINTER = 0x0000201a, > EPT_POINTER_HIGH = 0x0000201b, > EOI_EXIT_BITMAP0 = 0x0000201c, > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 7408d93..b2e95bc 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -84,7 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO); > static bool __read_mostly fasteoi = 1; > module_param(fasteoi, bool, S_IRUGO); > > -static bool __read_mostly enable_apicv_reg_vid; > +static bool __read_mostly enable_apicv; > +module_param(enable_apicv, bool, S_IRUGO); > > /* > * If nested=1, nested virtualization is supported, i.e., guests may use > @@ -366,6 +367,19 @@ struct nested_vmx { > struct page 
*apic_access_page; > }; > > +#define POSTED_INTR_ON 0 > +/* Posted-Interrupt Descriptor */ > +struct pi_desc { > + u32 pir[8]; /* Posted interrupt requested */ > + union { > + struct { > + u8 on:1, Do you actually use the 'on' member of the bit field? As far as I can tell, the code paths always access control with (set|clear)_bit(). And C does not guarantee the layout of a bit field, so 'on' may not point to what you think it points to. > + rsvd:7; > + } control; > + u32 rsvd[8]; > + } u; > +} __aligned(64); > + > struct vcpu_vmx { > struct kvm_vcpu vcpu; > unsigned long host_rsp; > @@ -430,6 +444,9 @@ struct vcpu_vmx { > > bool rdtscp_enabled; > > + /* Posted interrupt descriptor */ > + struct pi_desc pi_desc; > + > /* Support for a guest hypervisor (nested VMX) */ > struct nested_vmx nested; > }; > @@ -785,6 +802,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) > SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; > } > > +static inline bool cpu_has_vmx_posted_intr(void) > +{ > + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; > +} > + > +static inline bool cpu_has_vmx_apicv(void) > +{ > + return cpu_has_vmx_apic_register_virt() && > + cpu_has_vmx_virtual_intr_delivery() && > + cpu_has_vmx_posted_intr(); > +} > + > static inline bool cpu_has_vmx_flexpriority(void) > { > return cpu_has_vmx_tpr_shadow() && > @@ -2552,12 +2581,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) > u32 _vmexit_control = 0; > u32 _vmentry_control = 0; > > - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; > - opt = PIN_BASED_VIRTUAL_NMIS; > - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, > - &_pin_based_exec_control) < 0) > - return -EIO; > - > min = CPU_BASED_HLT_EXITING | > #ifdef CONFIG_X86_64 > CPU_BASED_CR8_LOAD_EXITING | > @@ -2634,6 +2657,17 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) > &_vmexit_control) < 0) > return -EIO; > > + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; > + opt = 
PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR; > + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, > + &_pin_based_exec_control) < 0) > + return -EIO; > + > + if (!(_cpu_based_2nd_exec_control & > + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) || > + !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) > + _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; > + > min = 0; > opt = VM_ENTRY_LOAD_IA32_PAT; > if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, > @@ -2812,11 +2846,10 @@ static __init int hardware_setup(void) > if (!cpu_has_vmx_ple()) > ple_gap = 0; > > - if (!cpu_has_vmx_apic_register_virt() || > - !cpu_has_vmx_virtual_intr_delivery()) > - enable_apicv_reg_vid = 0; > + if (!cpu_has_vmx_apicv()) > + enable_apicv = 0; > > - if (enable_apicv_reg_vid) > + if (enable_apicv) > kvm_x86_ops->update_cr8_intercept = NULL; > else > kvm_x86_ops->hwapic_irr_update = NULL; > @@ -3875,6 +3908,11 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) > msr, MSR_TYPE_W); > } > > +static int vmx_vm_has_apicv(struct kvm *kvm) > +{ > + return enable_apicv && irqchip_in_kernel(kvm); > +} > + > /* > * Set up the vmcs's constant host-state fields, i.e., host-state fields that > * will not change in the lifetime of the guest. 
> @@ -3935,6 +3973,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) > vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); > } > > +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) > +{ > + u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; > + > + if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) > + pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; > + return pin_based_exec_ctrl; > +} > + > static u32 vmx_exec_control(struct vcpu_vmx *vmx) > { > u32 exec_control = vmcs_config.cpu_based_exec_ctrl; > @@ -3952,11 +3999,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) > return exec_control; > } > > -static int vmx_vm_has_apicv(struct kvm *kvm) > -{ > - return enable_apicv_reg_vid && irqchip_in_kernel(kvm); > -} > - > static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) > { > u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; > @@ -4012,8 +4054,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) > vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ > > /* Control */ > - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, > - vmcs_config.pin_based_exec_ctrl); > + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); > > vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); > > @@ -4022,13 +4063,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) > vmx_secondary_exec_control(vmx)); > } > > - if (enable_apicv_reg_vid) { > + if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { > vmcs_write64(EOI_EXIT_BITMAP0, 0); > vmcs_write64(EOI_EXIT_BITMAP1, 0); > vmcs_write64(EOI_EXIT_BITMAP2, 0); > vmcs_write64(EOI_EXIT_BITMAP3, 0); > > vmcs_write16(GUEST_INTR_STATUS, 0); > + > + vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); > + vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); > } > > if (ple_gap) { > @@ -4170,6 +4214,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) > vmcs_write64(APIC_ACCESS_ADDR, > page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); > > + if (vmx_vm_has_apicv(vcpu->kvm)) > + 
memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); > + > if (vmx->vpid != 0) > vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); > > @@ -7809,7 +7856,7 @@ static int __init vmx_init(void) > memcpy(vmx_msr_bitmap_longmode_x2apic, > vmx_msr_bitmap_longmode, PAGE_SIZE); > > - if (enable_apicv_reg_vid) { > + if (enable_apicv) { > for (msr = 0x800; msr <= 0x8ff; msr++) > vmx_disable_intercept_msr_read_x2apic(msr); > > -- > 1.7.1 -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html