On Mon, Dec 03, 2012 at 03:01:02PM +0800, Yang Zhang wrote: > - APIC read doesn't cause VM-Exit > - APIC write becomes trap-like > > Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx> > Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx> > --- > arch/x86/include/asm/vmx.h | 2 ++ > arch/x86/kvm/lapic.c | 16 ++++++++++++++++ > arch/x86/kvm/lapic.h | 2 ++ > arch/x86/kvm/vmx.c | 32 +++++++++++++++++++++++++++++++- > 4 files changed, 51 insertions(+), 1 deletions(-) > > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > index 36ec21c..21101b6 100644 > --- a/arch/x86/include/asm/vmx.h > +++ b/arch/x86/include/asm/vmx.h > @@ -66,6 +66,7 @@ > #define EXIT_REASON_EPT_MISCONFIG 49 > #define EXIT_REASON_WBINVD 54 > #define EXIT_REASON_XSETBV 55 > +#define EXIT_REASON_APIC_WRITE 56 > #define EXIT_REASON_INVPCID 58 > > #define VMX_EXIT_REASONS \ > @@ -141,6 +142,7 @@ > #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 > #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 > #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 > +#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 > #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 > #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 9392f52..7c96012 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -1212,6 +1212,22 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) > } > EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); > > +/* emulate APIC access in a trap manner */ > +int kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) > +{ > + u32 val = 0; > + > + /* hw has done the conditional check and inst decode */ > + offset &= 0xff0; > + if ((offset != APIC_EOI) && > + apic_reg_read(vcpu->arch.apic, offset, 4, &val)) > + return 1; What is apic_reg_read doing? None of the checks it performs that can result in return value of 1 are necessary for APIC-write VM-exit AFAICS. > @@ -83,6 +83,9 @@ module_param(vmm_exclusive, bool, S_IRUGO); > static bool __read_mostly fasteoi = 1; > module_param(fasteoi, bool, S_IRUGO); > > +static bool __read_mostly enable_apicv_reg; > +module_param(enable_apicv_reg, bool, S_IRUGO); > + Are the different combinations of register virtualization / virtual interrupt delivery actually supported? Why would it be useful to enable register virtualization / virtual interrupt delivery separately? > /* > * If nested=1, nested virtualization is supported, i.e., guests may use > * VMX and be a hypervisor for its own guests. If nested=0, guests may not > @@ -761,6 +764,12 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void) > SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; > } > > +static inline bool cpu_has_vmx_apic_register_virt(void) > +{ > + return vmcs_config.cpu_based_2nd_exec_ctrl & > + SECONDARY_EXEC_APIC_REGISTER_VIRT; > +} > + > static inline bool cpu_has_vmx_flexpriority(void) > { > return cpu_has_vmx_tpr_shadow() && > @@ -2498,7 +2507,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) > SECONDARY_EXEC_UNRESTRICTED_GUEST | > SECONDARY_EXEC_PAUSE_LOOP_EXITING | > SECONDARY_EXEC_RDTSCP | > - SECONDARY_EXEC_ENABLE_INVPCID; > + SECONDARY_EXEC_ENABLE_INVPCID | > + SECONDARY_EXEC_APIC_REGISTER_VIRT; > if (adjust_vmx_controls(min2, opt2, > MSR_IA32_VMX_PROCBASED_CTLS2, > &_cpu_based_2nd_exec_control) < 0) > @@ -2509,6 +2519,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) > SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) > _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; > #endif > + > + if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) > + _cpu_based_2nd_exec_control &= ~( > + SECONDARY_EXEC_APIC_REGISTER_VIRT); > + > if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { > /* CR3 accesses and invlpg don't need to cause VM Exits when EPT > enabled */ > @@ -2706,6 +2721,9 @@ static __init int hardware_setup(void) > if (!cpu_has_vmx_ple()) > ple_gap = 0; > > + if (!cpu_has_vmx_apic_register_virt()) > + enable_apicv_reg = 0; > + > if (nested) > nested_vmx_setup_ctls_msrs(); > > @@ -3819,6 +3837,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) > exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; > if (!ple_gap) > exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; > + if (!enable_apicv_reg) > + exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT; > return exec_control; > } > > @@ -4786,6 +4806,15 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) > return emulate_instruction(vcpu, 0) == EMULATE_DONE; > } > > +static int handle_apic_write(struct kvm_vcpu *vcpu) > +{ > + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); > + u32 offset = exit_qualification & 0xfff; > + > + /* APIC-write VM exit is trap-like and thus no need to adjust IP */ > + return kvm_apic_write_nodecode(vcpu, offset) == 0; > +} Point of return value == 0? -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html