On Wed, Dec 05, 2012 at 03:17:13AM +0000, Zhang, Yang Z wrote: > Marcelo Tosatti wrote on 2012-12-05: > > On Mon, Dec 03, 2012 at 03:01:02PM +0800, Yang Zhang wrote: > >> - APIC read doesn't cause VM-Exit > >> - APIC write becomes trap-like > >> > >> Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx> > >> Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx> > >> --- > >> arch/x86/include/asm/vmx.h | 2 ++ arch/x86/kvm/lapic.c | 16 > >> ++++++++++++++++ arch/x86/kvm/lapic.h | 2 ++ > >> arch/x86/kvm/vmx.c | 32 +++++++++++++++++++++++++++++++- 4 > >> files changed, 51 insertions(+), 1 deletions(-) > >> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > >> index 36ec21c..21101b6 100644 > >> --- a/arch/x86/include/asm/vmx.h > >> +++ b/arch/x86/include/asm/vmx.h > >> @@ -66,6 +66,7 @@ > >> #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_WBINVD > >> 54 #define EXIT_REASON_XSETBV 55 +#define > >> EXIT_REASON_APIC_WRITE 56 #define EXIT_REASON_INVPCID > >> 58 > >> > >> #define VMX_EXIT_REASONS \ @@ -141,6 +142,7 @@ #define > >> SECONDARY_EXEC_ENABLE_VPID 0x00000020 #define > >> SECONDARY_EXEC_WBINVD_EXITING 0x00000040 #define > >> SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define > >> SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 #define > >> SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define > >> SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 > >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > >> index 9392f52..7c96012 100644 > >> --- a/arch/x86/kvm/lapic.c > >> +++ b/arch/x86/kvm/lapic.c > >> @@ -1212,6 +1212,22 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) > >> } > >> EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); > >> +/* emulate APIC access in a trap manner */ > >> +int kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) > >> +{ > >> + u32 val = 0; > >> + > >> + /* hw has done the conditional check and inst decode */ > >> + offset &= 0xff0; > >> + if ((offset != APIC_EOI) && > >> + apic_reg_read(vcpu->arch.apic, offset, 4, &val)) > >> + return 1; > > > > What is apic_reg_read doing? None of the checks it performs that can > > result in return value of 1 are necessary for APIC-write VM-exit AFAICS. > Right. We should remove this check. > > > >> @@ -83,6 +83,9 @@ module_param(vmm_exclusive, bool, S_IRUGO); > >> static bool __read_mostly fasteoi = 1; > >> module_param(fasteoi, bool, S_IRUGO); > >> +static bool __read_mostly enable_apicv_reg; > >> +module_param(enable_apicv_reg, bool, S_IRUGO); > >> + > > > > Are the different combinations of register virtualization / virtual > > interrupt delivery actually supported? > Yes. > > > Why would it be useful to enable register virtualization / virtual > > interrupt delivery separately? > It's ok to use one option: enable_apicv. Why would someone (as a user, not developer), would ever want different combinations? Allowing different combinations means they must be supported. Why would someone enable virtual interrupt delivery without apic register virtualization, for example? Perhaps one module option to control apic register virtualization + virtual interrupt delivery, and another for posted-interrupts (which can be useful for debugging). > >> * If nested=1, nested virtualization is supported, i.e., guests may use > >> * VMX and be a hypervisor for its own guests. If nested=0, guests may not > >> @@ -761,6 +764,12 @@ static inline bool > > cpu_has_vmx_virtualize_apic_accesses(void) > >> SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; > >> } > >> +static inline bool cpu_has_vmx_apic_register_virt(void) > >> +{ > >> + return vmcs_config.cpu_based_2nd_exec_ctrl & > >> + SECONDARY_EXEC_APIC_REGISTER_VIRT; > >> +} > >> + > >> static inline bool cpu_has_vmx_flexpriority(void) > >> { > >> return cpu_has_vmx_tpr_shadow() && > >> @@ -2498,7 +2507,8 @@ static __init int setup_vmcs_config(struct > > vmcs_config *vmcs_conf) > >> SECONDARY_EXEC_UNRESTRICTED_GUEST | > >> SECONDARY_EXEC_PAUSE_LOOP_EXITING | > >> SECONDARY_EXEC_RDTSCP | > >> - SECONDARY_EXEC_ENABLE_INVPCID; > >> + SECONDARY_EXEC_ENABLE_INVPCID | > >> + SECONDARY_EXEC_APIC_REGISTER_VIRT; > >> if (adjust_vmx_controls(min2, opt2, > >> MSR_IA32_VMX_PROCBASED_CTLS2, > >> &_cpu_based_2nd_exec_control) < 0) > >> @@ -2509,6 +2519,11 @@ static __init int setup_vmcs_config(struct > > vmcs_config *vmcs_conf) > >> SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) > >> _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; > >> #endif > >> + > >> + if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) > >> + _cpu_based_2nd_exec_control &= ~( > >> + SECONDARY_EXEC_APIC_REGISTER_VIRT); > >> + > >> if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { /* > >> CR3 accesses and invlpg don't need to cause VM Exits when EPT > >> enabled */ @@ -2706,6 +2721,9 @@ static __init int > >> hardware_setup(void) if (!cpu_has_vmx_ple()) ple_gap = 0; > >> + if (!cpu_has_vmx_apic_register_virt()) > >> + enable_apicv_reg = 0; > >> + > >> if (nested) > >> nested_vmx_setup_ctls_msrs(); > >> @@ -3819,6 +3837,8 @@ static u32 vmx_secondary_exec_control(struct > > vcpu_vmx *vmx) > >> exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; > >> if (!ple_gap) > >> exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; > >> + if (!enable_apicv_reg) > >> + exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT; > >> return exec_control; > >> } > >> @@ -4786,6 +4806,15 @@ static int handle_apic_access(struct kvm_vcpu > > *vcpu) > >> return emulate_instruction(vcpu, 0) == EMULATE_DONE; > >> } > >> +static int handle_apic_write(struct kvm_vcpu *vcpu) > >> +{ > >> + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); > >> + u32 offset = exit_qualification & 0xfff; > >> + > >> + /* APIC-write VM exit is trap-like and thus no need to adjust IP */ > >> + return kvm_apic_write_nodecode(vcpu, offset) == 0; > >> +} > > > > Point of return value == 0? > if kvm_apic_write_nodecode() handle successfully, it will return zero. Then it will return 1 for this vmexit handle. > What's wrong? If kvm_apic_write_nodecode fails (return 0 for the vmexit handle), there is an exit to userspace. Why is that necessary? -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html