On Mon, Jan 14, 2013 at 11:01:02AM +0000, Zhang, Yang Z wrote: > Gleb Natapov wrote on 2013-01-14: > > On Mon, Jan 14, 2013 at 03:13:34PM +0800, Yang Zhang wrote: > >> From: Yang Zhang <yang.z.zhang@xxxxxxxxx> > >> > >> basically to benefit from apicv, we need to enable virtualized x2apic mode. > >> Currently, we only enable it when guest is really using x2apic. > >> > >> Also, clear MSR bitmap for corresponding x2apic MSRs when guest enabled > > x2apic: > >> 0x800 - 0x8ff: no read intercept for apicv register virtualization, > >> except APIC ID and TMCCT. > >> APIC ID and TMCCT: need software's assistance to get right value. > > Actually since msr bitmap is shared between all vcpus this will break > > guests that do not enable x2apic. > I don't think this case will exist. It will break the real OS too. > Which case? One VM uses x2apic another one does not? Bitmap is shared between all vcpus of all VMs. > >> Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx> > >> Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx> > >> --- > >> arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/asm/vmx.h > >> | 1 + arch/x86/kvm/lapic.c | 15 +++- > >> arch/x86/kvm/svm.c | 6 ++ arch/x86/kvm/vmx.c > >> | 162 +++++++++++++++++++++++++++++++++++++-- 5 files changed, 173 > >> insertions(+), 12 deletions(-) > >> diff --git a/arch/x86/include/asm/kvm_host.h > >> b/arch/x86/include/asm/kvm_host.h index c431b33..35aa8e6 100644 --- > >> a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h > >> @@ -697,6 +697,7 @@ struct kvm_x86_ops { > >> void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void > >> (*enable_irq_window)(struct kvm_vcpu *vcpu); void > >> (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); > >> + void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); > >> int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int > >> (*get_tdp_level)(void); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, > >> gfn_t gfn, bool is_mmio); > >> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > >> index 44c3f7e..0a54df0 100644 > >> --- a/arch/x86/include/asm/vmx.h > >> +++ b/arch/x86/include/asm/vmx.h > >> @@ -139,6 +139,7 @@ > >> #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 #define > >> SECONDARY_EXEC_ENABLE_EPT 0x00000002 #define > >> SECONDARY_EXEC_RDTSCP 0x00000008 +#define > >> SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 #define > >> SECONDARY_EXEC_ENABLE_VPID 0x00000020 #define > >> SECONDARY_EXEC_WBINVD_EXITING 0x00000040 #define > >> SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 > >> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > >> index 0664c13..2ef5e2b 100644 > >> --- a/arch/x86/kvm/lapic.c > >> +++ b/arch/x86/kvm/lapic.c > >> @@ -1323,12 +1323,17 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, > > u64 value) > >> if (!kvm_vcpu_is_bsp(apic->vcpu)) > >> value &= ~MSR_IA32_APICBASE_BSP; > >> - vcpu->arch.apic_base = value; > >> - if (apic_x2apic_mode(apic)) { > >> - u32 id = kvm_apic_id(apic); > >> - u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); > >> - kvm_apic_set_ldr(apic, ldr); > >> + if ((vcpu->arch.apic_base ^ value) & X2APIC_ENABLE) { > >> + if (value & X2APIC_ENABLE) { > >> + u32 id = kvm_apic_id(apic); > >> + u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); > >> + kvm_apic_set_ldr(apic, ldr); > >> + kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); > >> + } else > >> + kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); > >> } > >> + > >> + vcpu->arch.apic_base = value; > >> apic->base_address = apic->vcpu->arch.apic_base & > >> MSR_IA32_APICBASE_BASE; > >> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > >> index d29d3cd..38407e9 100644 > >> --- a/arch/x86/kvm/svm.c > >> +++ b/arch/x86/kvm/svm.c > >> @@ -3571,6 +3571,11 @@ static void update_cr8_intercept(struct kvm_vcpu > > *vcpu, int tpr, int irr) > >> set_cr_intercept(svm, INTERCEPT_CR8_WRITE); > >> } > >> +static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) > >> +{ > >> + return; > >> +} > >> + > >> static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm > >> *svm = to_svm(vcpu); @@ -4290,6 +4295,7 @@ static struct kvm_x86_ops > >> svm_x86_ops = { .enable_nmi_window = enable_nmi_window, > >> .enable_irq_window = enable_irq_window, .update_cr8_intercept = > >> update_cr8_intercept, > >> + .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, > >> > >> .set_tss_addr = svm_set_tss_addr, > >> .get_tdp_level = get_npt_level, > >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > >> index 0403634..847022e 100644 > >> --- a/arch/x86/kvm/vmx.c > >> +++ b/arch/x86/kvm/vmx.c > >> @@ -767,6 +767,12 @@ static inline bool > > cpu_has_vmx_virtualize_apic_accesses(void) > >> SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; > >> } > >> +static inline bool cpu_has_vmx_virtualize_x2apic_mode(void) > >> +{ > >> + return vmcs_config.cpu_based_2nd_exec_ctrl & > >> + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; > >> +} > >> + > >> static inline bool cpu_has_vmx_apic_register_virt(void) > >> { > >> return vmcs_config.cpu_based_2nd_exec_ctrl & > >> @@ -2543,6 +2549,7 @@ static __init int setup_vmcs_config(struct > > vmcs_config *vmcs_conf) > >> if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) > >> { min2 = 0; opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | > >> + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | > >> SECONDARY_EXEC_WBINVD_EXITING | SECONDARY_EXEC_ENABLE_VPID | > >> SECONDARY_EXEC_ENABLE_EPT | @@ -3724,7 +3731,45 @@ static void > >> free_vpid(struct vcpu_vmx *vmx) spin_unlock(&vmx_vpid_lock); } > >> -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, > >> u32 msr) +#define MSR_TYPE_R 1 +#define MSR_TYPE_W 2 +static void > >> __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 > >> msr, int type) +{ + int f = sizeof(unsigned long); + + if > >> (!cpu_has_vmx_msr_bitmap()) + return; + + /* + * See Intel PRM Vol. > >> 3, 20.6.9 (MSR-Bitmap Address). Early manuals + * have the write-low > >> and read-high bitmap offsets the wrong way round. + * We can control > >> MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. + */ + if (msr > >> <= 0x1fff) { + if (type & MSR_TYPE_R) + /* read-low */ > >> + __clear_bit(msr, msr_bitmap + 0x000 / f); + + if (type & > >> MSR_TYPE_W) + /* write-low */ + __clear_bit(msr, msr_bitmap + 0x800 > >> / f); + + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { > >> + msr &= 0x1fff; + if (type & MSR_TYPE_R) + /* read-high */ > >> + __clear_bit(msr, msr_bitmap + 0x400 / f); + + if (type & > >> MSR_TYPE_W) + /* write-high */ + __clear_bit(msr, msr_bitmap + > >> 0xc00 / f); + + } +} + +static void > >> __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + u32 > >> msr, int type) > >> { > >> int f = sizeof(unsigned long); > >> @@ -3737,20 +3782,75 @@ static void > > __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) > >> * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. > >> */ > >> if (msr <= 0x1fff) { > >> - __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */ > >> - __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */ > >> + if (type & MSR_TYPE_R) > >> + /* read-low */ > >> + __set_bit(msr, msr_bitmap + 0x000 / f); > >> + > >> + if (type & MSR_TYPE_W) > >> + /* write-low */ > >> + __set_bit(msr, msr_bitmap + 0x800 / f); > >> + > >> } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { > >> msr &= 0x1fff; > >> - __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */ > >> - __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */ > >> + if (type & MSR_TYPE_R) > >> + /* read-high */ > >> + __set_bit(msr, msr_bitmap + 0x400 / f); > >> + > >> + if (type & MSR_TYPE_W) > >> + /* write-high */ > >> + __set_bit(msr, msr_bitmap + 0xc00 / f); > >> + > >> } > >> } > >> + > >> static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) > >> { > >> if (!longmode_only) > >> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr); > >> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr); > >> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, > >> + msr, MSR_TYPE_R | MSR_TYPE_W); > >> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, > >> + msr, MSR_TYPE_R | MSR_TYPE_W); > >> +} > >> + > >> +static void vmx_intercept_for_msr_read(u32 msr, bool longmode_only, > >> + bool set) > >> +{ > >> + if (!longmode_only) { > >> + if (set) > >> + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy, > >> + msr, MSR_TYPE_R); > >> + else > >> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, > >> + msr, MSR_TYPE_R); > >> + > >> + } > >> + if (set) > >> + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode, > >> + msr, MSR_TYPE_R); > >> + else > >> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, > >> + msr, MSR_TYPE_R); > >> +} > >> + > >> +static void vmx_intercept_for_msr_write(u32 msr, bool longmode_only, > >> + bool set) > >> +{ > >> + if (!longmode_only) { > >> + if (set) > >> + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy, > >> + msr, MSR_TYPE_W); > >> + else > >> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, > >> + msr, MSR_TYPE_W); > >> + > >> + } > >> + if (set) > >> + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode, > >> + msr, MSR_TYPE_W); > >> + else > >> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, > >> + msr, MSR_TYPE_W); > >> } > >> > >> /* > >> @@ -3848,6 +3948,7 @@ static u32 vmx_secondary_exec_control(struct > > vcpu_vmx *vmx) > >> exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; if > >> (!enable_apicv_reg) exec_control &= > >> ~SECONDARY_EXEC_APIC_REGISTER_VIRT; + exec_control &= > >> ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; return exec_control; } > >> @@ -6103,6 +6204,52 @@ static void update_cr8_intercept(struct kvm_vcpu > > *vcpu, int tpr, int irr) > >> vmcs_write32(TPR_THRESHOLD, irr); > >> } > >> +static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool > >> set) +{ + u32 exec_control, sec_exec_control; + int msr; + struct > >> vcpu_vmx *vmx = to_vmx(vcpu); + + /* There is not point to enable > >> virtualize x2apic without enable + * apicv*/ + if > >> (!cpu_has_vmx_virtualize_x2apic_mode() || !enable_apicv_reg) + return; > >> + + if (set) { + exec_control = > >> vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + /* virtualize x2apic mode > >> relies on tpr shadow */ + if (!(exec_control & CPU_BASED_TPR_SHADOW)) > >> + return; + } + + sec_exec_control = > >> vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + + if (set) { > >> + sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; > >> + sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; + } else > >> { + sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; + if > >> (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) + sec_exec_control > >> |= + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + } > >> + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); + + for > >> (msr = 0x800; msr <= 0x8ff; msr++) + vmx_intercept_for_msr_read(msr, > >> false, !set); + + if (set) { + /* According SDM, in x2apic mode, the > >> whole id reg is used. + * But in KVM, it only use the highest eight > >> bits. Need to + * intercept it*/ + vmx_intercept_for_msr_read(0x802, > >> false, true); + /* TMCCT */ + vmx_intercept_for_msr_read(0x839, > >> false, true); + } + /* TPR */ + vmx_intercept_for_msr_write(0x808, > >> false, !set); +} + > >> static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) { u32 > >> exit_intr_info; @@ -7366,6 +7513,7 @@ static struct kvm_x86_ops > >> vmx_x86_ops = { .enable_nmi_window = enable_nmi_window, > >> .enable_irq_window = enable_irq_window, .update_cr8_intercept = > >> update_cr8_intercept, > >> + .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, > >> > >> .set_tss_addr = vmx_set_tss_addr, > >> .get_tdp_level = get_ept_level, > >> -- > >> 1.7.1 > > > > -- > > Gleb. > > > Best regards, > Yang -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html