On Fri, Jan 11, 2013 at 02:37:15AM +0000, Zhang, Yang Z wrote: > Gleb Natapov wrote on 2013-01-10: > > On Thu, Jan 10, 2013 at 08:32:06AM +0000, Zhang, Yang Z wrote: > >> Gleb Natapov wrote on 2013-01-10: > >>> On Thu, Jan 10, 2013 at 03:26:07PM +0800, Yang Zhang wrote: > >>>> From: Yang Zhang <yang.z.zhang@xxxxxxxxx> > >>>> > >>>> basically to benefit from apicv, we need to enable virtualized x2apic mode. > >>>> Currently, we only enable it when guest is really using x2apic. > >>>> > >>>> Also, clear MSR bitmap for corresponding x2apic MSRs when guest enabled > >>> x2apic: > >>>> 0x800 - 0x8ff: no read intercept for apicv register virtualization, > >>>> except APIC ID and TMCCT. > >>>> APIC ID and TMCCT: need software's assistance to get right value. > >>>> TPR,EOI,SELF-IPI: no write intercept for virtual interrupt delivery. > >>>> Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx> > >>>> Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx> > >>>> --- > >>>> arch/x86/include/asm/kvm_host.h | 2 + arch/x86/include/asm/vmx.h > >>>> | 1 + arch/x86/kvm/lapic.c | 5 +- > >>>> arch/x86/kvm/svm.c | 6 + arch/x86/kvm/vmx.c | 194 > >>>> +++++++++++++++++++++++++++++++++++++-- 5 files > > changed, 200 > >>>> insertions(+), 8 deletions(-) > >>>> diff --git a/arch/x86/include/asm/kvm_host.h > >>>> b/arch/x86/include/asm/kvm_host.h index c431b33..572a562 100644 --- > >>>> a/arch/x86/include/asm/kvm_host.h +++ > >>>> b/arch/x86/include/asm/kvm_host.h @@ -697,6 +697,8 @@ struct > >>>> kvm_x86_ops { > >>>> void (*enable_nmi_window)(struct kvm_vcpu *vcpu); > >>>> void (*enable_irq_window)(struct kvm_vcpu *vcpu); > >>>> void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); > >>>> + void (*enable_virtual_x2apic_mode)(struct kvm_vcpu *vcpu); > >>>> + void (*disable_virtual_x2apic_mode)(struct kvm_vcpu *vcpu); > >>> Make one callback with enable/disable parameter. And do not forget SVM. 
> >>> > >>>> int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); > >>>> int (*get_tdp_level)(void); > >>>> u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool > > is_mmio); > >>>> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > >>>> index 44c3f7e..0a54df0 100644 > >>>> --- a/arch/x86/include/asm/vmx.h > >>>> +++ b/arch/x86/include/asm/vmx.h > >>>> @@ -139,6 +139,7 @@ > >>>> #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 #define > >>>> SECONDARY_EXEC_ENABLE_EPT 0x00000002 #define > >>>> SECONDARY_EXEC_RDTSCP 0x00000008 +#define > >>>> SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 #define > >>>> SECONDARY_EXEC_ENABLE_VPID 0x00000020 #define > >>>> SECONDARY_EXEC_WBINVD_EXITING 0x00000040 #define > >>>> SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 > >>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > >>>> index 0664c13..ec38906 100644 > >>>> --- a/arch/x86/kvm/lapic.c > >>>> +++ b/arch/x86/kvm/lapic.c > >>>> @@ -1328,7 +1328,10 @@ void kvm_lapic_set_base(struct kvm_vcpu > > *vcpu, > >>> u64 value) > >>>> u32 id = kvm_apic_id(apic); > >>>> u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); > >>>> kvm_apic_set_ldr(apic, ldr); > >>>> - } > >>>> + kvm_x86_ops->enable_virtual_x2apic_mode(vcpu); > >>>> + } else > >>>> + kvm_x86_ops->disable_virtual_x2apic_mode(vcpu); > >>>> + > >>> You just broke SVM. 
> >>>> apic->base_address = apic->vcpu->arch.apic_base & > >>>> MSR_IA32_APICBASE_BASE; > >>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > >>>> index d29d3cd..0b82cb1 100644 > >>>> --- a/arch/x86/kvm/svm.c > >>>> +++ b/arch/x86/kvm/svm.c > >>>> @@ -3571,6 +3571,11 @@ static void update_cr8_intercept(struct > > kvm_vcpu > >>> *vcpu, int tpr, int irr) > >>>> set_cr_intercept(svm, INTERCEPT_CR8_WRITE); > >>>> } > >>>> +static void svm_enable_virtual_x2apic_mode(struct kvm_vcpu *vcpu) > >>>> +{ > >>>> + return; > >>>> +} > >>>> + > >>>> static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm > >>>> *svm = to_svm(vcpu); @@ -4290,6 +4295,7 @@ static struct kvm_x86_ops > >>>> svm_x86_ops = { .enable_nmi_window = enable_nmi_window, > >>>> .enable_irq_window = enable_irq_window, .update_cr8_intercept = > >>>> update_cr8_intercept, > >>>> + .enable_virtual_x2apic_mode = svm_enable_virtual_x2apic_mode, > >>>> > >>>> .set_tss_addr = svm_set_tss_addr, > >>>> .get_tdp_level = get_npt_level, > >>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > >>>> index 688f43f..b203ce7 100644 > >>>> --- a/arch/x86/kvm/vmx.c > >>>> +++ b/arch/x86/kvm/vmx.c > >>>> @@ -433,6 +433,8 @@ struct vcpu_vmx { > >>>> > >>>> bool rdtscp_enabled; > >>>> + bool virtual_x2apic_enabled; > >>>> + > >>>> /* Support for a guest hypervisor (nested VMX) */ > >>>> struct nested_vmx nested; > >>>> }; > >>>> @@ -767,12 +769,23 @@ static inline bool > >>> cpu_has_vmx_virtualize_apic_accesses(void) > >>>> SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; > >>>> } > >>>> +static inline bool cpu_has_vmx_virtualize_x2apic_mode(void) > >>>> +{ > >>>> + return vmcs_config.cpu_based_2nd_exec_ctrl & > >>>> + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; > >>>> +} > >>>> + > >>>> static inline bool cpu_has_vmx_apic_register_virt(void) > >>>> { > >>>> return vmcs_config.cpu_based_2nd_exec_ctrl & > >>>> SECONDARY_EXEC_APIC_REGISTER_VIRT; > >>>> } > >>>> +static inline bool cpu_has_vmx_virtual_intr_delivery(void) > 
>>>> +{ > >>>> + return false; > >>>> +} > >>>> + > >>>> static inline bool cpu_has_vmx_flexpriority(void) > >>>> { > >>>> return cpu_has_vmx_tpr_shadow() && > >>>> @@ -2544,6 +2557,7 @@ static __init int setup_vmcs_config(struct > >>> vmcs_config *vmcs_conf) > >>>> if (_cpu_based_exec_control & > >>>> CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { min2 = 0; opt2 = > >>>> SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | > >>>> + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | > >>>> SECONDARY_EXEC_WBINVD_EXITING | SECONDARY_EXEC_ENABLE_VPID | > >>>> SECONDARY_EXEC_ENABLE_EPT | @@ -3731,7 +3745,45 @@ static void > >>>> free_vpid(struct vcpu_vmx *vmx) spin_unlock(&vmx_vpid_lock); } > >>>> -static void __vmx_disable_intercept_for_msr(unsigned long > >>>> *msr_bitmap, u32 msr) +#define MSR_TYPE_R 1 +#define MSR_TYPE_W 2 > >>>> +static void __vmx_disable_intercept_for_msr(unsigned long > >>>> *msr_bitmap, + u32 msr, int type) +{ + int f = sizeof(unsigned > >>>> long); + + if (!cpu_has_vmx_msr_bitmap()) + return; + + /* + * See > >>>> Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals + * > >>>> have the write-low and read-high bitmap offsets the wrong way round. > >>>> + * We can control MSRs 0x00000000-0x00001fff and > >>>> 0xc0000000-0xc0001fff. 
+ */ + if (msr <= 0x1fff) { + if (type & > >>>> MSR_TYPE_R) + /* read-low */ + __clear_bit(msr, msr_bitmap + > >>>> 0x000 / f); + + if (type & MSR_TYPE_W) + /* write-low */ + > >>>> __clear_bit(msr, msr_bitmap + 0x800 / f); + + } else if ((msr >= > >>>> 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + if (type & > >>>> MSR_TYPE_R) + /* read-high */ + __clear_bit(msr, msr_bitmap + > >>>> 0x400 / f); + + if (type & MSR_TYPE_W) + /* write-high */ + > >>>> __clear_bit(msr, msr_bitmap + 0xc00 / f); + + } +} + +static void > >>>> __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + u32 > >>>> msr, int type) > >>>> { > >>>> int f = sizeof(unsigned long); > >>>> @@ -3744,20 +3796,75 @@ static void > >>> __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) > >>>> * We can control MSRs 0x00000000-0x00001fff and > >>>> 0xc0000000-0xc0001fff. */ if (msr <= 0x1fff) { > >>>> - __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */ > >>>> - __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */ > >>>> + if (type & MSR_TYPE_R) > >>>> + /* read-low */ > >>>> + __set_bit(msr, msr_bitmap + 0x000 / f); > >>>> + > >>>> + if (type & MSR_TYPE_W) > >>>> + /* write-low */ > >>>> + __set_bit(msr, msr_bitmap + 0x800 / f); > >>>> + > >>>> } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { > >>>> msr &= 0x1fff; > >>>> - __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */ > >>>> - __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */ > >>>> + if (type & MSR_TYPE_R) > >>>> + /* read-high */ > >>>> + __set_bit(msr, msr_bitmap + 0x400 / f); > >>>> + > >>>> + if (type & MSR_TYPE_W) > >>>> + /* write-high */ > >>>> + __set_bit(msr, msr_bitmap + 0xc00 / f); > >>>> + > >>>> } } + static void vmx_disable_intercept_for_msr(u32 msr, bool > >>>> longmode_only) { if (!longmode_only) > >>>> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr); > >>>> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr); > >>>> + 
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, + msr, > >>>> MSR_TYPE_R | MSR_TYPE_W); > >>>> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, > >>>> + msr, MSR_TYPE_R | MSR_TYPE_W); +} + +static void > >>>> vmx_intercept_for_msr_read(u32 msr, bool longmode_only, + bool > >>>> set) +{ + if (!longmode_only) { + if (set) + > >>>> __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy, + msr, > >>>> MSR_TYPE_R); + else + > >>>> __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, + msr, > >>>> MSR_TYPE_R); + + } + if (set) > >>>> + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode, + msr, > >>>> MSR_TYPE_R); + else > >>>> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, + msr, > >>>> MSR_TYPE_R); +} + +static void vmx_intercept_for_msr_write(u32 msr, > >>>> bool longmode_only, + bool set) +{ + if (!longmode_only) { + if > >>>> (set) + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy, > >>>> + msr, MSR_TYPE_W); + else + > >>>> __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, + msr, > >>>> MSR_TYPE_W); + + } + if (set) > >>>> + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode, + msr, > >>>> MSR_TYPE_W); + else > >>>> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, + msr, > >>>> MSR_TYPE_W); > >>>> } > >>>> > >>>> /* > >>>> @@ -3855,6 +3962,7 @@ static u32 vmx_secondary_exec_control(struct > >>> vcpu_vmx *vmx) > >>>> exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; if > >>>> (!enable_apicv_reg_vid) exec_control &= > >>>> ~SECONDARY_EXEC_APIC_REGISTER_VIRT; + exec_control &= > >>>> ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; return > > exec_control; } > >>>> @@ -6110,6 +6218,76 @@ static void update_cr8_intercept(struct > > kvm_vcpu > >>> *vcpu, int tpr, int irr) > >>>> vmcs_write32(TPR_THRESHOLD, irr); > >>>> } > >>>> +static void vmx_enable_virtual_x2apic_mode(struct kvm_vcpu *vcpu) > >>>> +{ > >>>> + u32 exec_control; > >>>> + int msr; > >>>> + struct vcpu_vmx *vmx = to_vmx(vcpu); > >>>> + > >>>> + if 
(!cpu_has_vmx_virtualize_x2apic_mode()) > >>>> + return; > >>>> + > >>>> + exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); > >>>> + /* virtualize x2apic mode relies on tpr shadow */ > >>>> + if (!(exec_control & CPU_BASED_TPR_SHADOW)) > >>>> + return; > >>>> + > >>>> + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); > >>>> + exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; > >>>> + exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; > >>>> + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); > >>>> + vmx->virtual_x2apic_enabled = true; > >>> Why track it? > >> With this flag, we don't need to read vmcs to check whether we enabled > >> virtual x2apic before. > >> > > Why do you care? Just disable it regardless. > kvm_lapic_set_base will be called when creating lapic. At that time, the vcpu isn't initialized yet. Reading or writing the vmcs in vmx_disable_virtual_x2apic_mode would then cause an error. > With this flag, we only disable the virtual x2apic mode if it was enabled before. > Then call vmx_enable_virtual_x2apic_mode() only when mode actually changes. kvm_lapic_set_base() can track it like it does for MSR_IA32_APICBASE_ENABLE. -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html