RE: [PATCH v10 2/3] x86, apicv: add virtual x2apic support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Gleb Natapov wrote on 2013-01-14:
> On Mon, Jan 14, 2013 at 03:13:34PM +0800, Yang Zhang wrote:
>> From: Yang Zhang <yang.z.zhang@xxxxxxxxx>
>> 
>> Basically, to benefit from apicv, we need to enable virtualized x2apic mode.
>> Currently, we only enable it when the guest is really using x2apic.
>> 
>> Also, clear the MSR bitmap for the corresponding x2apic MSRs when the guest enables x2apic:
>>     0x800 - 0x8ff: no read intercept for apicv register virtualization,
>>                    except APIC ID and TMCCT.
>>     APIC ID and TMCCT: need software assistance to get the right value.
> Actually, since the MSR bitmap is shared between all vcpus, this will break
> guests that do not enable x2apic.
I don't think this case will exist. It will break the real OS too.
 
>> Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>
>> Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
>> ---
>>  arch/x86/include/asm/kvm_host.h |    1 + arch/x86/include/asm/vmx.h   
>>    |    1 + arch/x86/kvm/lapic.c            |   15 +++-
>>  arch/x86/kvm/svm.c              |    6 ++ arch/x86/kvm/vmx.c          
>>     |  162 +++++++++++++++++++++++++++++++++++++-- 5 files changed, 173
>>  insertions(+), 12 deletions(-)
>> diff --git a/arch/x86/include/asm/kvm_host.h
>> b/arch/x86/include/asm/kvm_host.h index c431b33..35aa8e6 100644 ---
>> a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -697,6 +697,7 @@ struct kvm_x86_ops {
>>  	void (*enable_nmi_window)(struct kvm_vcpu *vcpu); 	void
>>  (*enable_irq_window)(struct kvm_vcpu *vcpu); 	void
>>  (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
>>  +	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
>>  	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 	int
>>  (*get_tdp_level)(void); 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu,
>>  gfn_t gfn, bool is_mmio);
>> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>> index 44c3f7e..0a54df0 100644
>> --- a/arch/x86/include/asm/vmx.h
>> +++ b/arch/x86/include/asm/vmx.h
>> @@ -139,6 +139,7 @@
>>  #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 #define
>>  SECONDARY_EXEC_ENABLE_EPT               0x00000002 #define
>>  SECONDARY_EXEC_RDTSCP			0x00000008 +#define
>>  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE   0x00000010 #define
>>  SECONDARY_EXEC_ENABLE_VPID              0x00000020 #define
>>  SECONDARY_EXEC_WBINVD_EXITING		0x00000040 #define
>>  SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> index 0664c13..2ef5e2b 100644
>> --- a/arch/x86/kvm/lapic.c
>> +++ b/arch/x86/kvm/lapic.c
>> @@ -1323,12 +1323,17 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu,
> u64 value)
>>  	if (!kvm_vcpu_is_bsp(apic->vcpu))
>>  		value &= ~MSR_IA32_APICBASE_BSP;
>> -	vcpu->arch.apic_base = value;
>> -	if (apic_x2apic_mode(apic)) {
>> -		u32 id = kvm_apic_id(apic);
>> -		u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
>> -		kvm_apic_set_ldr(apic, ldr);
>> +	if ((vcpu->arch.apic_base ^ value) & X2APIC_ENABLE) {
>> +		if (value & X2APIC_ENABLE) {
>> +			u32 id = kvm_apic_id(apic);
>> +			u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
>> +			kvm_apic_set_ldr(apic, ldr);
>> +			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
>> +		} else
>> +			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
>>  	}
>> +
>> +	vcpu->arch.apic_base = value;
>>  	apic->base_address = apic->vcpu->arch.apic_base &
>>  			     MSR_IA32_APICBASE_BASE;
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index d29d3cd..38407e9 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -3571,6 +3571,11 @@ static void update_cr8_intercept(struct kvm_vcpu
> *vcpu, int tpr, int irr)
>>  		set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
>>  }
>> +static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>> +{
>> +	return;
>> +}
>> +
>>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { 	struct vcpu_svm
>>  *svm = to_svm(vcpu); @@ -4290,6 +4295,7 @@ static struct kvm_x86_ops
>>  svm_x86_ops = { 	.enable_nmi_window = enable_nmi_window,
>>  	.enable_irq_window = enable_irq_window, 	.update_cr8_intercept =
>>  update_cr8_intercept,
>> +	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
>> 
>>  	.set_tss_addr = svm_set_tss_addr,
>>  	.get_tdp_level = get_npt_level,
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 0403634..847022e 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -767,6 +767,12 @@ static inline bool
> cpu_has_vmx_virtualize_apic_accesses(void)
>>  		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>>  }
>> +static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
>> +{
>> +	return vmcs_config.cpu_based_2nd_exec_ctrl &
>> +		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
>> +}
>> +
>>  static inline bool cpu_has_vmx_apic_register_virt(void)
>>  {
>>  	return vmcs_config.cpu_based_2nd_exec_ctrl &
>> @@ -2543,6 +2549,7 @@ static __init int setup_vmcs_config(struct
> vmcs_config *vmcs_conf)
>>  	if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
>>  { 		min2 = 0; 		opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
>>  +			SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
>>  			SECONDARY_EXEC_WBINVD_EXITING | 			SECONDARY_EXEC_ENABLE_VPID |
>>  			SECONDARY_EXEC_ENABLE_EPT | @@ -3724,7 +3731,45 @@ static void
>>  free_vpid(struct vcpu_vmx *vmx) 	spin_unlock(&vmx_vpid_lock); }
>> -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
>> u32 msr) +#define MSR_TYPE_R	1 +#define MSR_TYPE_W	2 +static void
>> __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, +						u32
>> msr, int type) +{ +	int f = sizeof(unsigned long); + +	if
>> (!cpu_has_vmx_msr_bitmap()) +		return; + +	/* +	 * See Intel PRM Vol.
>> 3, 20.6.9 (MSR-Bitmap Address). Early manuals +	 * have the write-low
>> and read-high bitmap offsets the wrong way round. +	 * We can control
>> MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. +	 */ +	if (msr
>> <= 0x1fff) { +		if (type & MSR_TYPE_R) +			/* read-low */
>> +			__clear_bit(msr, msr_bitmap + 0x000 / f); + +		if (type &
>> MSR_TYPE_W) +			/* write-low */ +			__clear_bit(msr, msr_bitmap + 0x800
>> / f); + +	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
>> +		msr &= 0x1fff; +		if (type & MSR_TYPE_R) +			/* read-high */
>> +			__clear_bit(msr, msr_bitmap + 0x400 / f); + +		if (type &
>> MSR_TYPE_W) +			/* write-high */ +			__clear_bit(msr, msr_bitmap +
>> 0xc00 / f); + +	} +} + +static void
>> __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, +						u32
>> msr, int type)
>>  {
>>  	int f = sizeof(unsigned long);
>> @@ -3737,20 +3782,75 @@ static void
> __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
>>  	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
>>  	 */
>>  	if (msr <= 0x1fff) {
>> -		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
>> -		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
>> +		if (type & MSR_TYPE_R)
>> +			/* read-low */
>> +			__set_bit(msr, msr_bitmap + 0x000 / f);
>> +
>> +		if (type & MSR_TYPE_W)
>> +			/* write-low */
>> +			__set_bit(msr, msr_bitmap + 0x800 / f);
>> +
>>  	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
>>  		msr &= 0x1fff;
>> -		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
>> -		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
>> +		if (type & MSR_TYPE_R)
>> +			/* read-high */
>> +			__set_bit(msr, msr_bitmap + 0x400 / f);
>> +
>> +		if (type & MSR_TYPE_W)
>> +			/* write-high */
>> +			__set_bit(msr, msr_bitmap + 0xc00 / f);
>> +
>>  	}
>>  }
>> +
>>  static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
>>  {
>>  	if (!longmode_only)
>> -		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
>> -	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
>> +						msr, MSR_TYPE_R | MSR_TYPE_W);
>> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
>> +						msr, MSR_TYPE_R | MSR_TYPE_W);
>> +}
>> +
>> +static void vmx_intercept_for_msr_read(u32 msr, bool longmode_only,
>> +					bool set)
>> +{
>> +	if (!longmode_only) {
>> +		if (set)
>> +			__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy,
>> +					msr, MSR_TYPE_R);
>> +		else
>> +			__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
>> +					msr, MSR_TYPE_R);
>> +
>> +	}
>> +	if (set)
>> +		__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode,
>> +				msr, MSR_TYPE_R);
>> +	else
>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
>> +				msr, MSR_TYPE_R);
>> +}
>> +
>> +static void vmx_intercept_for_msr_write(u32 msr, bool longmode_only,
>> +					bool set)
>> +{
>> +	if (!longmode_only) {
>> +		if (set)
>> +			__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy,
>> +					msr, MSR_TYPE_W);
>> +		else
>> +			__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
>> +					msr, MSR_TYPE_W);
>> +
>> +	}
>> +	if (set)
>> +		__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode,
>> +				msr, MSR_TYPE_W);
>> +	else
>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
>> +				msr, MSR_TYPE_W);
>>  }
>>  
>>  /*
>> @@ -3848,6 +3948,7 @@ static u32 vmx_secondary_exec_control(struct
> vcpu_vmx *vmx)
>>  		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; 	if
>>  (!enable_apicv_reg) 		exec_control &=
>>  ~SECONDARY_EXEC_APIC_REGISTER_VIRT; +	exec_control &=
>>  ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; 	return exec_control; }
>> @@ -6103,6 +6204,52 @@ static void update_cr8_intercept(struct kvm_vcpu
> *vcpu, int tpr, int irr)
>>  	vmcs_write32(TPR_THRESHOLD, irr);
>>  }
>> +static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool
>> set) +{ +	u32 exec_control, sec_exec_control; +	int msr; +	struct
>> vcpu_vmx *vmx = to_vmx(vcpu); + +	/* There is not point to enable
>> virtualize x2apic without enable +	 * apicv*/ +	if
>> (!cpu_has_vmx_virtualize_x2apic_mode() || !enable_apicv_reg) +		return;
>> + +	if (set) { +		exec_control =
>> vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); +		/* virtualize x2apic mode
>> relies on tpr shadow */ +		if (!(exec_control & CPU_BASED_TPR_SHADOW))
>> +			return; +	} + +	sec_exec_control =
>> vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + +	if (set) {
>> +		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>> +		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; +	} else
>> { +		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; +		if
>> (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) +			sec_exec_control
>> |= +					SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; +	}
>> +	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); + +	for
>> (msr = 0x800; msr <= 0x8ff; msr++) +		vmx_intercept_for_msr_read(msr,
>> false, !set); + +	if (set) { +		/* According SDM, in x2apic mode, the
>> whole id reg is used. +		 * But in KVM, it only use the highest eight
>> bits. Need to +		 * intercept it*/ +		vmx_intercept_for_msr_read(0x802,
>> false, true); +		/* TMCCT */ +		vmx_intercept_for_msr_read(0x839,
>> false, true); +	} +	/* TPR */ +	vmx_intercept_for_msr_write(0x808,
>> false, !set); +} +
>>  static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) { 	u32
>>  exit_intr_info; @@ -7366,6 +7513,7 @@ static struct kvm_x86_ops
>>  vmx_x86_ops = { 	.enable_nmi_window = enable_nmi_window,
>>  	.enable_irq_window = enable_irq_window, 	.update_cr8_intercept =
>>  update_cr8_intercept,
>> +	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
>> 
>>  	.set_tss_addr = vmx_set_tss_addr,
>>  	.get_tdp_level = get_ept_level,
>> --
>> 1.7.1
> 
> --
> 			Gleb.


Best regards,
Yang

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux