RE: [PATCH v9 2/3] x86, apicv: add virtual x2apic support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Gleb Natapov wrote on 2013-01-10:
> On Thu, Jan 10, 2013 at 08:32:06AM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2013-01-10:
>>> On Thu, Jan 10, 2013 at 03:26:07PM +0800, Yang Zhang wrote:
>>>> From: Yang Zhang <yang.z.zhang@xxxxxxxxx>
>>>> 
>>>> basically to benefit from apicv, we need to enable virtualized x2apic mode.
>>>> Currently, we only enable it when guest is really using x2apic.
>>>> 
>>>> Also, clear MSR bitmap for corresponding x2apic MSRs when guest enabled
>>> x2apic:
>>>>     0x800 - 0x8ff: no read intercept for apicv register virtualization,
>>>>     		   except APIC ID and TMCCT.
>>>>     APIC ID and TMCCT: need software's assistance to get right value.
>>>>     TPR,EOI,SELF-IPI: no write intercept for virtual interrupt delivery.
>>>> Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>
>>>> Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
>>>> ---
>>>>  arch/x86/include/asm/kvm_host.h |    2 + arch/x86/include/asm/vmx.h
>>>>    |    1 + arch/x86/kvm/lapic.c            |    5 +-
>>>>    arch/x86/kvm/svm.c              |    6 + arch/x86/kvm/vmx.c |  194
>>>>    +++++++++++++++++++++++++++++++++++++-- 5 files
> changed, 200
>>>>  insertions(+), 8 deletions(-)
>>>> diff --git a/arch/x86/include/asm/kvm_host.h
>>>> b/arch/x86/include/asm/kvm_host.h index c431b33..572a562 100644 ---
>>>> a/arch/x86/include/asm/kvm_host.h +++
>>>> b/arch/x86/include/asm/kvm_host.h @@ -697,6 +697,8 @@ struct
>>>> kvm_x86_ops {
>>>>  	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
>>>>  	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
>>>>  	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
>>>> +	void (*enable_virtual_x2apic_mode)(struct kvm_vcpu *vcpu);
>>>> +	void (*disable_virtual_x2apic_mode)(struct kvm_vcpu *vcpu);
>>> Make one callback with enable/disable parameter. And do not forget SVM.
>>> 
>>>>  	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
>>>>  	int (*get_tdp_level)(void);
>>>>  	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool
> is_mmio);
>>>> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>>>> index 44c3f7e..0a54df0 100644
>>>> --- a/arch/x86/include/asm/vmx.h
>>>> +++ b/arch/x86/include/asm/vmx.h
>>>> @@ -139,6 +139,7 @@
>>>>  #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 #define
>>>>  SECONDARY_EXEC_ENABLE_EPT               0x00000002 #define
>>>>  SECONDARY_EXEC_RDTSCP			0x00000008 +#define
>>>>  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE   0x00000010 #define
>>>>  SECONDARY_EXEC_ENABLE_VPID              0x00000020 #define
>>>>  SECONDARY_EXEC_WBINVD_EXITING		0x00000040 #define
>>>>  SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>>> index 0664c13..ec38906 100644
>>>> --- a/arch/x86/kvm/lapic.c
>>>> +++ b/arch/x86/kvm/lapic.c
>>>> @@ -1328,7 +1328,10 @@ void kvm_lapic_set_base(struct kvm_vcpu
> *vcpu,
>>> u64 value)
>>>>  		u32 id = kvm_apic_id(apic);
>>>>  		u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
>>>>  		kvm_apic_set_ldr(apic, ldr);
>>>> -	}
>>>> +		kvm_x86_ops->enable_virtual_x2apic_mode(vcpu);
>>>> +	} else
>>>> +		kvm_x86_ops->disable_virtual_x2apic_mode(vcpu);
>>>> +
>>> You just broke SVM.
>>>>  	apic->base_address = apic->vcpu->arch.apic_base &
>>>>  			     MSR_IA32_APICBASE_BASE;
>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>>> index d29d3cd..0b82cb1 100644
>>>> --- a/arch/x86/kvm/svm.c
>>>> +++ b/arch/x86/kvm/svm.c
>>>> @@ -3571,6 +3571,11 @@ static void update_cr8_intercept(struct
> kvm_vcpu
>>> *vcpu, int tpr, int irr)
>>>>  		set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
>>>>  }
>>>> +static void svm_enable_virtual_x2apic_mode(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +	return;
>>>> +}
>>>> +
>>>>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { 	struct vcpu_svm
>>>>  *svm = to_svm(vcpu); @@ -4290,6 +4295,7 @@ static struct kvm_x86_ops
>>>>  svm_x86_ops = { 	.enable_nmi_window = enable_nmi_window,
>>>>  	.enable_irq_window = enable_irq_window, 	.update_cr8_intercept =
>>>>  update_cr8_intercept,
>>>> +	.enable_virtual_x2apic_mode = svm_enable_virtual_x2apic_mode,
>>>> 
>>>>  	.set_tss_addr = svm_set_tss_addr,
>>>>  	.get_tdp_level = get_npt_level,
>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>>> index 688f43f..b203ce7 100644
>>>> --- a/arch/x86/kvm/vmx.c
>>>> +++ b/arch/x86/kvm/vmx.c
>>>> @@ -433,6 +433,8 @@ struct vcpu_vmx {
>>>> 
>>>>  	bool rdtscp_enabled;
>>>> +	bool virtual_x2apic_enabled;
>>>> +
>>>>  	/* Support for a guest hypervisor (nested VMX) */
>>>>  	struct nested_vmx nested;
>>>>  };
>>>> @@ -767,12 +769,23 @@ static inline bool
>>> cpu_has_vmx_virtualize_apic_accesses(void)
>>>>  		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>>>>  }
>>>> +static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
>>>> +{
>>>> +	return vmcs_config.cpu_based_2nd_exec_ctrl &
>>>> +		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
>>>> +}
>>>> +
>>>>  static inline bool cpu_has_vmx_apic_register_virt(void)
>>>>  {
>>>>  	return vmcs_config.cpu_based_2nd_exec_ctrl &
>>>>  		SECONDARY_EXEC_APIC_REGISTER_VIRT;
>>>>  }
>>>> +static inline bool cpu_has_vmx_virtual_intr_delivery(void)
>>>> +{
>>>> +	return false;
>>>> +}
>>>> +
>>>>  static inline bool cpu_has_vmx_flexpriority(void)
>>>>  {
>>>>  	return cpu_has_vmx_tpr_shadow() &&
>>>> @@ -2544,6 +2557,7 @@ static __init int setup_vmcs_config(struct
>>> vmcs_config *vmcs_conf)
>>>>  	if (_cpu_based_exec_control &
>>>>  CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { 		min2 = 0; 		opt2 =
>>>>  SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
>>>>  +			SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
>>>>  			SECONDARY_EXEC_WBINVD_EXITING | 	SECONDARY_EXEC_ENABLE_VPID |
>>>>  			SECONDARY_EXEC_ENABLE_EPT | @@ -3731,7 +3745,45 @@ static void
>>>>  free_vpid(struct vcpu_vmx *vmx) 	spin_unlock(&vmx_vpid_lock); }
>>>> -static void __vmx_disable_intercept_for_msr(unsigned long
>>>> *msr_bitmap, u32 msr) +#define MSR_TYPE_R	1 +#define MSR_TYPE_W	2
>>>> +static void __vmx_disable_intercept_for_msr(unsigned long
>>>> *msr_bitmap, + 					u32 msr, int type) +{ +	int f = sizeof(unsigned
>>>> long); + +	if (!cpu_has_vmx_msr_bitmap()) +		return; + +	/* +	 * See
>>>> Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals +	 *
>>>> have the write-low and read-high bitmap offsets the wrong way round.
>>>> +	 * We can control MSRs 0x00000000-0x00001fff and
>>>> 0xc0000000-0xc0001fff. +	 */ +	if (msr <= 0x1fff) { +		if (type &
>>>> MSR_TYPE_R) +			/* read-low */ +			__clear_bit(msr, msr_bitmap +
>>>> 0x000 / f); + +		if (type & MSR_TYPE_W) +			/* write-low */ +
>>>> 	__clear_bit(msr, msr_bitmap + 0x800 / f); + +	} else if ((msr >=
>>>> 0xc0000000) && (msr <= 0xc0001fff)) { +		msr &= 0x1fff; +		if (type &
>>>> MSR_TYPE_R) + 	/* read-high */ +			__clear_bit(msr, msr_bitmap +
>>>> 0x400 / f); + +		if (type & MSR_TYPE_W) +			/* write-high */ +
>>>> 	__clear_bit(msr, msr_bitmap + 0xc00 / f); + +	} +} + +static void
>>>> __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + 					u32
>>>> msr, int type)
>>>>  {
>>>>  	int f = sizeof(unsigned long);
>>>> @@ -3744,20 +3796,75 @@ static void
>>> __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
>>>>  	 * We can control MSRs 0x00000000-0x00001fff and
>>>>  0xc0000000-0xc0001fff. 	 */ 	if (msr <= 0x1fff) {
>>>> -		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
>>>> -		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
>>>> +		if (type & MSR_TYPE_R)
>>>> +			/* read-low */
>>>> +			__set_bit(msr, msr_bitmap + 0x000 / f);
>>>> +
>>>> +		if (type & MSR_TYPE_W)
>>>> +			/* write-low */
>>>> +			__set_bit(msr, msr_bitmap + 0x800 / f);
>>>> +
>>>>  	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
>>>>  		msr &= 0x1fff;
>>>> -		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
>>>> -		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
>>>> +		if (type & MSR_TYPE_R)
>>>> +			/* read-high */
>>>> +			__set_bit(msr, msr_bitmap + 0x400 / f);
>>>> +
>>>> +		if (type & MSR_TYPE_W)
>>>> +			/* write-high */
>>>> +			__set_bit(msr, msr_bitmap + 0xc00 / f);
>>>> +
>>>>  	} } + static void vmx_disable_intercept_for_msr(u32 msr, bool
>>>>  longmode_only) { 	if (!longmode_only)
>>>> -		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
>>>> -	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
>>>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +						msr,
>>>> MSR_TYPE_R | MSR_TYPE_W);
>>>> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
>>>> +						msr, MSR_TYPE_R | MSR_TYPE_W); +} + +static void
>>>> vmx_intercept_for_msr_read(u32 msr, bool longmode_only, +					bool
>>>> set) +{ +	if (!longmode_only) { +		if (set) +
>>>> 	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy, +					msr,
>>>> MSR_TYPE_R); +		else +
>>>> 	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +					msr,
>>>> MSR_TYPE_R); + +	} +	if (set)
>>>> +		__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode, +				msr,
>>>> MSR_TYPE_R); +	else
>>>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +				msr,
>>>> MSR_TYPE_R); +} + +static void vmx_intercept_for_msr_write(u32 msr,
>>>> bool longmode_only, +					bool set) +{ +	if (!longmode_only) { +		if
>>>> (set) + 	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy,
>>>> +					msr, MSR_TYPE_W); +		else +
>>>> 	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +					msr,
>>>> MSR_TYPE_W); + +	} +	if (set)
>>>> +		__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode, +				msr,
>>>> MSR_TYPE_W); +	else
>>>> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +				msr,
>>>> MSR_TYPE_W);
>>>>  }
>>>>  
>>>>  /*
>>>> @@ -3855,6 +3962,7 @@ static u32 vmx_secondary_exec_control(struct
>>> vcpu_vmx *vmx)
>>>>  		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; 	if
>>>>  (!enable_apicv_reg_vid) 		exec_control &=
>>>>  ~SECONDARY_EXEC_APIC_REGISTER_VIRT; +	exec_control &=
>>>>  ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; 	return
> exec_control; }
>>>> @@ -6110,6 +6218,76 @@ static void update_cr8_intercept(struct
> kvm_vcpu
>>> *vcpu, int tpr, int irr)
>>>>  	vmcs_write32(TPR_THRESHOLD, irr);
>>>>  }
>>>> +static void vmx_enable_virtual_x2apic_mode(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +	u32 exec_control;
>>>> +	int msr;
>>>> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
>>>> +
>>>> +	if (!cpu_has_vmx_virtualize_x2apic_mode())
>>>> +		return;
>>>> +
>>>> +	exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
>>>> +	/* virtualize x2apic mode relies on tpr shadow */
>>>> +	if (!(exec_control & CPU_BASED_TPR_SHADOW))
>>>> +		return;
>>>> +
>>>> +	exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
>>>> +	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>>>> +	exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
>>>> +	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
>>>> +	vmx->virtual_x2apic_enabled = true;
>>> Why track it?
>> With this flag, we don't need to read the VMCS to check whether we enabled
>> virtual x2apic before.
>> 
> Why do you care? Just disable it regardless.
kvm_lapic_set_base will be called when creating the lapic. At that time, the vcpu is not initialized yet, so reading/writing the VMCS in vmx_disable_virtual_x2apic_mode will cause an error.
With this flag, we only disable virtual x2apic mode if it was enabled before.

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux