Re: [PATCH v12 2/3] x86, apicv: add virtual x2apic support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jan 23, 2013 at 10:47:25PM +0800, Yang Zhang wrote:
> From: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> 
> basically to benefit from apicv, we need to enable virtualized x2apic mode.
> Currently, we only enable it when guest is really using x2apic.
> 
> Also, clear MSR bitmap for corresponding x2apic MSRs when guest enabled x2apic:
> 0x800 - 0x8ff: no read intercept for apicv register virtualization,
>                except APIC ID and TMCCT which need software's assistance to
>                get right value.
> 
> Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>
> Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |    1 +
>  arch/x86/include/asm/vmx.h      |    1 +
>  arch/x86/kvm/lapic.c            |   14 ++-
>  arch/x86/kvm/svm.c              |    6 +
>  arch/x86/kvm/vmx.c              |  203 +++++++++++++++++++++++++++++++++++----
>  5 files changed, 201 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index f75e1fe..e1306c1 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -692,6 +692,7 @@ struct kvm_x86_ops {
>  	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
>  	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
>  	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
> +	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
>  	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
>  	int (*get_tdp_level)(void);
>  	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 44c3f7e..0a54df0 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -139,6 +139,7 @@
>  #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
>  #define SECONDARY_EXEC_ENABLE_EPT               0x00000002
>  #define SECONDARY_EXEC_RDTSCP			0x00000008
> +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE   0x00000010
>  #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
>  #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
>  #define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 0664c13..83a9547 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1303,6 +1303,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
>  
>  void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
>  {
> +	u64 old_value = vcpu->arch.apic_base;
>  	struct kvm_lapic *apic = vcpu->arch.apic;
>  
>  	if (!apic) {
> @@ -1324,11 +1325,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
>  		value &= ~MSR_IA32_APICBASE_BSP;
>  
>  	vcpu->arch.apic_base = value;
> -	if (apic_x2apic_mode(apic)) {
> -		u32 id = kvm_apic_id(apic);
> -		u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
> -		kvm_apic_set_ldr(apic, ldr);
> +	if ((old_value ^ value) & X2APIC_ENABLE) {
> +		if (value & X2APIC_ENABLE) {
> +			u32 id = kvm_apic_id(apic);
> +			u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
> +			kvm_apic_set_ldr(apic, ldr);
> +			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
> +		} else
> +			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
>  	}
> +
>  	apic->base_address = apic->vcpu->arch.apic_base &
>  			     MSR_IA32_APICBASE_BASE;
>  
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d29d3cd..38407e9 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3571,6 +3571,11 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
>  		set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
>  }
>  
> +static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
> +{
> +	return;
> +}
> +
>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> @@ -4290,6 +4295,7 @@ static struct kvm_x86_ops svm_x86_ops = {
>  	.enable_nmi_window = enable_nmi_window,
>  	.enable_irq_window = enable_irq_window,
>  	.update_cr8_intercept = update_cr8_intercept,
> +	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
>  
>  	.set_tss_addr = svm_set_tss_addr,
>  	.get_tdp_level = get_npt_level,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 8a8116a..c2bc989 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -643,6 +643,8 @@ static unsigned long *vmx_io_bitmap_a;
>  static unsigned long *vmx_io_bitmap_b;
>  static unsigned long *vmx_msr_bitmap_legacy;
>  static unsigned long *vmx_msr_bitmap_longmode;
> +static unsigned long *vmx_msr_bitmap_legacy_x2apic;
> +static unsigned long *vmx_msr_bitmap_longmode_x2apic;
>  
>  static bool cpu_has_load_ia32_efer;
>  static bool cpu_has_load_perf_global_ctrl;
> @@ -767,6 +769,12 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
>  		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>  }
>  
> +static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
> +{
> +	return vmcs_config.cpu_based_2nd_exec_ctrl &
> +		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
> +}
> +
>  static inline bool cpu_has_vmx_apic_register_virt(void)
>  {
>  	return vmcs_config.cpu_based_2nd_exec_ctrl &
> @@ -1830,6 +1838,24 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
>  	vmx->guest_msrs[from] = tmp;
>  }
>  
> +static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long *msr_bitmap;
> +
> +	if (vcpu->arch.apic_base & X2APIC_ENABLE)
 if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->apic))

> +		if (is_long_mode(vcpu))
> +			msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
> +		else
> +			msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
> +	else
> +		if (is_long_mode(vcpu))
> +			msr_bitmap = vmx_msr_bitmap_longmode;
> +		else
> +			msr_bitmap = vmx_msr_bitmap_legacy;
> +
> +	vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
> +}
> +
>  /*
>   * Set up the vmcs to automatically save and restore system
>   * msrs.  Don't touch the 64-bit msrs if the guest is in legacy
> @@ -1838,7 +1864,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
>  static void setup_msrs(struct vcpu_vmx *vmx)
>  {
>  	int save_nmsrs, index;
> -	unsigned long *msr_bitmap;
>  
>  	save_nmsrs = 0;
>  #ifdef CONFIG_X86_64
> @@ -1870,14 +1895,8 @@ static void setup_msrs(struct vcpu_vmx *vmx)
>  
>  	vmx->save_nmsrs = save_nmsrs;
>  
> -	if (cpu_has_vmx_msr_bitmap()) {
> -		if (is_long_mode(&vmx->vcpu))
> -			msr_bitmap = vmx_msr_bitmap_longmode;
> -		else
> -			msr_bitmap = vmx_msr_bitmap_legacy;
> -
> -		vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
> -	}
> +	if (cpu_has_vmx_msr_bitmap())
> +		vmx_set_msr_bitmap(&vmx->vcpu);
>  }
>  
>  /*
> @@ -2543,6 +2562,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
>  	if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
>  		min2 = 0;
>  		opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
> +			SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
>  			SECONDARY_EXEC_WBINVD_EXITING |
>  			SECONDARY_EXEC_ENABLE_VPID |
>  			SECONDARY_EXEC_ENABLE_EPT |
> @@ -2564,7 +2584,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
>  
>  	if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
>  		_cpu_based_2nd_exec_control &= ~(
> -				SECONDARY_EXEC_APIC_REGISTER_VIRT);
> +				SECONDARY_EXEC_APIC_REGISTER_VIRT |
> +				SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
>  
>  	if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
>  		/* CR3 accesses and invlpg don't need to cause VM Exits when EPT
> @@ -3724,7 +3745,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
>  	spin_unlock(&vmx_vpid_lock);
>  }
>  
> -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
> +#define MSR_TYPE_R	1
> +#define MSR_TYPE_W	2
> +static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> +						u32 msr, int type)
>  {
>  	int f = sizeof(unsigned long);
>  
> @@ -3737,20 +3761,93 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
>  	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
>  	 */
>  	if (msr <= 0x1fff) {
> -		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
> -		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
> +		if (type & MSR_TYPE_R)
> +			/* read-low */
> +			__clear_bit(msr, msr_bitmap + 0x000 / f);
> +
> +		if (type & MSR_TYPE_W)
> +			/* write-low */
> +			__clear_bit(msr, msr_bitmap + 0x800 / f);
> +
>  	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
>  		msr &= 0x1fff;
> -		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
> -		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
> +		if (type & MSR_TYPE_R)
> +			/* read-high */
> +			__clear_bit(msr, msr_bitmap + 0x400 / f);
> +
> +		if (type & MSR_TYPE_W)
> +			/* write-high */
> +			__clear_bit(msr, msr_bitmap + 0xc00 / f);
> +
> +	}
> +}
> +
> +static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
> +						u32 msr, int type)
> +{
> +	int f = sizeof(unsigned long);
> +
> +	if (!cpu_has_vmx_msr_bitmap())
> +		return;
> +
> +	/*
> +	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> +	 * have the write-low and read-high bitmap offsets the wrong way round.
> +	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> +	 */
> +	if (msr <= 0x1fff) {
> +		if (type & MSR_TYPE_R)
> +			/* read-low */
> +			__set_bit(msr, msr_bitmap + 0x000 / f);
> +
> +		if (type & MSR_TYPE_W)
> +			/* write-low */
> +			__set_bit(msr, msr_bitmap + 0x800 / f);
> +
> +	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> +		msr &= 0x1fff;
> +		if (type & MSR_TYPE_R)
> +			/* read-high */
> +			__set_bit(msr, msr_bitmap + 0x400 / f);
> +
> +		if (type & MSR_TYPE_W)
> +			/* write-high */
> +			__set_bit(msr, msr_bitmap + 0xc00 / f);
> +
>  	}
>  }
>  
>  static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
>  {
>  	if (!longmode_only)
> -		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
> -	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> +						msr, MSR_TYPE_R | MSR_TYPE_W);
> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> +						msr, MSR_TYPE_R | MSR_TYPE_W);
> +}
> +
> +static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
> +{
> +	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> +			msr, MSR_TYPE_R);
> +	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> +			msr, MSR_TYPE_R);
> +}
> +
> +static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
> +{
> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> +			msr, MSR_TYPE_R);
> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> +			msr, MSR_TYPE_R);
> +}
> +
> +static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
> +{
> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> +			msr, MSR_TYPE_W);
> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> +			msr, MSR_TYPE_W);
>  }
>  
>  /*
> @@ -3848,6 +3945,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
>  		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
>  	if (!enable_apicv_reg || !irqchip_in_kernel(vmx->vcpu.kvm))
>  		exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
> +	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
>  	return exec_control;
>  }
>  
> @@ -6103,6 +6201,40 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
>  	vmcs_write32(TPR_THRESHOLD, irr);
>  }
>  
> +static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
> +{
> +	u32 exec_control, sec_exec_control;
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +	/* There is not point to enable virtualize x2apic without enable
> +	 * apicv
> +	 */
> +	if (!cpu_has_vmx_virtualize_x2apic_mode() || !enable_apicv_reg)
> +		return;
> +
> +	if (set) {
Just add vm_need_tpr_shadow() to the if above.

> +		exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
> +		/* virtualize x2apic mode relies on tpr shadow */
> +		if (!(exec_control & CPU_BASED_TPR_SHADOW))
> +			return;
> +	}
> +
> +	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> +
> +	if (set) {
> +		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
> +		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
> +	} else {
> +		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
> +		if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
How enable_apicv_reg can be true without virtualized apic access?

> +			sec_exec_control |=
> +					SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
> +	}
> +	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
> +
> +	vmx_set_msr_bitmap(vcpu);
> +}
> +
>  static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
>  {
>  	u32 exit_intr_info;
> @@ -7366,6 +7498,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
>  	.enable_nmi_window = enable_nmi_window,
>  	.enable_irq_window = enable_irq_window,
>  	.update_cr8_intercept = update_cr8_intercept,
> +	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
>  
>  	.set_tss_addr = vmx_set_tss_addr,
>  	.get_tdp_level = get_ept_level,
> @@ -7398,7 +7531,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
>  
>  static int __init vmx_init(void)
>  {
> -	int r, i;
> +	int r, i, msr;
>  
>  	rdmsrl_safe(MSR_EFER, &host_efer);
>  
> @@ -7419,11 +7552,19 @@ static int __init vmx_init(void)
>  	if (!vmx_msr_bitmap_legacy)
>  		goto out1;
>  
> +	vmx_msr_bitmap_legacy_x2apic =
> +				(unsigned long *)__get_free_page(GFP_KERNEL);
> +	if (!vmx_msr_bitmap_legacy_x2apic)
> +		goto out2;
>  
>  	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
>  	if (!vmx_msr_bitmap_longmode)
> -		goto out2;
> +		goto out3;
>  
> +	vmx_msr_bitmap_longmode_x2apic =
> +				(unsigned long *)__get_free_page(GFP_KERNEL);
> +	if (!vmx_msr_bitmap_longmode_x2apic)
> +		goto out4;
>  
>  	/*
>  	 * Allow direct access to the PC debug port (it is often used for I/O
> @@ -7455,6 +7596,24 @@ static int __init vmx_init(void)
>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> +	memcpy(vmx_msr_bitmap_legacy_x2apic,
> +			vmx_msr_bitmap_legacy, PAGE_SIZE);
> +	memcpy(vmx_msr_bitmap_longmode_x2apic,
> +			vmx_msr_bitmap_longmode, PAGE_SIZE);
> +
> +	if (enable_apicv_reg) {
> +		for (msr = 0x800; msr <= 0x8ff; msr++)
> +			vmx_disable_intercept_msr_read_x2apic(msr);
> +
> +		/* According SDM, in x2apic mode, the whole id reg is used.
> +		 * But in KVM, it only use the highest eight bits. Need to
> +		 * intercept it */
> +		vmx_enable_intercept_msr_read_x2apic(0x802);
> +		/* TMCCT */
> +		vmx_enable_intercept_msr_read_x2apic(0x839);
> +		/* TPR */
> +		vmx_disable_intercept_msr_write_x2apic(0x808);
> +	}
>  
>  	if (enable_ept) {
>  		kvm_mmu_set_mask_ptes(0ull,
> @@ -7468,8 +7627,10 @@ static int __init vmx_init(void)
>  
>  	return 0;
>  
> -out3:
> +out4:
>  	free_page((unsigned long)vmx_msr_bitmap_longmode);
> +out3:
> +	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
>  out2:
>  	free_page((unsigned long)vmx_msr_bitmap_legacy);
>  out1:
> @@ -7481,6 +7642,8 @@ out:
>  
>  static void __exit vmx_exit(void)
>  {
> +	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> +	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
>  	free_page((unsigned long)vmx_msr_bitmap_legacy);
>  	free_page((unsigned long)vmx_msr_bitmap_longmode);
>  	free_page((unsigned long)vmx_io_bitmap_b);
> -- 
> 1.7.1

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux