RE: [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi, Avi

Any comments for the patch?

Best Regards
Junjie Mao

> -----Original Message-----
> From: Marcelo Tosatti [mailto:mtosatti@xxxxxxxxxx]
> Sent: Saturday, June 16, 2012 10:32 AM
> To: Mao, Junjie
> Cc: 'kvm@xxxxxxxxxxxxxxx'; Avi Kivity
> Subject: Re: [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with
> EPT
> 
> On Thu, Jun 14, 2012 at 02:04:25AM +0000, Mao, Junjie wrote:
> > This patch handles PCID/INVPCID for guests.
> >
> > Process-context identifiers (PCIDs) are a facility by which a logical
> > processor may cache information for multiple linear-address spaces so
> > that the processor may retain cached information when software
> > switches to a different linear address space. Refer to section 4.10.1
> > in IA32 Intel Software Developer's Manual Volume 3A for details.
> >
> > For guests with EPT, the PCID feature is enabled and INVPCID behaves
> > as running natively.
> > For guests without EPT, the PCID feature is disabled and INVPCID triggers
> #UD.
> >
> > Changes from v3:
> > 	Rebase to the latest tree
> > 	Expose PCID to nested guests
> > 	Remove the pcid_supported callback
> >
> > Changes from v2:
> > 	Seperate management of PCID and INVPCID
> > 	Prevent PCID bit in CPUID from exposing on guest hypervisors
> > 	Don't check the lower 12 bits when loading cr3 if cr4.PCIDE is set
> > 	Explicitly disable INVPCID for L2 guests
> > 	Support both enable and disable INVPCID in vmx_cpuid_update()
> >
> > Changes from v1:
> > 	Move cr0/cr4 writing checks to x86.c
> > 	Update comments for the reason why PCID is disabled for non-EPT guests
> > 	Do not support PCID/INVPCID for nested guests at present
> > 	Clean up useless symbols
> >
> > Signed-off-by: Junjie Mao <junjie.mao@xxxxxxxxx>
> 
> Looks good to me.
> 
> > ---
> >  arch/x86/include/asm/kvm_host.h        |    4 ++-
> >  arch/x86/include/asm/processor-flags.h |    2 +
> >  arch/x86/include/asm/vmx.h             |    2 +
> >  arch/x86/kvm/cpuid.c                   |    6 +++-
> >  arch/x86/kvm/cpuid.h                   |    8 +++++++
> >  arch/x86/kvm/svm.c                     |    6 +++++
> >  arch/x86/kvm/vmx.c                     |   37
> +++++++++++++++++++++++++++++++-
> >  arch/x86/kvm/x86.c                     |   24
> ++++++++++++++++++--
> >  8 files changed, 82 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h
> > b/arch/x86/include/asm/kvm_host.h index db7c1f2..95828a4 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -48,12 +48,13 @@
> >
> >  #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
> > #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT |
> > X86_CR3_PCD))
> > +#define CR3_PCID_ENABLED_RESERVED_BITS 0xFFFFFF0000000000ULL
> >  #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS |
> 	\
> >  				  0xFFFFFF0000000000ULL)
> >  #define CR4_RESERVED_BITS
> \
> >  	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD |
> X86_CR4_DE\
> >  			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
> > -			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
> > +			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR |
> X86_CR4_PCIDE \
> >  			  | X86_CR4_OSXSAVE | X86_CR4_SMEP |
> X86_CR4_RDWRGSFS \
> >  			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
> >
> > @@ -661,6 +662,7 @@ struct kvm_x86_ops {
> >  	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> >  	int (*get_lpage_level)(void);
> >  	bool (*rdtscp_supported)(void);
> > +	bool (*invpcid_supported)(void);
> >  	void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment,
> > bool host);
> >
> >  	void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); diff
> > --git a/arch/x86/include/asm/processor-flags.h
> > b/arch/x86/include/asm/processor-flags.h
> > index f8ab3ea..aea1d1d 100644
> > --- a/arch/x86/include/asm/processor-flags.h
> > +++ b/arch/x86/include/asm/processor-flags.h
> > @@ -44,6 +44,7 @@
> >   */
> >  #define X86_CR3_PWT	0x00000008 /* Page Write Through */
> >  #define X86_CR3_PCD	0x00000010 /* Page Cache Disable */
> > +#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */
> >
> >  /*
> >   * Intel CPU features in CR4
> > @@ -61,6 +62,7 @@
> >  #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE
> exceptions */
> >  #define X86_CR4_VMXE	0x00002000 /* enable VMX virtualization */
> >  #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */
> > +#define X86_CR4_PCIDE	0x00020000 /* enable PCID support */
> >  #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
> >  #define X86_CR4_SMEP	0x00100000 /* enable SMEP support */
> >
> > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> > index 31f180c..b81525c 100644
> > --- a/arch/x86/include/asm/vmx.h
> > +++ b/arch/x86/include/asm/vmx.h
> > @@ -60,6 +60,7 @@
> >  #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
> >  #define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
> >  #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
> > +#define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
> >
> >
> >  #define PIN_BASED_EXT_INTR_MASK                 0x00000001
> > @@ -281,6 +282,7 @@ enum vmcs_field {
> >  #define EXIT_REASON_EPT_MISCONFIG       49
> >  #define EXIT_REASON_WBINVD		54
> >  #define EXIT_REASON_XSETBV		55
> > +#define EXIT_REASON_INVPCID		58
> >
> >  /*
> >   * Interruption-information format
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index
> > 7df1c6d..d13408a 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -201,6 +201,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2
> *entry, u32 function,
> >  	unsigned f_lm = 0;
> >  #endif
> >  	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
> > +	unsigned f_pcid = boot_cpu_has(X86_FEATURE_PCID) ? F(PCID) : 0;
> > +	unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) :
> > +0;
> >
> >  	/* cpuid 1.edx */
> >  	const u32 kvm_supported_word0_x86_features = @@ -228,7 +230,7 @@
> > static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> >  		0 /* DS-CPL, VMX, SMX, EST */ |
> >  		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
> >  		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> > -		0 /* Reserved, DCA */ | F(XMM4_1) |
> > +		f_pcid | 0 /* Reserved, DCA */ | F(XMM4_1) |
> >  		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> >  		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
> >  		F(F16C) | F(RDRAND);
> > @@ -248,7 +250,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2
> *entry, u32 function,
> >  	/* cpuid 7.0.ebx */
> >  	const u32 kvm_supported_word9_x86_features =
> >  		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
> > -		F(BMI2) | F(ERMS) | F(RTM);
> > +		F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
> >
> >  	/* all calls to cpuid_count() should be made on the same cpu */
> >  	get_cpu();
> > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index
> > 26d1fb4..e531d39 100644
> > --- a/arch/x86/kvm/cpuid.h
> > +++ b/arch/x86/kvm/cpuid.h
> > @@ -51,4 +51,12 @@ static inline bool guest_cpuid_has_osvw(struct
> kvm_vcpu *vcpu)
> >  	return best && (best->ecx & bit(X86_FEATURE_OSVW));  }
> >
> > +static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu) {
> > +	struct kvm_cpuid_entry2 *best;
> > +
> > +	best = kvm_find_cpuid_entry(vcpu, 1, 0);
> > +	return best && (best->ecx & bit(X86_FEATURE_PCID)); }
> > +
> >  #endif
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index
> > f75af40..81ed0ba 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -4044,6 +4044,11 @@ static bool svm_rdtscp_supported(void)
> >  	return false;
> >  }
> >
> > +static bool svm_invpcid_supported(void) {
> > +	return false;
> > +}
> > +
> >  static bool svm_has_wbinvd_exit(void)  {
> >  	return true;
> > @@ -4312,6 +4317,7 @@ static struct kvm_x86_ops svm_x86_ops = {
> >  	.cpuid_update = svm_cpuid_update,
> >
> >  	.rdtscp_supported = svm_rdtscp_supported,
> > +	.invpcid_supported = svm_invpcid_supported,
> >
> >  	.set_supported_cpuid = svm_set_supported_cpuid,
> >
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index
> > 32eb588..21760b9 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -849,6 +849,12 @@ static inline bool cpu_has_vmx_rdtscp(void)
> >  		SECONDARY_EXEC_RDTSCP;
> >  }
> >
> > +static inline bool cpu_has_vmx_invpcid(void) {
> > +	return vmcs_config.cpu_based_2nd_exec_ctrl &
> > +		SECONDARY_EXEC_ENABLE_INVPCID;
> > +}
> > +
> >  static inline bool cpu_has_virtual_nmis(void)  {
> >  	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
> @@
> > -1739,6 +1745,11 @@ static bool vmx_rdtscp_supported(void)
> >  	return cpu_has_vmx_rdtscp();
> >  }
> >
> > +static bool vmx_invpcid_supported(void) {
> > +	return cpu_has_vmx_invpcid();
> > +}
> > +
> >  /*
> >   * Swap MSR entry in host/guest MSR entry array.
> >   */
> > @@ -2458,7 +2469,8 @@ static __init int setup_vmcs_config(struct
> vmcs_config *vmcs_conf)
> >  			SECONDARY_EXEC_ENABLE_EPT |
> >  			SECONDARY_EXEC_UNRESTRICTED_GUEST |
> >  			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
> > -			SECONDARY_EXEC_RDTSCP;
> > +			SECONDARY_EXEC_RDTSCP |
> > +			SECONDARY_EXEC_ENABLE_INVPCID;
> >  		if (adjust_vmx_controls(min2, opt2,
> >  					MSR_IA32_VMX_PROCBASED_CTLS2,
> >  					&_cpu_based_2nd_exec_control) < 0) @@ -3731,6
> +3743,8 @@ static
> > u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
> >  	if (!enable_ept) {
> >  		exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
> >  		enable_unrestricted_guest = 0;
> > +		/* Enable INVPCID for non-ept guests may cause performance
> regression. */
> > +		exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> >  	}
> >  	if (!enable_unrestricted_guest)
> >  		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
> > @@ -6467,6 +6481,23 @@ static void vmx_cpuid_update(struct kvm_vcpu
> *vcpu)
> >  			}
> >  		}
> >  	}
> > +
> > +	if (vmx_invpcid_supported()) {
> > +		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> > +		/* Exposing INVPCID only when PCID is exposed */
> > +		best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> > +		if (best && (best->ecx & bit(X86_FEATURE_INVPCID)) &&
> guest_cpuid_has_pcid(vcpu)) {
> > +			exec_control |= SECONDARY_EXEC_ENABLE_INVPCID;
> > +			vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> > +				     exec_control);
> > +		} else {
> > +			exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> > +			vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> > +				     exec_control);
> > +			if (best)
> > +				best->ecx &= ~bit(X86_FEATURE_INVPCID);
> > +		}
> > +	}
> >  }
> >
> >  static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2
> > *entry) @@ -6610,6 +6641,9 @@ static void prepare_vmcs02(struct
> kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> >  				  page_to_phys(vmx->nested.apic_access_page));
> >  		}
> >
> > +		/* Explicitly disable INVPCID until PCID for L2 guest is supported */
> > +		exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> > +
> >  		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
> >  	}
> >
> > @@ -7201,6 +7235,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
> >  	.cpuid_update = vmx_cpuid_update,
> >
> >  	.rdtscp_supported = vmx_rdtscp_supported,
> > +	.invpcid_supported = vmx_invpcid_supported,
> >
> >  	.set_supported_cpuid = vmx_set_supported_cpuid,
> >
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index
> > be6d549..3a66d7b 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -528,6 +528,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned
> long cr0)
> >  			return 1;
> >  	}
> >
> > +	if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) &&
> > +	    kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
> > +		return 1;
> > +
> >  	kvm_x86_ops->set_cr0(vcpu, cr0);
> >
> >  	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
> > @@ -604,10 +608,20 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned
> long cr4)
> >  				   kvm_read_cr3(vcpu)))
> >  		return 1;
> >
> > +	if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
> > +		if (!guest_cpuid_has_pcid(vcpu))
> > +			return 1;
> > +
> > +		/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
> > +		if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK)
> || !is_long_mode(vcpu))
> > +			return 1;
> > +	}
> > +
> >  	if (kvm_x86_ops->set_cr4(vcpu, cr4))
> >  		return 1;
> >
> > -	if ((cr4 ^ old_cr4) & pdptr_bits)
> > +	if (((cr4 ^ old_cr4) & pdptr_bits) ||
> > +	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
> >  		kvm_mmu_reset_context(vcpu);
> >
> >  	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) @@ -626,8 +640,12 @@ int
> > kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
> >  	}
> >
> >  	if (is_long_mode(vcpu)) {
> > -		if (cr3 & CR3_L_MODE_RESERVED_BITS)
> > -			return 1;
> > +		if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
> > +			if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
> > +				return 1;
> > +		} else
> > +			if (cr3 & CR3_L_MODE_RESERVED_BITS)
> > +				return 1;
> >  	} else {
> >  		if (is_pae(vcpu)) {
> >  			if (cr3 & CR3_PAE_RESERVED_BITS)
> > --
> > 1.7.1
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in the
> > body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at
> > http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux