On Mon, May 21, 2012 at 12:40:42AM +0000, Mao, Junjie wrote: > > -----Original Message----- > > From: Marcelo Tosatti [mailto:mtosatti@xxxxxxxxxx] > > Sent: Saturday, May 19, 2012 5:51 AM > > To: Mao, Junjie > > Cc: nyh@xxxxxxxxxxxxxxxxxxx; 'kvm@xxxxxxxxxxxxxxx' > > Subject: Re: [PATCH v3] KVM: x86: Implement PCID/INVPCID for guests with > > EPT > > > > On Fri, May 18, 2012 at 06:17:05AM +0000, Mao, Junjie wrote: > > > This patch handles PCID/INVPCID for guests. > > > > > > Process-context identifiers (PCIDs) are a facility by which a logical > > > processor may cache information for multiple linear-address spaces so > > > that the processor may retain cached information when software > > > switches to a different linear address space. Refer to section 4.10.1 > > > in IA32 Intel Software Developer's Manual Volume 3A for details. > > > > > > For guests with EPT, the PCID feature is enabled and INVPCID behaves > > > as running natively. > > > For guests without EPT, the PCID feature is disabled and INVPCID triggers > > #UD. 
> > > > > > Changes from v2: > > > Seperate management of PCID and INVPCID > > > Prevent PCID bit in CPUID from exposing on guest hypervisors > > > Don't check the lower 12 bits when loading cr3 if cr4.PCIDE is set > > > Explicitly disable INVPCID for L2 guests > > > Support both enable and disable INVPCID in vmx_cpuid_update() > > > > > > Changes from v1: > > > Move cr0/cr4 writing checks to x86.c > > > Update comments for the reason why PCID is disabled for non-EPT guests > > > Do not support PCID/INVPCID for nested guests at present > > > Clean up useless symbols > > > > > > Signed-off-by: Junjie Mao <junjie.mao@xxxxxxxxx> > > > --- > > > arch/x86/include/asm/cpufeature.h | 1 + > > > arch/x86/include/asm/kvm_host.h | 5 ++- > > > arch/x86/include/asm/processor-flags.h | 2 + > > > arch/x86/include/asm/vmx.h | 2 + > > > arch/x86/kvm/cpuid.c | 6 ++- > > > arch/x86/kvm/cpuid.h | 8 +++++ > > > arch/x86/kvm/svm.c | 12 ++++++++ > > > arch/x86/kvm/vmx.c | 49 > > +++++++++++++++++++++++++++++++- > > > arch/x86/kvm/x86.c | 24 +++++++++++++-- > > > 9 files changed, 102 insertions(+), 7 deletions(-) > > > > > > diff --git a/arch/x86/include/asm/cpufeature.h > > > b/arch/x86/include/asm/cpufeature.h > > > index 8d67d42..1aedbc0 100644 > > > --- a/arch/x86/include/asm/cpufeature.h > > > +++ b/arch/x86/include/asm/cpufeature.h > > > @@ -203,6 +203,7 @@ > > > #define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution > > Protection */ > > > #define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation > > extensions */ > > > #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ > > > +#define X86_FEATURE_INVPCID (9*32+10) /* INVPCID instruction */ > > > > > > #if defined(__KERNEL__) && !defined(__ASSEMBLY__) > > > > > > diff --git a/arch/x86/include/asm/kvm_host.h > > > b/arch/x86/include/asm/kvm_host.h index 74c9edf..2c250e6 100644 > > > --- a/arch/x86/include/asm/kvm_host.h > > > +++ b/arch/x86/include/asm/kvm_host.h > > > @@ -47,12 +47,13 @@ > > > > > > 
#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) > > > #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | > > > X86_CR3_PCD)) > > > +#define CR3_PCID_ENABLED_RESERVED_BITS 0xFFFFFF0000000000ULL > > > #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | > > \ > > > 0xFFFFFF0000000000ULL) > > > #define CR4_RESERVED_BITS > > \ > > > (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | > > X86_CR4_DE\ > > > | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ > > > - | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ > > > + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | > > X86_CR4_PCIDE \ > > > | X86_CR4_OSXSAVE | X86_CR4_SMEP | > > X86_CR4_RDWRGSFS \ > > > | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) > > > > > > @@ -660,6 +661,8 @@ struct kvm_x86_ops { > > > u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); > > > int (*get_lpage_level)(void); > > > bool (*rdtscp_supported)(void); > > > + bool (*pcid_supported)(void); > > > + bool (*invpcid_supported)(void); > > > void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, > > > bool host); > > > > > > void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); diff > > > --git a/arch/x86/include/asm/processor-flags.h > > > b/arch/x86/include/asm/processor-flags.h > > > index f8ab3ea..aea1d1d 100644 > > > --- a/arch/x86/include/asm/processor-flags.h > > > +++ b/arch/x86/include/asm/processor-flags.h > > > @@ -44,6 +44,7 @@ > > > */ > > > #define X86_CR3_PWT 0x00000008 /* Page Write Through */ > > > #define X86_CR3_PCD 0x00000010 /* Page Cache Disable */ > > > +#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */ > > > > > > /* > > > * Intel CPU features in CR4 > > > @@ -61,6 +62,7 @@ > > > #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE > > exceptions */ > > > #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ > > > #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */ > > > +#define X86_CR4_PCIDE 0x00020000 /* enable PCID 
support */ > > > #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ > > > #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ > > > > > > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > > > index 31f180c..b81525c 100644 > > > --- a/arch/x86/include/asm/vmx.h > > > +++ b/arch/x86/include/asm/vmx.h > > > @@ -60,6 +60,7 @@ > > > #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 > > > #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 > > > #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 > > > +#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 > > > > > > > > > #define PIN_BASED_EXT_INTR_MASK 0x00000001 > > > @@ -281,6 +282,7 @@ enum vmcs_field { > > > #define EXIT_REASON_EPT_MISCONFIG 49 > > > #define EXIT_REASON_WBINVD 54 > > > #define EXIT_REASON_XSETBV 55 > > > +#define EXIT_REASON_INVPCID 58 > > > > > > /* > > > * Interruption-information format > > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index > > > 9fed5be..fd0b6b9 100644 > > > --- a/arch/x86/kvm/cpuid.c > > > +++ b/arch/x86/kvm/cpuid.c > > > @@ -201,6 +201,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 > > *entry, u32 function, > > > unsigned f_lm = 0; > > > #endif > > > unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; > > > + unsigned f_pcid = kvm_x86_ops->pcid_supported() ? F(PCID) : 0; > > > + unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? 
F(INVPCID) : > > > +0; > > > > > > /* cpuid 1.edx */ > > > const u32 kvm_supported_word0_x86_features = @@ -228,7 +230,7 @@ > > > static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, > > > 0 /* DS-CPL, VMX, SMX, EST */ | > > > 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | > > > F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | > > > - 0 /* Reserved, DCA */ | F(XMM4_1) | > > > + f_pcid | 0 /* Reserved, DCA */ | F(XMM4_1) | > > > F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | > > > 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | > > > F(F16C) | F(RDRAND); > > > @@ -247,7 +249,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 > > > *entry, u32 function, > > > > > > /* cpuid 7.0.ebx */ > > > const u32 kvm_supported_word9_x86_features = > > > - F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS); > > > + F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS) | > > > +f_invpcid; > > > > > > /* all calls to cpuid_count() should be made on the same cpu */ > > > get_cpu(); > > > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index > > > 26d1fb4..e531d39 100644 > > > --- a/arch/x86/kvm/cpuid.h > > > +++ b/arch/x86/kvm/cpuid.h > > > @@ -51,4 +51,12 @@ static inline bool guest_cpuid_has_osvw(struct > > kvm_vcpu *vcpu) > > > return best && (best->ecx & bit(X86_FEATURE_OSVW)); } > > > > > > +static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu) { > > > + struct kvm_cpuid_entry2 *best; > > > + > > > + best = kvm_find_cpuid_entry(vcpu, 1, 0); > > > + return best && (best->ecx & bit(X86_FEATURE_PCID)); } > > > + > > > #endif > > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index > > > 0b7690e..d42fcbe 100644 > > > --- a/arch/x86/kvm/svm.c > > > +++ b/arch/x86/kvm/svm.c > > > @@ -4012,6 +4012,16 @@ static bool svm_rdtscp_supported(void) > > > return false; > > > } > > > > > > +static bool svm_pcid_supported(void) > > > +{ > > > + return false; > > > +} > > > + > > > +static bool 
svm_invpcid_supported(void) { > > > + return false; > > > +} > > > + > > > static bool svm_has_wbinvd_exit(void) { > > > return true; > > > @@ -4280,6 +4290,8 @@ static struct kvm_x86_ops svm_x86_ops = { > > > .cpuid_update = svm_cpuid_update, > > > > > > .rdtscp_supported = svm_rdtscp_supported, > > > + .pcid_supported = svm_pcid_supported, > > > + .invpcid_supported = svm_invpcid_supported, > > > > > > .set_supported_cpuid = svm_set_supported_cpuid, > > > > > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index > > > d2bd719..1408d0b 100644 > > > --- a/arch/x86/kvm/vmx.c > > > +++ b/arch/x86/kvm/vmx.c > > > @@ -839,6 +839,12 @@ static inline bool cpu_has_vmx_rdtscp(void) > > > SECONDARY_EXEC_RDTSCP; > > > } > > > > > > +static inline bool cpu_has_vmx_invpcid(void) { > > > + return vmcs_config.cpu_based_2nd_exec_ctrl & > > > + SECONDARY_EXEC_ENABLE_INVPCID; > > > +} > > > + > > > static inline bool cpu_has_virtual_nmis(void) { > > > return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; > > @@ > > > -1711,6 +1717,22 @@ static bool vmx_rdtscp_supported(void) > > > return cpu_has_vmx_rdtscp(); > > > } > > > > > > +static bool vmx_pcid_supported(void) > > > +{ > > > + /* > > > + * This function is only for determining whether to expose PCID bit > > > + * in KVM_GET_SUPPORTED_CPUID ioctl. The HYPERVISOR bit is used to > > > + * prevent guest hypervisors from exposing it as PCID is currently > > > + * not supported on L2 guests. > > > + */ > > > + return (boot_cpu_has(X86_FEATURE_PCID) && !cpu_has_hypervisor); } > > > > This is not necessary: KVM will not expose secondary-exec-enable-invpcid > > support to the L1 guest, and so L1 guest will not attempt to expose PCID > > support to L2 guest. > > > > On a platform with PCID but without INVPCID, PCID will be exposed when both L1 and L2 are running with '-cpu host' if the '!cpu_has_hypervisor' check doesn't exist. Lacking of INVPCID doesn't prevent PCID from being exposed. 
Is it correct to expose PCID when INVPCID-EXITING is not supported? That is, if INVPCID-EXITING is not supported by VMX, it should not expose PCID (because INVPCID is not emulated, and, as you mentioned, emulating it would be slow), no? The problem with the cpu_has_hypervisor check is that it's Linux-specific. Any solution should also take into account other OSes running as an L1 guest and virtualizing an L2 guest. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html