On Thu, Jun 14, 2012 at 02:04:25AM +0000, Mao, Junjie wrote: > This patch handles PCID/INVPCID for guests. > > Process-context identifiers (PCIDs) are a facility by which a logical processor > may cache information for multiple linear-address spaces so that the processor > may retain cached information when software switches to a different linear > address space. Refer to section 4.10.1 in IA32 Intel Software Developer's Manual > Volume 3A for details. > > For guests with EPT, the PCID feature is enabled and INVPCID behaves as running > natively. > For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD. > > Changes from v3: > Rebase to the latest tree > Expose PCID to nested guests > Remove the pcid_supported callback > > Changes from v2: > Seperate management of PCID and INVPCID > Prevent PCID bit in CPUID from exposing on guest hypervisors > Don't check the lower 12 bits when loading cr3 if cr4.PCIDE is set > Explicitly disable INVPCID for L2 guests > Support both enable and disable INVPCID in vmx_cpuid_update() > > Changes from v1: > Move cr0/cr4 writing checks to x86.c > Update comments for the reason why PCID is disabled for non-EPT guests > Do not support PCID/INVPCID for nested guests at present > Clean up useless symbols > > Signed-off-by: Junjie Mao <junjie.mao@xxxxxxxxx> Looks good to me. > --- > arch/x86/include/asm/kvm_host.h | 4 ++- > arch/x86/include/asm/processor-flags.h | 2 + > arch/x86/include/asm/vmx.h | 2 + > arch/x86/kvm/cpuid.c | 6 +++- > arch/x86/kvm/cpuid.h | 8 +++++++ > arch/x86/kvm/svm.c | 6 +++++ > arch/x86/kvm/vmx.c | 37 +++++++++++++++++++++++++++++++- > arch/x86/kvm/x86.c | 24 ++++++++++++++++++-- > 8 files changed, 82 insertions(+), 7 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index db7c1f2..95828a4 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -48,12 +48,13 @@ > > #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) > #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) > +#define CR3_PCID_ENABLED_RESERVED_BITS 0xFFFFFF0000000000ULL > #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ > 0xFFFFFF0000000000ULL) > #define CR4_RESERVED_BITS \ > (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ > | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ > - | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ > + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ > | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \ > | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) > > @@ -661,6 +662,7 @@ struct kvm_x86_ops { > u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); > int (*get_lpage_level)(void); > bool (*rdtscp_supported)(void); > + bool (*invpcid_supported)(void); > void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host); > > void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); > diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h > index f8ab3ea..aea1d1d 100644 > --- a/arch/x86/include/asm/processor-flags.h > +++ b/arch/x86/include/asm/processor-flags.h > @@ -44,6 +44,7 @@ > */ > #define X86_CR3_PWT 0x00000008 /* Page Write Through */ > #define X86_CR3_PCD 0x00000010 /* Page Cache Disable */ > +#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */ > > /* > * Intel CPU features in CR4 > @@ -61,6 +62,7 @@ > #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ > #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ > #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */ > +#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */ > #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ > #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ > > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > index 31f180c..b81525c 100644 > --- a/arch/x86/include/asm/vmx.h > +++ b/arch/x86/include/asm/vmx.h > @@ -60,6 +60,7 @@ > #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 > #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 > #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 > +#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 > > > #define PIN_BASED_EXT_INTR_MASK 0x00000001 > @@ -281,6 +282,7 @@ enum vmcs_field { > #define EXIT_REASON_EPT_MISCONFIG 49 > #define EXIT_REASON_WBINVD 54 > #define EXIT_REASON_XSETBV 55 > +#define EXIT_REASON_INVPCID 58 > > /* > * Interruption-information format > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > index 7df1c6d..d13408a 100644 > --- a/arch/x86/kvm/cpuid.c > +++ b/arch/x86/kvm/cpuid.c > @@ -201,6 +201,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, > unsigned f_lm = 0; > #endif > unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; > + unsigned f_pcid = boot_cpu_has(X86_FEATURE_PCID) ? F(PCID) : 0; > + unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; > > /* cpuid 1.edx */ > const u32 kvm_supported_word0_x86_features = > @@ -228,7 +230,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, > 0 /* DS-CPL, VMX, SMX, EST */ | > 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | > F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | > - 0 /* Reserved, DCA */ | F(XMM4_1) | > + f_pcid | 0 /* Reserved, DCA */ | F(XMM4_1) | > F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | > 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | > F(F16C) | F(RDRAND); > @@ -248,7 +250,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, > /* cpuid 7.0.ebx */ > const u32 kvm_supported_word9_x86_features = > F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | > - F(BMI2) | F(ERMS) | F(RTM); > + F(BMI2) | F(ERMS) | f_invpcid | F(RTM); > > /* all calls to cpuid_count() should be made on the same cpu */ > get_cpu(); > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h > index 26d1fb4..e531d39 100644 > --- a/arch/x86/kvm/cpuid.h > +++ b/arch/x86/kvm/cpuid.h > @@ -51,4 +51,12 @@ static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) > return best && (best->ecx & bit(X86_FEATURE_OSVW)); > } > > +static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu) > +{ > + struct kvm_cpuid_entry2 *best; > + > + best = kvm_find_cpuid_entry(vcpu, 1, 0); > + return best && (best->ecx & bit(X86_FEATURE_PCID)); > +} > + > #endif > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index f75af40..81ed0ba 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -4044,6 +4044,11 @@ static bool svm_rdtscp_supported(void) > return false; > } > > +static bool svm_invpcid_supported(void) > +{ > + return false; > +} > + > static bool svm_has_wbinvd_exit(void) > { > return true; > @@ -4312,6 +4317,7 @@ static struct kvm_x86_ops svm_x86_ops = { > .cpuid_update = svm_cpuid_update, > > .rdtscp_supported = svm_rdtscp_supported, > + .invpcid_supported = svm_invpcid_supported, > > .set_supported_cpuid = svm_set_supported_cpuid, > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 32eb588..21760b9 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -849,6 +849,12 @@ static inline bool cpu_has_vmx_rdtscp(void) > SECONDARY_EXEC_RDTSCP; > } > > +static inline bool cpu_has_vmx_invpcid(void) > +{ > + return vmcs_config.cpu_based_2nd_exec_ctrl & > + SECONDARY_EXEC_ENABLE_INVPCID; > +} > + > static inline bool cpu_has_virtual_nmis(void) > { > return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; > @@ -1739,6 +1745,11 @@ static bool vmx_rdtscp_supported(void) > return cpu_has_vmx_rdtscp(); > } > > +static bool vmx_invpcid_supported(void) > +{ > + return cpu_has_vmx_invpcid(); > +} > + > /* > * Swap MSR entry in host/guest MSR entry array. > */ > @@ -2458,7 +2469,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) > SECONDARY_EXEC_ENABLE_EPT | > SECONDARY_EXEC_UNRESTRICTED_GUEST | > SECONDARY_EXEC_PAUSE_LOOP_EXITING | > - SECONDARY_EXEC_RDTSCP; > + SECONDARY_EXEC_RDTSCP | > + SECONDARY_EXEC_ENABLE_INVPCID; > if (adjust_vmx_controls(min2, opt2, > MSR_IA32_VMX_PROCBASED_CTLS2, > &_cpu_based_2nd_exec_control) < 0) > @@ -3731,6 +3743,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) > if (!enable_ept) { > exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; > enable_unrestricted_guest = 0; > + /* Enable INVPCID for non-ept guests may cause performance regression. */ > + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; > } > if (!enable_unrestricted_guest) > exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; > @@ -6467,6 +6481,23 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) > } > } > } > + > + if (vmx_invpcid_supported()) { > + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); > + /* Exposing INVPCID only when PCID is exposed */ > + best = kvm_find_cpuid_entry(vcpu, 0x7, 0); > + if (best && (best->ecx & bit(X86_FEATURE_INVPCID)) && guest_cpuid_has_pcid(vcpu)) { > + exec_control |= SECONDARY_EXEC_ENABLE_INVPCID; > + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, > + exec_control); > + } else { > + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; > + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, > + exec_control); > + if (best) > + best->ecx &= ~bit(X86_FEATURE_INVPCID); > + } > + } > } > > static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) > @@ -6610,6 +6641,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > page_to_phys(vmx->nested.apic_access_page)); > } > > + /* Explicitly disable INVPCID until PCID for L2 guest is supported */ > + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; > + > vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); > } > > @@ -7201,6 +7235,7 @@ static struct kvm_x86_ops vmx_x86_ops = { > .cpuid_update = vmx_cpuid_update, > > .rdtscp_supported = vmx_rdtscp_supported, > + .invpcid_supported = vmx_invpcid_supported, > > .set_supported_cpuid = vmx_set_supported_cpuid, > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index be6d549..3a66d7b 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -528,6 +528,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) > return 1; > } > > + if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) && > + kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) > + return 1; > + > kvm_x86_ops->set_cr0(vcpu, cr0); > > if ((cr0 ^ old_cr0) & X86_CR0_PG) { > @@ -604,10 +608,20 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) > kvm_read_cr3(vcpu))) > return 1; > > + if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) { > + if (!guest_cpuid_has_pcid(vcpu)) > + return 1; > + > + /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ > + if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu)) > + return 1; > + } > + > if (kvm_x86_ops->set_cr4(vcpu, cr4)) > return 1; > > - if ((cr4 ^ old_cr4) & pdptr_bits) > + if (((cr4 ^ old_cr4) & pdptr_bits) || > + (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) > kvm_mmu_reset_context(vcpu); > > if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) > @@ -626,8 +640,12 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) > } > > if (is_long_mode(vcpu)) { > - if (cr3 & CR3_L_MODE_RESERVED_BITS) > - return 1; > + if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) { > + if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS) > + return 1; > + } else > + if (cr3 & CR3_L_MODE_RESERVED_BITS) > + return 1; > } else { > if (is_pae(vcpu)) { > if (cr3 & CR3_PAE_RESERVED_BITS) > -- > 1.7.1 > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html