This patch handles PCID/INVPCID for guests. Process-context identifiers (PCIDs) are a facility by which a logical processor may cache information for multiple linear-address spaces so that the processor may retain cached information when software switches to a different linear address space. Refer to section 4.10.1 in IA32 Intel Software Developer's Manual Volume 3A for details. For guests with EPT, the PCID feature is enabled and INVPCID behaves as running natively. For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD. Changes from v1: Move cr0/cr4 writing checks to x86.c Update comments for the reason why PCID is disabled for non-EPT guests Do not support PCID/INVPCID for nested guests at present Clean up useless symbols Signed-off-by: Junjie Mao <junjie.mao@xxxxxxxxx> --- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/include/asm/processor-flags.h | 2 ++ arch/x86/include/asm/vmx.h | 2 ++ arch/x86/kvm/cpuid.c | 6 ++++-- arch/x86/kvm/cpuid.h | 8 ++++++++ arch/x86/kvm/svm.c | 6 ++++++ arch/x86/kvm/vmx.c | 29 +++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 16 +++++++++++++++- 9 files changed, 69 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 8d67d42..1aedbc0 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -203,6 +203,7 @@ #define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ #define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */ #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ +#define X86_FEATURE_INVPCID (9*32+10) /* INVPCID instruction */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 74c9edf..bb9a707 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -52,7 +52,7 @@ #define CR4_RESERVED_BITS \ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ - | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) @@ -660,6 +660,7 @@ struct kvm_x86_ops { u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); int (*get_lpage_level)(void); bool (*rdtscp_supported)(void); + bool (*pcid_supported)(void); void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host); void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index f8ab3ea..aea1d1d 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -44,6 +44,7 @@ */ #define X86_CR3_PWT 0x00000008 /* Page Write Through */ #define X86_CR3_PCD 0x00000010 /* Page Cache Disable */ +#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */ /* * Intel CPU features in CR4 @@ -61,6 +62,7 @@ #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */ +#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */ #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 31f180c..b81525c 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -60,6 +60,7 @@ #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 +#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 @@ -281,6 +282,7 @@ enum vmcs_field { #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_INVPCID 58 /* * Interruption-information format diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9fed5be..8d4a361 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -201,6 +201,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, unsigned f_lm = 0; #endif unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; + unsigned f_pcid = kvm_x86_ops->pcid_supported() ? F(PCID) : 0; + unsigned f_invpcid = kvm_x86_ops->pcid_supported() ? F(INVPCID) : 0; /* cpuid 1.edx */ const u32 kvm_supported_word0_x86_features = @@ -228,7 +230,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 0 /* DS-CPL, VMX, SMX, EST */ | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | - 0 /* Reserved, DCA */ | F(XMM4_1) | + f_pcid | 0 /* Reserved, DCA */ | F(XMM4_1) | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | F(F16C) | F(RDRAND); @@ -247,7 +249,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.ebx */ const u32 kvm_supported_word9_x86_features = - F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS); + F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS) | f_invpcid; /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 26d1fb4..e531d39 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -51,4 +51,12 @@ static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) return best && (best->ecx & bit(X86_FEATURE_OSVW)); } +static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 1, 0); + return best && (best->ecx & bit(X86_FEATURE_PCID)); +} + #endif diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0b7690e..42726cf 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4012,6 +4012,11 @@ static bool svm_rdtscp_supported(void) return false; } +static bool svm_pcid_supported(void) +{ + return false; +} + static bool svm_has_wbinvd_exit(void) { return true; @@ -4280,6 +4285,7 @@ static struct kvm_x86_ops svm_x86_ops = { .cpuid_update = svm_cpuid_update, .rdtscp_supported = svm_rdtscp_supported, + .pcid_supported = svm_pcid_supported, .set_supported_cpuid = svm_set_supported_cpuid, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d2bd719..9c77711 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1711,6 +1711,18 @@ static bool vmx_rdtscp_supported(void) return cpu_has_vmx_rdtscp(); } +static bool vmx_pcid_supported(void) +{ + /* + * Enable INVPCID for non-ept guests may cause performance regression, + * and without INVPCID, PCID has little benefits. So disable them all + * for non-ept guests. + * + * PCID is not supported for nested guests yet. + */ + return enable_ept && (boot_cpu_data.x86_capability[4] & bit(X86_FEATURE_PCID)) && !cpu_has_hypervisor; +} + /* * Swap MSR entry in host/guest MSR entry array. */ @@ -2425,6 +2437,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_UNRESTRICTED_GUEST | SECONDARY_EXEC_PAUSE_LOOP_EXITING | SECONDARY_EXEC_RDTSCP; + if (vmx_pcid_supported()) + opt2 |= SECONDARY_EXEC_ENABLE_INVPCID; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -6420,6 +6434,20 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) } } } + + if (vmx_pcid_supported()) { + best = kvm_find_cpuid_entry(vcpu, 0x1, 0); + if (!best || !(best->ecx & bit(X86_FEATURE_PCID))) { + /* Hiding INVPCID when PCID is not exposed. */ + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, + exec_control); + best = kvm_find_cpuid_entry(vcpu, 0x7, 0); + if (best) + best->ecx &= ~bit(X86_FEATURE_INVPCID); + } + } } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) @@ -7154,6 +7182,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .cpuid_update = vmx_cpuid_update, .rdtscp_supported = vmx_rdtscp_supported, + .pcid_supported = vmx_pcid_supported, .set_supported_cpuid = vmx_set_supported_cpuid, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c9d99e5..f930597 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -527,6 +527,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) return 1; } + if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) && + kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) + return 1; + kvm_x86_ops->set_cr0(vcpu, cr0); if ((cr0 ^ old_cr0) & X86_CR0_PG) { @@ -603,10 +607,20 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) kvm_read_cr3(vcpu))) return 1; + if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) { + if (!guest_cpuid_has_pcid(vcpu)) + return 1; + + /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ + if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu)) + return 1; + } + if (kvm_x86_ops->set_cr4(vcpu, cr4)) return 1; - if ((cr4 ^ old_cr4) & pdptr_bits) + if (((cr4 ^ old_cr4) & pdptr_bits) || + (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) kvm_mmu_reset_context(vcpu); if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html