On Thu, Jun 11, 2020 at 2:48 PM Babu Moger <babu.moger@xxxxxxx> wrote: > > The following intercept is added for the INVPCID instruction: > Code Name Cause > A2h VMEXIT_INVPCID INVPCID instruction > > The following bit is added to the VMCB layout control area > to control intercept of INVPCID: > Byte Offset Bit(s) Function > 14h 2 intercept INVPCID > > For the guests with nested page table (NPT) support, the INVPCID > feature works the same as when running natively. KVM does not need to do any > special handling in this case. > > Interceptions are required in the following cases. > 1. If the guest tries to disable the feature when the underlying > hardware supports it. In this case the hypervisor needs to report #UD. Per the AMD documentation, attempts to use INVPCID at CPL>0 will result in a #GP, regardless of the intercept bit. If the guest CPUID doesn't enumerate the feature, shouldn't the instruction raise #UD regardless of CPL? This seems to imply that we should intercept #GP and decode the instruction to see if we should synthesize #UD instead. > 2. When the guest is running with shadow page tables enabled. In > this case the hypervisor needs to handle the TLB flush based on the > type of the INVPCID instruction. > > AMD documentation for the INVPCID feature is available at "AMD64 > Architecture Programmer’s Manual Volume 2: System Programming, > Pub. 24593 Rev. 
3.34(or later)" > > The documentation can be obtained at the links below: > Link: https://www.amd.com/system/files/TechDocs/24593.pdf > Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 > > Signed-off-by: Babu Moger <babu.moger@xxxxxxx> > --- > arch/x86/include/asm/svm.h | 4 ++++ > arch/x86/include/uapi/asm/svm.h | 2 ++ > arch/x86/kvm/svm/svm.c | 42 +++++++++++++++++++++++++++++++++++++++ > 3 files changed, 48 insertions(+) > > diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h > index 62649fba8908..6488094f67fa 100644 > --- a/arch/x86/include/asm/svm.h > +++ b/arch/x86/include/asm/svm.h > @@ -55,6 +55,10 @@ enum { > INTERCEPT_RDPRU, > }; > > +/* Extended Intercept bits */ > +enum { > + INTERCEPT_INVPCID = 2, > +}; > > struct __attribute__ ((__packed__)) vmcb_control_area { > u32 intercept_cr; > diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h > index 2e8a30f06c74..522d42dfc28c 100644 > --- a/arch/x86/include/uapi/asm/svm.h > +++ b/arch/x86/include/uapi/asm/svm.h > @@ -76,6 +76,7 @@ > #define SVM_EXIT_MWAIT_COND 0x08c > #define SVM_EXIT_XSETBV 0x08d > #define SVM_EXIT_RDPRU 0x08e > +#define SVM_EXIT_INVPCID 0x0a2 > #define SVM_EXIT_NPF 0x400 > #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 > #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 > @@ -171,6 +172,7 @@ > { SVM_EXIT_MONITOR, "monitor" }, \ > { SVM_EXIT_MWAIT, "mwait" }, \ > { SVM_EXIT_XSETBV, "xsetbv" }, \ > + { SVM_EXIT_INVPCID, "invpcid" }, \ > { SVM_EXIT_NPF, "npf" }, \ > { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ > { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c > index 285e5e1ff518..82d974338f68 100644 > --- a/arch/x86/kvm/svm/svm.c > +++ b/arch/x86/kvm/svm/svm.c > @@ -813,6 +813,11 @@ static __init void svm_set_cpu_caps(void) > if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) || > boot_cpu_has(X86_FEATURE_AMD_SSBD)) > kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD); 
> + > + /* Enable INVPCID if both PCID and INVPCID enabled */ > + if (boot_cpu_has(X86_FEATURE_PCID) && > + boot_cpu_has(X86_FEATURE_INVPCID)) > + kvm_cpu_cap_set(X86_FEATURE_INVPCID); > } > > static __init int svm_hardware_setup(void) > @@ -1099,6 +1104,17 @@ static void init_vmcb(struct vcpu_svm *svm) > clr_intercept(svm, INTERCEPT_PAUSE); > } > > + /* > + * Intercept INVPCID instruction only if shadow page table is > + * enabled. Interception is not required with nested page table. > + */ > + if (boot_cpu_has(X86_FEATURE_INVPCID)) { > + if (!npt_enabled) > + set_extended_intercept(svm, INTERCEPT_INVPCID); > + else > + clr_extended_intercept(svm, INTERCEPT_INVPCID); > + } > + > if (kvm_vcpu_apicv_active(&svm->vcpu)) > avic_init_vmcb(svm); > > @@ -2715,6 +2731,23 @@ static int mwait_interception(struct vcpu_svm *svm) > return nop_interception(svm); > } > > +static int invpcid_interception(struct vcpu_svm *svm) > +{ > + struct kvm_vcpu *vcpu = &svm->vcpu; > + unsigned long type; > + gva_t gva; > + > + /* > + * For an INVPCID intercept: > + * EXITINFO1 provides the linear address of the memory operand. > + * EXITINFO2 provides the contents of the register operand. 
> + */ > + type = svm->vmcb->control.exit_info_2; > + gva = svm->vmcb->control.exit_info_1; > + > + return kvm_handle_invpcid_types(vcpu, gva, type); > +} > + > static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { > [SVM_EXIT_READ_CR0] = cr_interception, > [SVM_EXIT_READ_CR3] = cr_interception, > @@ -2777,6 +2810,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { > [SVM_EXIT_MWAIT] = mwait_interception, > [SVM_EXIT_XSETBV] = xsetbv_interception, > [SVM_EXIT_RDPRU] = rdpru_interception, > + [SVM_EXIT_INVPCID] = invpcid_interception, > [SVM_EXIT_NPF] = npf_interception, > [SVM_EXIT_RSM] = rsm_interception, > [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, > @@ -3562,6 +3596,14 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) > svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) && > guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); > > + /* > + * Intercept INVPCID instruction if the baremetal has the support > + * but the guest doesn't claim the feature. > + */ > + if (boot_cpu_has(X86_FEATURE_INVPCID) && > + !guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) > + set_extended_intercept(svm, INTERCEPT_INVPCID); > + What if INVPCID is enabled in the guest CPUID later? Shouldn't we then clear this intercept bit? > if (!kvm_vcpu_apicv_active(vcpu)) > return; > >