----- junaids@xxxxxxxxxx wrote:
> Implement support for INVPCID in shadow paging mode as well.
>
> Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |   1 +
>  arch/x86/kvm/mmu.c              |  18 ++++++
>  arch/x86/kvm/vmx.c              | 100 +++++++++++++++++++++++++++++++-
>  3 files changed, 116 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 21eb513205b0..9b30003b4429 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1304,6 +1304,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
>  int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
>  		       void *insn, int insn_len);
>  void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
> +void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
>  void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t old_cr3);
>
>  void kvm_enable_tdp(void);
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index d93eba7b8787..1258b8e4718b 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -5121,6 +5121,24 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
>  }
>  EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
>
> +void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
> +{
> +	struct kvm_mmu *mmu = &vcpu->arch.mmu;
> +
> +	if (pcid == kvm_get_active_pcid(vcpu)) {
> +		mmu->invlpg(vcpu, gva);
> +		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
> +	}
> +
> +	++vcpu->stat.invlpg;
> +
> +	/*
> +	 * Mappings not reachable via the current cr3 will be synced when
> +	 * switching to that cr3, so nothing needs to be done here for them.
> +	 */
> +}
> +EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
> +
>  void kvm_enable_tdp(void)
>  {
>  	tdp_enabled = true;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 7a73e97bae61..234498e0000f 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2822,7 +2822,7 @@ static bool vmx_rdtscp_supported(void)
>
>  static bool vmx_invpcid_supported(void)
>  {
> -	return cpu_has_vmx_invpcid() && enable_ept;
> +	return cpu_has_vmx_invpcid();
>  }
>
>  /*
> @@ -5823,8 +5823,6 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
>  	if (!enable_ept) {
>  		exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
>  		enable_unrestricted_guest = 0;
> -		/* Enable INVPCID for non-ept guests may cause performance regression. */
> -		exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
>  	}
>  	if (!enable_unrestricted_guest)
>  		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
> @@ -8357,6 +8355,101 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
>  	return kvm_skip_emulated_instruction(vcpu);
>  }
>
> +static int handle_invpcid(struct kvm_vcpu *vcpu)
> +{
> +	u32 vmx_instruction_info;
> +	unsigned long type;
> +	bool pcid_enabled;
> +	gva_t gva;
> +	struct x86_exception e;
> +	struct {
> +		u64 pcid;
> +		u64 gla;
> +	} operand;
> +
> +	if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
> +		kvm_queue_exception(vcpu, UD_VECTOR);
> +		return 1;
> +	}
> +
> +	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> +	type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
> +
> +	if (type > 3) {
> +		kvm_inject_gp(vcpu, 0);
> +		return 1;
> +	}
> +
> +	/* According to the Intel instruction reference, the memory operand
> +	 * is read even if it isn't needed (e.g., for type==all)
> +	 */
> +	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
> +				vmx_instruction_info, false, &gva))
> +		return 1;
> +
> +	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
> +				sizeof(operand), &e)) {
> +		kvm_inject_page_fault(vcpu, &e);
> +		return 1;
> +	}
> +
> +	if (operand.pcid >> 12 != 0) {
> +		kvm_inject_gp(vcpu, 0);
> +		return 1;
> +	}
> +
> +	pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
> +
> +	switch (type) {
> +	case INVPCID_TYPE_INDIV_ADDR:
> +		if ((!pcid_enabled && (operand.pcid != 0)) ||
> +		    is_noncanonical_address(operand.gla, vcpu)) {
> +			kvm_inject_gp(vcpu, 0);
> +			return 1;
> +		}
> +		kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
> +		skip_emulated_instruction(vcpu);
> +		return 1;
> +
> +	case INVPCID_TYPE_SINGLE_CTXT:
> +		if (!pcid_enabled && (operand.pcid != 0)) {
> +			kvm_inject_gp(vcpu, 0);
> +			return 1;
> +		}
> +
> +		if (kvm_get_active_pcid(vcpu) == operand.pcid) {
> +			kvm_mmu_sync_roots(vcpu);
> +			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
> +		}
> +
> +		/*
> +		 * If the current cr3 does not use the given PCID, then nothing
> +		 * needs to be done here because a resync will happen anyway
> +		 * before switching to any other CR3.
> +		 */
> +
> +		skip_emulated_instruction(vcpu);
> +		return 1;
> +
> +	case INVPCID_TYPE_ALL_NON_GLOBAL:
> +		/*
> +		 * Currently, KVM doesn't mark global entries in the shadow
> +		 * page tables, so a non-global flush just degenerates to a
> +		 * global flush. If needed, we could optimize this later by
> +		 * keeping track of global entries in shadow page tables.
> +		 */
> +
> +		/* fall-through */
> +	case INVPCID_TYPE_ALL_INCL_GLOBAL:
> +		kvm_mmu_unload(vcpu);
> +		skip_emulated_instruction(vcpu);
> +		return 1;
> +
> +	default:
> +		BUG(); /* We have already checked above that type <= 3 */
> +	}
> +}
> +
>  static int handle_pml_full(struct kvm_vcpu *vcpu)
>  {
>  	unsigned long exit_qualification;
> @@ -8560,6 +8653,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
>  	[EXIT_REASON_XSAVES]                  = handle_xsaves,
>  	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
>  	[EXIT_REASON_PML_FULL]                = handle_pml_full,
> +	[EXIT_REASON_INVPCID]                 = handle_invpcid,
>  	[EXIT_REASON_VMFUNC]                  = handle_vmfunc,
>  	[EXIT_REASON_PREEMPTION_TIMER]        = handle_preemption_timer,
>  };
> --
> 2.17.0.441.gb46fe60e1d-goog

We should make sure to add more tests to kvm-unit-tests' x86/pcid.c
covering all of the edge cases in this INVPCID emulation: #UD when the
CPUID bit is clear, #GP for type > 3, #GP for non-zero reserved
descriptor bits, #GP for a non-canonical address with the
individual-address type, and #GP for a non-zero PCID while CR4.PCIDE
is clear.
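
Something along these lines, perhaps? This is a rough, untested sketch:
it assumes the ASM_TRY()/exception_vector() helpers from lib/x86/desc.h
and the report(name, pass) interface from libcflat, and it redefines the
INVPCID_TYPE_* values locally since kvm-unit-tests doesn't pull in the
kernel's headers.

/* Hypothetical additions to x86/pcid.c */
#include "libcflat.h"
#include "processor.h"
#include "desc.h"

#define INVPCID_TYPE_INDIV_ADDR      0
#define INVPCID_TYPE_SINGLE_CTXT     1
#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
#define INVPCID_TYPE_ALL_NON_GLOBAL  3

/* 128-bit INVPCID descriptor: PCID in bits 11:0, bits 63:12 reserved,
 * linear address in bits 127:64. */
struct invpcid_desc {
	u64 pcid : 12;
	u64 rsvd : 52;
	u64 addr;
};

/* Execute INVPCID (66 0F 38 82 /r, hand-encoded since older binutils
 * lack the mnemonic) and return the vector of any exception raised,
 * or 0 on normal completion. */
static int invpcid_checking(unsigned long type, void *desc)
{
	asm volatile (ASM_TRY("1f")
		      ".byte 0x66, 0x0f, 0x38, 0x82, 0x18\n\t" /* invpcid (%rax), %rbx */
		      "1:" : : "a" (desc), "b" (type) : "memory");
	return exception_vector();
}

/* All of these assume they run with CR4.PCIDE still clear. */
static void test_invpcid_gp_cases(void)
{
	struct invpcid_desc desc = { 0 };

	/* type > 3 must #GP even with a valid descriptor */
	report("INVPCID type 4 -> #GP",
	       invpcid_checking(4, &desc) == GP_VECTOR);

	/* reserved descriptor bits 63:12 must be zero */
	desc.rsvd = 1;
	report("INVPCID reserved desc bits -> #GP",
	       invpcid_checking(INVPCID_TYPE_ALL_INCL_GLOBAL, &desc) == GP_VECTOR);
	desc.rsvd = 0;

	/* non-canonical address with the individual-address type */
	desc.addr = 1ull << 63;
	report("INVPCID non-canonical gla -> #GP",
	       invpcid_checking(INVPCID_TYPE_INDIV_ADDR, &desc) == GP_VECTOR);
	desc.addr = 0;

	/* non-zero PCID with CR4.PCIDE == 0, for types 0 and 1 */
	desc.pcid = 1;
	report("INVPCID pcid != 0, PCIDE == 0, type 0 -> #GP",
	       invpcid_checking(INVPCID_TYPE_INDIV_ADDR, &desc) == GP_VECTOR);
	report("INVPCID pcid != 0, PCIDE == 0, type 1 -> #GP",
	       invpcid_checking(INVPCID_TYPE_SINGLE_CTXT, &desc) == GP_VECTOR);
}

The #UD case (CPUID.INVPCID clear) can't be exercised from inside the
same guest, so that would presumably need a separate unittests.cfg entry
that masks the feature off on the qemu command line and checks that
invpcid_checking() returns UD_VECTOR instead.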