----- junaids@xxxxxxxxxx wrote:
> Implement support for INVPCID in shadow paging mode as well.
>
> Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |   1 +
>  arch/x86/kvm/mmu.c              |  18 ++++++
>  arch/x86/kvm/vmx.c              | 100 +++++++++++++++++++++++++++++++-
>  3 files changed, 116 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 21eb513205b0..9b30003b4429 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1304,6 +1304,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
>  int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
>  		       void *insn, int insn_len);
>  void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
> +void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
>  void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t old_cr3);
>
>  void kvm_enable_tdp(void);
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index d93eba7b8787..1258b8e4718b 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -5121,6 +5121,24 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
>  }
>  EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
>
> +void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
> +{
> +	struct kvm_mmu *mmu = &vcpu->arch.mmu;
> +
> +	if (pcid == kvm_get_active_pcid(vcpu)) {
> +		mmu->invlpg(vcpu, gva);
> +		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
> +	}
> +
> +	++vcpu->stat.invlpg;
> +
> +	/*
> +	 * Mappings not reachable via the current cr3 will be synced when
> +	 * switching to that cr3, so nothing needs to be done here for them.
> +	 */
> +}
> +EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
> +
>  void kvm_enable_tdp(void)
>  {
>  	tdp_enabled = true;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 7a73e97bae61..234498e0000f 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2822,7 +2822,7 @@ static bool vmx_rdtscp_supported(void)
>
>  static bool vmx_invpcid_supported(void)
>  {
> -	return cpu_has_vmx_invpcid() && enable_ept;
> +	return cpu_has_vmx_invpcid();
>  }
>
>  /*
> @@ -5823,8 +5823,6 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
>  	if (!enable_ept) {
>  		exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
>  		enable_unrestricted_guest = 0;
> -		/* Enable INVPCID for non-ept guests may cause performance regression. */
> -		exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
>  	}
>  	if (!enable_unrestricted_guest)
>  		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
> @@ -8357,6 +8355,101 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
>  	return kvm_skip_emulated_instruction(vcpu);
>  }
>
> +static int handle_invpcid(struct kvm_vcpu *vcpu)
> +{
> +	u32 vmx_instruction_info;
> +	unsigned long type;
> +	bool pcid_enabled;
> +	gva_t gva;
> +	struct x86_exception e;
> +	struct {
> +		u64 pcid;
> +		u64 gla;
> +	} operand;
> +
> +	if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
> +		kvm_queue_exception(vcpu, UD_VECTOR);
> +		return 1;
> +	}
> +
> +	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> +	type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
> +
> +	if (type > 3) {
> +		kvm_inject_gp(vcpu, 0);
> +		return 1;
> +	}
> +
> +	/* According to the Intel instruction reference, the memory operand
> +	 * is read even if it isn't needed (e.g., for type==all)
> +	 */
> +	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
> +				vmx_instruction_info, false, &gva))
> +		return 1;
> +
> +	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
> +				sizeof(operand), &e)) {
> +		kvm_inject_page_fault(vcpu, &e);
> +		return 1;
> +	}
> +
> +	if (operand.pcid >> 12 != 0) {
> +		kvm_inject_gp(vcpu, 0);
> +		return 1;
> +	}
> +
> +	pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
> +
> +	switch (type) {
> +	case INVPCID_TYPE_INDIV_ADDR:
> +		if ((!pcid_enabled && (operand.pcid != 0)) ||
> +		    is_noncanonical_address(operand.gla, vcpu)) {
> +			kvm_inject_gp(vcpu, 0);
> +			return 1;
> +		}
> +		kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
> +		skip_emulated_instruction(vcpu);
> +		return 1;
> +
> +	case INVPCID_TYPE_SINGLE_CTXT:
> +		if (!pcid_enabled && (operand.pcid != 0)) {
> +			kvm_inject_gp(vcpu, 0);
> +			return 1;
> +		}
> +
> +		if (kvm_get_active_pcid(vcpu) == operand.pcid) {
> +			kvm_mmu_sync_roots(vcpu);
> +			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
> +		}
> +
> +		/*
> +		 * If the current cr3 does not use the given PCID, then nothing
> +		 * needs to be done here because a resync will happen anyway
> +		 * before switching to any other CR3.
> +		 */
> +
> +		skip_emulated_instruction(vcpu);
> +		return 1;
> +
> +	case INVPCID_TYPE_ALL_NON_GLOBAL:
> +		/*
> +		 * Currently, KVM doesn't mark global entries in the shadow
> +		 * page tables, so a non-global flush just degenerates to a
> +		 * global flush. If needed, we could optimize this later by
> +		 * keeping track of global entries in shadow page tables.
> +		 */
> +
> +		/* fall-through */
> +	case INVPCID_TYPE_ALL_INCL_GLOBAL:
> +		kvm_mmu_unload(vcpu);
> +		skip_emulated_instruction(vcpu);
> +		return 1;
> +
> +	default:
> +		BUG(); /* We have already checked above that type <= 3 */
> +	}
> +}
> +
>  static int handle_pml_full(struct kvm_vcpu *vcpu)
>  {
>  	unsigned long exit_qualification;
> @@ -8560,6 +8653,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
>  	[EXIT_REASON_XSAVES]                  = handle_xsaves,
>  	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
>  	[EXIT_REASON_PML_FULL]                = handle_pml_full,
> +	[EXIT_REASON_INVPCID]                 = handle_invpcid,
>  	[EXIT_REASON_VMFUNC]                  = handle_vmfunc,
>  	[EXIT_REASON_PREEMPTION_TIMER]        = handle_preemption_timer,
>  };
> --
> 2.17.0.441.gb46fe60e1d-goog

We should make sure to add more tests to kvm-unit-tests' x86/pcid.c
covering all of the edge cases in this INVPCID emulation: #UD when the
CPUID bit is clear, #GP for type > 3, #GP for non-zero reserved
descriptor bits, #GP for a non-canonical address with the
individual-address type, and #GP for a non-zero PCID while CR4.PCIDE
is clear.
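
Something along these lines, perhaps? This is a rough, untested sketch:
it assumes the ASM_TRY()/exception_vector() helpers from lib/x86/desc.h
and the report(name, pass) interface from libcflat, and it redefines the
INVPCID_TYPE_* values locally since kvm-unit-tests doesn't pull in the
kernel's headers.

/* Hypothetical additions to x86/pcid.c */
#include "libcflat.h"
#include "processor.h"
#include "desc.h"

#define INVPCID_TYPE_INDIV_ADDR      0
#define INVPCID_TYPE_SINGLE_CTXT     1
#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
#define INVPCID_TYPE_ALL_NON_GLOBAL  3

/* 128-bit INVPCID descriptor: PCID in bits 11:0, bits 63:12 reserved,
 * linear address in bits 127:64. */
struct invpcid_desc {
	u64 pcid : 12;
	u64 rsvd : 52;
	u64 addr;
};

/* Execute INVPCID (66 0F 38 82 /r, hand-encoded since older binutils
 * lack the mnemonic) and return the vector of any exception raised,
 * or 0 on normal completion. */
static int invpcid_checking(unsigned long type, void *desc)
{
	asm volatile (ASM_TRY("1f")
		      ".byte 0x66, 0x0f, 0x38, 0x82, 0x18\n\t" /* invpcid (%rax), %rbx */
		      "1:" : : "a" (desc), "b" (type) : "memory");
	return exception_vector();
}

/* All of these assume they run with CR4.PCIDE still clear. */
static void test_invpcid_gp_cases(void)
{
	struct invpcid_desc desc = { 0 };

	/* type > 3 must #GP even with a valid descriptor */
	report("INVPCID type 4 -> #GP",
	       invpcid_checking(4, &desc) == GP_VECTOR);

	/* reserved descriptor bits 63:12 must be zero */
	desc.rsvd = 1;
	report("INVPCID reserved desc bits -> #GP",
	       invpcid_checking(INVPCID_TYPE_ALL_INCL_GLOBAL, &desc) == GP_VECTOR);
	desc.rsvd = 0;

	/* non-canonical address with the individual-address type */
	desc.addr = 1ull << 63;
	report("INVPCID non-canonical gla -> #GP",
	       invpcid_checking(INVPCID_TYPE_INDIV_ADDR, &desc) == GP_VECTOR);
	desc.addr = 0;

	/* non-zero PCID with CR4.PCIDE == 0, for types 0 and 1 */
	desc.pcid = 1;
	report("INVPCID pcid != 0, PCIDE == 0, type 0 -> #GP",
	       invpcid_checking(INVPCID_TYPE_INDIV_ADDR, &desc) == GP_VECTOR);
	report("INVPCID pcid != 0, PCIDE == 0, type 1 -> #GP",
	       invpcid_checking(INVPCID_TYPE_SINGLE_CTXT, &desc) == GP_VECTOR);
}

The #UD case (CPUID.INVPCID clear) can't be exercised from inside the
same guest, so that would presumably need a separate unittests.cfg entry
that masks the feature off on the qemu command line and checks that
invpcid_checking() returns UD_VECTOR instead.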