Re: [PATCH v2 03/17] kvm: x86: Add fast CR3 switch code path

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Jun 12, 2018 at 03:52:30PM -0700, Junaid Shahid wrote:
> When using shadow paging, a CR3 switch in the guest results in a VM Exit.
> In the common case, that VM exit doesn't require much processing by KVM.
> However, it does acquire the MMU lock, which can start showing signs of
> contention under some workloads even on a 2 VCPU VM when the guest is
> using KPTI. Therefore, we add a fast path that avoids acquiring the MMU
> lock in the most common cases e.g. when switching back and forth between
> the kernel and user mode CR3s used by KPTI with no guest page table
> changes in between.
> 
> For now, this fast path is implemented only for 64-bit guests and hosts
> to avoid the handling of PDPTEs, but it can be extended later to 32-bit
> guests and/or hosts as well.
> 
> Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h | 13 ++++++-
>  arch/x86/kvm/mmu.c              | 66 ++++++++++++++++++++++++++++++---
>  arch/x86/kvm/x86.c              |  3 +-
>  3 files changed, 74 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 0ebe659f2802..0869a684f852 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -325,6 +325,14 @@ struct rsvd_bits_validate {
>  	u64 bad_mt_xwr;
>  };
>  
> +struct kvm_mmu_root_info {
> +	gpa_t cr3;
> +	hpa_t hpa;
> +};
> +
> +#define KVM_MMU_ROOT_INFO_INVALID \
> +	((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
> +
>  /*
>   * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
>   * and 2-level 32-bit).  The kvm_mmu structure abstracts the details of the
> @@ -353,6 +361,7 @@ struct kvm_mmu {
>  	u8 shadow_root_level;
>  	u8 ept_ad;
>  	bool direct_map;
> +	struct kvm_mmu_root_info prev_root;
>  
>  	/*
>  	 * Bitmap; bit set = permission fault
> @@ -1279,7 +1288,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
>  int kvm_mmu_load(struct kvm_vcpu *vcpu);
>  void kvm_mmu_unload(struct kvm_vcpu *vcpu);
>  void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
> -void kvm_mmu_free_roots(struct kvm_vcpu *vcpu);
> +void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, bool free_prev_root);
>  gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
>  			   struct x86_exception *exception);
>  gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
> @@ -1298,7 +1307,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
>  int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
>  		       void *insn, int insn_len);
>  void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
> -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
> +void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3);
>  
>  void kvm_enable_tdp(void);
>  void kvm_disable_tdp(void);
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index ad2e2a00dc71..0c52b5d1010b 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3396,17 +3396,22 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
>  	*root_hpa = INVALID_PAGE;
>  }
>  
> -void kvm_mmu_free_roots(struct kvm_vcpu *vcpu)
> +void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, bool free_prev_root)
>  {
>  	int i;
>  	LIST_HEAD(invalid_list);
>  	struct kvm_mmu *mmu = &vcpu->arch.mmu;
>  
> -	if (!VALID_PAGE(mmu->root_hpa))
> +	if (!VALID_PAGE(mmu->root_hpa) &&
> +	    (!VALID_PAGE(mmu->prev_root.hpa) || !free_prev_root))
>  		return;
>  
>  	spin_lock(&vcpu->kvm->mmu_lock);
>  
> +	if (free_prev_root)
> +		mmu_free_root_page(vcpu->kvm, &mmu->prev_root.hpa,
> +				   &invalid_list);
> +
>  	if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
>  	    (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
>  		mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list);
> @@ -4006,13 +4011,59 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
>  	context->root_level = 0;
>  	context->shadow_root_level = PT32E_ROOT_LEVEL;
>  	context->root_hpa = INVALID_PAGE;
> +	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
>  	context->direct_map = true;
>  	context->nx = false;
>  }
>  
> -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
> +static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3)
>  {
> -	kvm_mmu_free_roots(vcpu);
> +	struct kvm_mmu *mmu = &vcpu->arch.mmu;
> +
> +	/*
> +	 * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid
> +	 * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
> +	 * later if necessary.
> +	 */
> +	if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
> +	    mmu->root_level >= PT64_ROOT_4LEVEL) {
> +		gpa_t prev_cr3 = mmu->prev_root.cr3;
> +
> +		if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT))
> +			return false;
> +
> +		swap(mmu->root_hpa, mmu->prev_root.hpa);
> +		mmu->prev_root.cr3 = kvm_read_cr3(vcpu);
> +
> +		if (new_cr3 == prev_cr3 && VALID_PAGE(mmu->root_hpa)) {
> +			/*
> +			 * It is possible that the cached previous root page is
> +			 * obsolete because of a change in the MMU
> +			 * generation number. However, that is accompanied by
> +			 * KVM_REQ_MMU_RELOAD, which will free the root that we
> +			 * have set here and allocate a new one.
> +			 */
> +
> +			kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
> +			__clear_sp_write_flooding_count(
> +				page_header(mmu->root_hpa));
> +
> +			vcpu->arch.cr3 = new_cr3;
> +			__set_bit(VCPU_EXREG_CR3,
> +				  (ulong *)&vcpu->arch.regs_avail);

Setting arch.cr3 here is unnecessary; kvm_set_cr3() handles that
duty both before and after this patch.  It's also confusing because the
code in question is later removed in PATCH 06/17, which makes it seem
like that patch is introducing a bug.

> +			mmu->set_cr3(vcpu, mmu->root_hpa);
> +
> +			return true;
> +		}
> +	}
> +
> +	return false;
> +}
> +
> +void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3)
> +{
> +	if (!fast_cr3_switch(vcpu, new_cr3))
> +		kvm_mmu_free_roots(vcpu, false);
>  }
>  
>  static unsigned long get_cr3(struct kvm_vcpu *vcpu)
> @@ -4490,6 +4541,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
>  	context->update_pte = paging64_update_pte;
>  	context->shadow_root_level = level;
>  	context->root_hpa = INVALID_PAGE;
> +	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
>  	context->direct_map = false;
>  }
>  
> @@ -4520,6 +4572,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
>  	context->update_pte = paging32_update_pte;
>  	context->shadow_root_level = PT32E_ROOT_LEVEL;
>  	context->root_hpa = INVALID_PAGE;
> +	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
>  	context->direct_map = false;
>  }
>  
> @@ -4543,6 +4596,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
>  	context->update_pte = nonpaging_update_pte;
>  	context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
>  	context->root_hpa = INVALID_PAGE;
> +	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
>  	context->direct_map = true;
>  	context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
>  	context->get_cr3 = get_cr3;
> @@ -4625,6 +4679,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
>  	context->update_pte = ept_update_pte;
>  	context->root_level = PT64_ROOT_4LEVEL;
>  	context->root_hpa = INVALID_PAGE;
> +	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
>  	context->direct_map = false;
>  	context->base_role.ad_disabled = !accessed_dirty;
>  	context->base_role.guest_mode = 1;
> @@ -4727,7 +4782,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
>  
>  void kvm_mmu_unload(struct kvm_vcpu *vcpu)
>  {
> -	kvm_mmu_free_roots(vcpu);
> +	kvm_mmu_free_roots(vcpu, true);
>  	WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
>  }
>  EXPORT_SYMBOL_GPL(kvm_mmu_unload);
> @@ -5107,6 +5162,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
>  {
>  	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
>  	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
> +	vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID;
>  	vcpu->arch.mmu.translate_gpa = translate_gpa;
>  	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 06dd4cdb2ca8..2d1041b5739b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -867,9 +867,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
>  		   !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
>  		return 1;
>  
> +	kvm_mmu_new_cr3(vcpu, cr3);
>  	vcpu->arch.cr3 = cr3;
>  	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
> -	kvm_mmu_new_cr3(vcpu);
> +
>  	return 0;
>  }
>  EXPORT_SYMBOL_GPL(kvm_set_cr3);
> -- 
> 2.18.0.rc1.242.g61856ae69a-goog
> 



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux