On Tue, May 23, 2023 at 03:51:49PM -0700, Sean Christopherson wrote:
> Rather than add a helper to zap "for memtype", add a helper to query if KVM honors
> guest MTRRs. The "for memtype" isn't intuitive and ends up being incomplete.
> That will also allow for deduplicating other code (and a comment). Waiting until
> the zap also wastes cycles in the MTRR case: there's no need to compute start and
> end if the zap will ultimately be skipped.

Agreed.

> 
> E.g. over two or three patches:
> 
> ---
>  arch/x86/kvm/mmu.h     |  1 +
>  arch/x86/kvm/mmu/mmu.c | 19 ++++++++++++++-----
>  arch/x86/kvm/mtrr.c    |  2 +-
>  arch/x86/kvm/x86.c     |  2 +-
>  4 files changed, 17 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 92d5a1924fc1..c3c50ec9d9db 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -103,6 +103,7 @@ static inline u8 kvm_get_shadow_phys_bits(void)
>  void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
>  void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
>  void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
> +bool kvm_mmu_honors_guest_mtrrs(struct kvm *kvm);
> 
>  void kvm_init_mmu(struct kvm_vcpu *vcpu);
>  void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index c8961f45e3b1..a2b1723bc6a4 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4512,12 +4512,10 @@ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
>  }
>  #endif
> 
> -int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> +bool kvm_mmu_honors_guest_mtrrs(struct kvm *kvm)
>  {
>  	/*
> -	 * If the guest's MTRRs may be used to compute the "real" memtype,
> -	 * restrict the mapping level to ensure KVM uses a consistent memtype
> -	 * across the entire mapping.  If the host MTRRs are ignored by TDP
> +	 * If TDP is enabled, the host MTRRs are ignored by TDP
>  	 * (shadow_memtype_mask is non-zero), and the VM has non-coherent DMA
>  	 * (DMA doesn't snoop CPU caches), KVM's ABI is to honor the memtype
>  	 * from the guest's MTRRs so that guest accesses to memory that is
> @@ -4526,7 +4524,18 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  	 * Note, KVM may still ultimately ignore guest MTRRs for certain PFNs,
>  	 * e.g. KVM will force UC memtype for host MMIO.
>  	 */
> -	if (shadow_memtype_mask && kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
> +	return tdp_enabled && shadow_memtype_mask &&
> +	       kvm_arch_has_noncoherent_dma(kvm);
> +}
> +
> +int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> +{
> +	/*
> +	 * If the guest's MTRRs may be used to compute the "real" memtype,
> +	 * restrict the mapping level to ensure KVM uses a consistent memtype
> +	 * across the entire mapping.
> +	 */
> +	if (kvm_mmu_honors_guest_mtrrs(vcpu->kvm)) {
>  		for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) {
>  			int page_num = KVM_PAGES_PER_HPAGE(fault->max_level);
>  			gfn_t base = gfn_round_for_level(fault->gfn,
> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
> index 3eb6e7f47e96..a67c28a56417 100644
> --- a/arch/x86/kvm/mtrr.c
> +++ b/arch/x86/kvm/mtrr.c
> @@ -320,7 +320,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>  	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
>  	gfn_t start, end;
> 
> -	if (!tdp_enabled || !kvm_arch_has_noncoherent_dma(vcpu->kvm))
> +	if (!kvm_mmu_honors_guest_mtrrs(vcpu->kvm))

Could we also add another helper kvm_mmu_cap_honors_guest_mtrrs(), which
does not check kvm_arch_has_noncoherent_dma()?

+static inline bool kvm_mmu_cap_honors_guest_mtrrs(struct kvm *kvm)
+{
+	return !!shadow_memtype_mask;
+}

This is because in patch 4 I plan to do the EPT zap when
noncoherent_dma_count goes from 1 to 0.

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 41d7bb51a297..ad0c43d7f532 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13146,13 +13146,19 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
 
 void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
 {
-	atomic_inc(&kvm->arch.noncoherent_dma_count);
+	if (atomic_inc_return(&kvm->arch.noncoherent_dma_count) == 1) {
+		if (kvm_mmu_cap_honors_guest_mtrrs(kvm))
+			kvm_zap_gfn_range(kvm, 0, ~0ULL);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
 
 void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
 {
-	atomic_dec(&kvm->arch.noncoherent_dma_count);
+	if (!atomic_dec_return(&kvm->arch.noncoherent_dma_count)) {
+		if (kvm_mmu_cap_honors_guest_mtrrs(kvm))
+			kvm_zap_gfn_range(kvm, 0, ~0ULL);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);

>  		return;
> 
>  	if (!mtrr_is_enabled(mtrr_state) && msr != MSR_MTRRdefType)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 076d50f6321b..977dceb7ba7e 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -942,7 +942,7 @@ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
>  		kvm_mmu_reset_context(vcpu);
> 
>  	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
> -	    kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
> +	    kvm_mmu_honors_guest_mtrrs(vcpu->kvm) &&
>  	    !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
>  		kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
>  }
> 
> base-commit: 8d4a1b4011c125b1510254b724433aaae746ce14
> --
> 