Re: [PATCH v12 050/106] KVM: TDX: MTRR: implement get_mt_mask() for TDX

Chenyi Qiang <chenyi.qiang@xxxxxxxxx> · Thu, 9 Mar 2023 17:03:13 +0800

On 2/27/2023 4:22 PM, isaku.yamahata@xxxxxxxxx wrote:
> From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
> 
> Because TDX virtualize cpuid[0x1].EDX[MTRR: bit 12] to fixed 1, guest TD
> thinks MTRR is supported.  Although TDX supports only WB for private GPA,
> it's desirable to support MTRR for shared GPA.  As guest access to MTRR
> MSRs causes #VE and KVM/x86 tracks the values of MTRR MSRs, the remining
> part is to implement get_mt_mask method for TDX for shared GPA.
> 
> Pass around shared bit from kvm fault handler to get_mt_mask method so that
> it can determine if the gfn is shared or private.  Implement get_mt_mask()
> following vmx case for shared GPA and return WB for private GPA.
> the existing vmx_get_mt_mask() can't be directly used as CPU state(CR0.CD)
> is protected.  GFN passed to kvm_mtrr_check_gfn_range_consistency() should
> include shared bit.
> 
> Suggested-by: Kai Huang <kai.huang@xxxxxxxxx>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
> ---
> Changes from v11 to V12
> - Make common function for VMX and TDX
> - pass around shared bit from KVM fault handler to get_mt_mask method
> - updated commit message
> ---
>  arch/x86/kvm/mmu/mmu.c     |  2 +-
>  arch/x86/kvm/mmu/spte.c    |  5 +++--
>  arch/x86/kvm/mmu/spte.h    |  2 +-
>  arch/x86/kvm/vmx/common.h  |  2 ++
>  arch/x86/kvm/vmx/main.c    | 11 ++++++++++-
>  arch/x86/kvm/vmx/tdx.c     | 17 +++++++++++++++++
>  arch/x86/kvm/vmx/vmx.c     |  5 +++--
>  arch/x86/kvm/vmx/x86_ops.h |  2 ++
>  8 files changed, 39 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 6421f92e618e..0c852517c0e7 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4555,7 +4555,7 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  	if (shadow_memtype_mask && kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
>  		for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) {
>  			int page_num = KVM_PAGES_PER_HPAGE(fault->max_level);
> -			gfn_t base = fault->gfn & ~(page_num - 1);
> +			gfn_t base = (fault->addr >> PAGE_SHIFT) & ~(page_num - 1);
>  
>  			if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num))
>  				break;
> diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
> index 48e17588a127..9c874bca69f6 100644
> --- a/arch/x86/kvm/mmu/spte.c
> +++ b/arch/x86/kvm/mmu/spte.c
> @@ -137,13 +137,14 @@ bool spte_has_volatile_bits(u64 spte)
>  
>  bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
>  	       const struct kvm_memory_slot *slot,
> -	       unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
> +	       unsigned int pte_access, gfn_t gfn_including_shared, kvm_pfn_t pfn,
>  	       u64 old_spte, bool prefetch, bool can_unsync,
>  	       bool host_writable, u64 *new_spte)
>  {
>  	int level = sp->role.level;
>  	u64 spte = SPTE_MMU_PRESENT_MASK;
>  	bool wrprot = false;
> +	gfn_t gfn = gfn_including_shared & ~kvm_gfn_shared_mask(vcpu->kvm);
>  
>  	WARN_ON_ONCE(!pte_access && !shadow_present_mask);
>  
> @@ -191,7 +192,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
>  		spte |= PT_PAGE_SIZE_MASK;
>  
>  	if (shadow_memtype_mask)
> -		spte |= static_call(kvm_x86_get_mt_mask)(vcpu, gfn,
> +		spte |= static_call(kvm_x86_get_mt_mask)(vcpu, gfn_including_shared,
>  							 kvm_is_mmio_pfn(pfn));
>  	if (host_writable)
>  		spte |= shadow_host_writable_mask;
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index 7046671b08cb..067ea1ae3a13 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -481,7 +481,7 @@ bool spte_has_volatile_bits(u64 spte);
>  
>  bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
>  	       const struct kvm_memory_slot *slot,
> -	       unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
> +	       unsigned int pte_access, gfn_t gfn_including_shared, kvm_pfn_t pfn,
>  	       u64 old_spte, bool prefetch, bool can_unsync,
>  	       bool host_writable, u64 *new_spte);
>  u64 make_huge_page_split_spte(struct kvm *kvm, u64 huge_spte,
> diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
> index 235908f3e044..422b24af7fc1 100644
> --- a/arch/x86/kvm/vmx/common.h
> +++ b/arch/x86/kvm/vmx/common.h
> @@ -6,6 +6,8 @@
>  
>  #include "mmu.h"
>  
> +u8 __vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio, bool check_cr0_cd);
> +
>  static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
>  					     unsigned long exit_qualification)
>  {
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index a2ca09f10b6e..f42617c7aeaf 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -3,6 +3,7 @@
>  
>  #include "x86_ops.h"
>  #include "mmu.h"
> +#include "common.h"
>  #include "vmx.h"
>  #include "nested.h"
>  #include "mmu.h"
> @@ -206,6 +207,14 @@ static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
>  	vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
>  }
>  
> +static u8 vt_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> +{
> +	if (is_td_vcpu(vcpu))
> +		return tdx_get_mt_mask(vcpu, gfn, is_mmio);
> +
> +	return __vmx_get_mt_mask(vcpu, gfn, is_mmio, true);
> +}
> +
>  static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
>  {
>  	if (!is_td(kvm))
> @@ -326,7 +335,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>  
>  	.set_tss_addr = vmx_set_tss_addr,
>  	.set_identity_map_addr = vmx_set_identity_map_addr,
> -	.get_mt_mask = vmx_get_mt_mask,
> +	.get_mt_mask = vt_get_mt_mask,
>  
>  	.get_exit_info = vmx_get_exit_info,
>  
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 8f191177bfe9..f532f5c352f3 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -5,6 +5,7 @@
>  
>  #include "capabilities.h"
>  #include "x86_ops.h"
> +#include "common.h"
>  #include "tdx.h"
>  #include "vmx.h"
>  #include "x86.h"
> @@ -345,6 +346,22 @@ int tdx_vm_init(struct kvm *kvm)
>  	return 0;
>  }
>  
> +u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> +{
> +	/* TDX private GPA is always WB. */
> +	if (kvm_gfn_private(vcpu->kvm, gfn)) {

kvm_gfn_private() only drops shared bit instead of checking if gfn is
private of shared. It should be !(gfn & shared mask) as mentioned in
your v11 series :)

> +		/* MMIO is only for shared GPA. */
> +		WARN_ON_ONCE(is_mmio);
> +		return  MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
> +	}
> +
> +	/* Drop shared bit as MTRR doesn't know about shared bit. */
> +	gfn = kvm_gfn_private(vcpu->kvm, gfn);
> +
> +	/* As TDX enforces CR0.CD to 0, pass check_cr0_cd = false. */
> +	return __vmx_get_mt_mask(vcpu, gfn, is_mmio, false);
> +}
> +
>  int tdx_vcpu_create(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_cpuid_entry2 *e;
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 3ff3b33fe9af..72da86abf989 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7552,7 +7552,8 @@ int vmx_vm_init(struct kvm *kvm)
>  	return 0;
>  }
>  
> -u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> +u8 __vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio,
> +		     bool check_cr0_cd)
>  {
>  	u8 cache;
>  
> @@ -7580,7 +7581,7 @@ u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
>  	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
>  		return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
>  
> -	if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
> +	if (check_cr0_cd && kvm_read_cr0(vcpu) & X86_CR0_CD) {
>  		if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
>  			cache = MTRR_TYPE_WRBACK;
>  		else