On Thu, Mar 31, 2022 at 05:17:37PM +1300, Kai Huang <kai.huang@xxxxxxxxx> wrote:
> On Fri, 2022-03-04 at 11:48 -0800, isaku.yamahata@xxxxxxxxx wrote:
> > From: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> >
> > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > index 1c8222f54764..702953fd365f 100644
> > --- a/arch/x86/kvm/vmx/tdx.c
> > +++ b/arch/x86/kvm/vmx/tdx.c
> > @@ -31,14 +31,324 @@ struct tdx_capabilities {
> >  	struct tdx_cpuid_config cpuid_configs[TDX_MAX_NR_CPUID_CONFIGS];
> >  };
> >
> > +/* KeyID used by TDX module */
> > +static u32 tdx_global_keyid __read_mostly;
> > +
>
> It's really not clear why you need to know tdx_global_keyid in the context of
> creating/destroying a TD.

The TDX module maps the TDR page with the TDX global KeyID.  The TDR
includes the KeyID assigned to this TD, and the TDX module maps the other
TD-related pages with that per-TD HKID.  Unlike the other TD-related
pages, reclaiming the TDR therefore requires a cache flush with the TDX
global KeyID.  I'll add a comment.

> >  /* Capabilities of KVM + the TDX module. */
> >  struct tdx_capabilities tdx_caps;
> >
> > +static DEFINE_MUTEX(tdx_lock);
> >  static struct mutex *tdx_mng_key_config_lock;
> >
> >  static u64 hkid_mask __ro_after_init;
> >  static u8 hkid_start_pos __ro_after_init;
> >
> > +static __always_inline hpa_t set_hkid_to_hpa(hpa_t pa, u16 hkid)
> > +{
> > +	pa &= ~hkid_mask;
> > +	pa |= (u64)hkid << hkid_start_pos;
> > +
> > +	return pa;
> > +}
> > +
> > +static inline bool is_td_created(struct kvm_tdx *kvm_tdx)
> > +{
> > +	return kvm_tdx->tdr.added;
> > +}
> > +
> > +static inline void tdx_hkid_free(struct kvm_tdx *kvm_tdx)
> > +{
> > +	tdx_keyid_free(kvm_tdx->hkid);
> > +	kvm_tdx->hkid = -1;
> > +}
> > +
> > +static inline bool is_hkid_assigned(struct kvm_tdx *kvm_tdx)
> > +{
> > +	return kvm_tdx->hkid > 0;
> > +}
> > +
> > +static void tdx_clear_page(unsigned long page)
> > +{
> > +	const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
> > +	unsigned long i;
> > +
> > +	/* Zeroing the page is only necessary for systems with MKTME-i. */
>
> "only necessary for systems with MKTME-i" because of what?
>
> Please be more clear that on an MKTME-i system, when re-assigning a page
> from an old keyid to a new keyid, MOVDIR64B is required to clear/write the
> page with the new keyid to prevent an integrity error when the page is
> read with the new keyid.

Let me borrow this sentence as the comment there.

> > +static int __tdx_reclaim_page(unsigned long va, hpa_t pa, bool do_wb, u16 hkid)
> > +{
> > +	struct tdx_module_output out;
> > +	u64 err;
> > +
> > +	err = tdh_phymem_page_reclaim(pa, &out);
> > +	if (WARN_ON_ONCE(err)) {
> > +		pr_tdx_error(TDH_PHYMEM_PAGE_RECLAIM, err, &out);
> > +		return -EIO;
> > +	}
> > +
> > +	if (do_wb) {
>
> In the callers, please add some comments explaining why do_wb is needed,
> and why it is not needed.

Will do.

> > +static int tdx_do_tdh_mng_key_config(void *param)
> > +{
> > +	hpa_t *tdr_p = param;
> > +	int cpu, cur_pkg;
> > +	u64 err;
> > +
> > +	cpu = raw_smp_processor_id();
> > +	cur_pkg = topology_physical_package_id(cpu);
> > +
> > +	mutex_lock(&tdx_mng_key_config_lock[cur_pkg]);
> > +	do {
> > +		err = tdh_mng_key_config(*tdr_p);
> > +	} while (err == TDX_KEY_GENERATION_FAILED);
> > +	mutex_unlock(&tdx_mng_key_config_lock[cur_pkg]);
>
> Why not squash patch 20 ("KVM: TDX: allocate per-package mutex") into this
> patch?

Will do.
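To illustrate how the helper is meant to be driven (a rough sketch only,
not necessarily the exact code I'll post after the squash):
TDH.MNG.KEY_CONFIG has to be executed once on each package, so the caller
in tdx_vm_init() below picks one CPU per package and invokes the helper
there via smp_call_on_cpu().  Roughly:

        if (!zalloc_cpumask_var(&packages, GFP_KERNEL)) {
                ret = -ENOMEM;
                goto free_tdcs;
        }
        for_each_online_cpu(i) {
                /* Configure the key only once per package. */
                if (cpumask_test_and_set_cpu(topology_physical_package_id(i),
                                             packages))
                        continue;
                /*
                 * Run tdx_do_tdh_mng_key_config() on a CPU of this
                 * package; the per-package mutex in the helper serializes
                 * concurrent TD creation on the same package.
                 */
                ret = smp_call_on_cpu(i, tdx_do_tdh_mng_key_config,
                                      &kvm_tdx->tdr.pa, true);
                if (ret)
                        break;
        }
        free_cpumask_var(packages);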
> > +
> > +	if (WARN_ON_ONCE(err)) {
> > +		pr_tdx_error(TDH_MNG_KEY_CONFIG, err, NULL);
> > +		return -EIO;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +int tdx_vm_init(struct kvm *kvm)
> > +{
> > +	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> > +	cpumask_var_t packages;
> > +	int ret, i;
> > +	u64 err;
> > +
> > +	/* vCPUs can't be created until after KVM_TDX_INIT_VM. */
> > +	kvm->max_vcpus = 0;
> > +
> > +	kvm_tdx->hkid = tdx_keyid_alloc();
> > +	if (kvm_tdx->hkid < 0)
> > +		return -EBUSY;
> > +
> > +	ret = tdx_alloc_td_page(&kvm_tdx->tdr);
> > +	if (ret)
> > +		goto free_hkid;
> > +
> > +	kvm_tdx->tdcs = kcalloc(tdx_caps.tdcs_nr_pages, sizeof(*kvm_tdx->tdcs),
> > +				GFP_KERNEL_ACCOUNT);
> > +	if (!kvm_tdx->tdcs)
> > +		goto free_tdr;
> > +	for (i = 0; i < tdx_caps.tdcs_nr_pages; i++) {
> > +		ret = tdx_alloc_td_page(&kvm_tdx->tdcs[i]);
> > +		if (ret)
> > +			goto free_tdcs;
> > +	}
> > +
> > +	mutex_lock(&tdx_lock);
> > +	err = tdh_mng_create(kvm_tdx->tdr.pa, kvm_tdx->hkid);
> > +	mutex_unlock(&tdx_lock);
>
> Please add a comment explaining why the locking is needed.

I'll add a comment on tdx_lock itself, not on each TDX seamcall site.

> > diff --git a/arch/x86/kvm/vmx/tdx_ops.h b/arch/x86/kvm/vmx/tdx_ops.h
> > index 0bed43879b82..3dd5b4c3f04c 100644
> > --- a/arch/x86/kvm/vmx/tdx_ops.h
> > +++ b/arch/x86/kvm/vmx/tdx_ops.h
> > @@ -6,6 +6,7 @@
> >
> >  #include <linux/compiler.h>
> >
> > +#include <asm/cacheflush.h>
> >  #include <asm/asm.h>
> >  #include <asm/kvm_host.h>
> >
> > @@ -15,8 +16,14 @@
> >
> >  #ifdef CONFIG_INTEL_TDX_HOST
> >
> > +static inline void tdx_clflush_page(hpa_t addr)
> > +{
> > +	clflush_cache_range(__va(addr), PAGE_SIZE);
> > +}
> > +
> >  static inline u64 tdh_mng_addcx(hpa_t tdr, hpa_t addr)
> >  {
> > +	tdx_clflush_page(addr);
>
> Please add a comment to explain why the clflush is needed.
>
> And you don't need the tdx_clflush_page() wrapper -- it's not a
> TDX-specific op.  You can just use clflush_cache_range().

Will remove it.  The plan was to enhance tdx_clflush_page(addr, page_level)
to support large pages and avoid repeating page_level_to_size; I'll defer
that to the large page support.
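For clarity, the result would look something like the below (a sketch; the
final comment wording may differ, and kvm_seamcall() stands for this
series' SEAMCALL wrapper):

        static inline u64 tdh_mng_addcx(hpa_t tdr, hpa_t addr)
        {
                /*
                 * The TDX module will map @addr with the TD's private
                 * HKID.  Flush the cache lines the kernel dirtied via
                 * the KeyID-0 mapping first, so that no stale writeback
                 * can corrupt the page after the TDX module takes
                 * ownership of it.
                 */
                clflush_cache_range(__va(addr), PAGE_SIZE);
                return kvm_seamcall(TDH_MNG_ADDCX, addr, tdr, 0, 0, 0, NULL);
        }

--
Isaku Yamahata <isaku.yamahata@xxxxxxxxx>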