On Thu, Mar 31, 2022 at 05:17:37PM +1300, Kai Huang <kai.huang@xxxxxxxxx> wrote:
> On Fri, 2022-03-04 at 11:48 -0800, isaku.yamahata@xxxxxxxxx wrote:
> > From: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> >
> > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > index 1c8222f54764..702953fd365f 100644
> > --- a/arch/x86/kvm/vmx/tdx.c
> > +++ b/arch/x86/kvm/vmx/tdx.c
> > @@ -31,14 +31,324 @@ struct tdx_capabilities {
> >  	struct tdx_cpuid_config cpuid_configs[TDX_MAX_NR_CPUID_CONFIGS];
> >  };
> >
> > +/* KeyID used by TDX module */
> > +static u32 tdx_global_keyid __read_mostly;
> > +
>
> It's really not clear why you need to know tdx_global_keyid in the context of
> creating/destroying a TD.

The TDX module maps the TDR page with the TDX global KeyID.  The TDR
includes the KeyID assigned to this TD, and the TDX module maps the other
TD-related pages with that per-TD HKID.  Unlike the other TD-related
pages, reclaiming the TDR therefore requires a cache flush with the TDX
global KeyID.  I'll add a comment.

> >  /* Capabilities of KVM + the TDX module. */
> >  struct tdx_capabilities tdx_caps;
> >
> > +static DEFINE_MUTEX(tdx_lock);
> >  static struct mutex *tdx_mng_key_config_lock;
> >
> >  static u64 hkid_mask __ro_after_init;
> >  static u8 hkid_start_pos __ro_after_init;
> >
> > +static __always_inline hpa_t set_hkid_to_hpa(hpa_t pa, u16 hkid)
> > +{
> > +	pa &= ~hkid_mask;
> > +	pa |= (u64)hkid << hkid_start_pos;
> > +
> > +	return pa;
> > +}
> > +
> > +static inline bool is_td_created(struct kvm_tdx *kvm_tdx)
> > +{
> > +	return kvm_tdx->tdr.added;
> > +}
> > +
> > +static inline void tdx_hkid_free(struct kvm_tdx *kvm_tdx)
> > +{
> > +	tdx_keyid_free(kvm_tdx->hkid);
> > +	kvm_tdx->hkid = -1;
> > +}
> > +
> > +static inline bool is_hkid_assigned(struct kvm_tdx *kvm_tdx)
> > +{
> > +	return kvm_tdx->hkid > 0;
> > +}
> > +
> > +static void tdx_clear_page(unsigned long page)
> > +{
> > +	const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
> > +	unsigned long i;
> > +
> > +	/* Zeroing the page is only necessary for systems with MKTME-i. */
>
> "only necessary for systems with MKTME-i" because of what?
>
> Please be more clear that on an MKTME-i system, when re-assigning a page
> from an old keyid to a new keyid, MOVDIR64B is required to clear/write the
> page with the new keyid to prevent an integrity error when the page is
> read with the new keyid.

Let me borrow this sentence as the comment there.

> > +static int __tdx_reclaim_page(unsigned long va, hpa_t pa, bool do_wb, u16 hkid)
> > +{
> > +	struct tdx_module_output out;
> > +	u64 err;
> > +
> > +	err = tdh_phymem_page_reclaim(pa, &out);
> > +	if (WARN_ON_ONCE(err)) {
> > +		pr_tdx_error(TDH_PHYMEM_PAGE_RECLAIM, err, &out);
> > +		return -EIO;
> > +	}
> > +
> > +	if (do_wb) {
>
> In the callers, please add some comments explaining why do_wb is needed,
> and why it is not needed.

Will do.

> > +static int tdx_do_tdh_mng_key_config(void *param)
> > +{
> > +	hpa_t *tdr_p = param;
> > +	int cpu, cur_pkg;
> > +	u64 err;
> > +
> > +	cpu = raw_smp_processor_id();
> > +	cur_pkg = topology_physical_package_id(cpu);
> > +
> > +	mutex_lock(&tdx_mng_key_config_lock[cur_pkg]);
> > +	do {
> > +		err = tdh_mng_key_config(*tdr_p);
> > +	} while (err == TDX_KEY_GENERATION_FAILED);
> > +	mutex_unlock(&tdx_mng_key_config_lock[cur_pkg]);
>
> Why not squash patch 20 ("KVM: TDX: allocate per-package mutex") into this
> patch?

Will do.
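To illustrate how the helper is meant to be driven (a rough sketch only,
not necessarily the exact code I'll post after the squash):
TDH.MNG.KEY_CONFIG has to be executed once on each package, so the caller
in tdx_vm_init() below picks one CPU per package and invokes the helper
there via smp_call_on_cpu().  Roughly:

        if (!zalloc_cpumask_var(&packages, GFP_KERNEL)) {
                ret = -ENOMEM;
                goto free_tdcs;
        }
        for_each_online_cpu(i) {
                /* Configure the key only once per package. */
                if (cpumask_test_and_set_cpu(topology_physical_package_id(i),
                                             packages))
                        continue;
                /*
                 * Run tdx_do_tdh_mng_key_config() on a CPU of this
                 * package; the per-package mutex in the helper serializes
                 * concurrent TD creation on the same package.
                 */
                ret = smp_call_on_cpu(i, tdx_do_tdh_mng_key_config,
                                      &kvm_tdx->tdr.pa, true);
                if (ret)
                        break;
        }
        free_cpumask_var(packages);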
> > +
> > +	if (WARN_ON_ONCE(err)) {
> > +		pr_tdx_error(TDH_MNG_KEY_CONFIG, err, NULL);
> > +		return -EIO;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +int tdx_vm_init(struct kvm *kvm)
> > +{
> > +	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> > +	cpumask_var_t packages;
> > +	int ret, i;
> > +	u64 err;
> > +
> > +	/* vCPUs can't be created until after KVM_TDX_INIT_VM. */
> > +	kvm->max_vcpus = 0;
> > +
> > +	kvm_tdx->hkid = tdx_keyid_alloc();
> > +	if (kvm_tdx->hkid < 0)
> > +		return -EBUSY;
> > +
> > +	ret = tdx_alloc_td_page(&kvm_tdx->tdr);
> > +	if (ret)
> > +		goto free_hkid;
> > +
> > +	kvm_tdx->tdcs = kcalloc(tdx_caps.tdcs_nr_pages, sizeof(*kvm_tdx->tdcs),
> > +				GFP_KERNEL_ACCOUNT);
> > +	if (!kvm_tdx->tdcs)
> > +		goto free_tdr;
> > +	for (i = 0; i < tdx_caps.tdcs_nr_pages; i++) {
> > +		ret = tdx_alloc_td_page(&kvm_tdx->tdcs[i]);
> > +		if (ret)
> > +			goto free_tdcs;
> > +	}
> > +
> > +	mutex_lock(&tdx_lock);
> > +	err = tdh_mng_create(kvm_tdx->tdr.pa, kvm_tdx->hkid);
> > +	mutex_unlock(&tdx_lock);
>
> Please add a comment explaining why the locking is needed.

I'll add a comment on tdx_lock itself, not on each TDX seamcall site.

> > diff --git a/arch/x86/kvm/vmx/tdx_ops.h b/arch/x86/kvm/vmx/tdx_ops.h
> > index 0bed43879b82..3dd5b4c3f04c 100644
> > --- a/arch/x86/kvm/vmx/tdx_ops.h
> > +++ b/arch/x86/kvm/vmx/tdx_ops.h
> > @@ -6,6 +6,7 @@
> >
> >  #include <linux/compiler.h>
> >
> > +#include <asm/cacheflush.h>
> >  #include <asm/asm.h>
> >  #include <asm/kvm_host.h>
> >
> > @@ -15,8 +16,14 @@
> >
> >  #ifdef CONFIG_INTEL_TDX_HOST
> >
> > +static inline void tdx_clflush_page(hpa_t addr)
> > +{
> > +	clflush_cache_range(__va(addr), PAGE_SIZE);
> > +}
> > +
> >  static inline u64 tdh_mng_addcx(hpa_t tdr, hpa_t addr)
> >  {
> > +	tdx_clflush_page(addr);
>
> Please add a comment to explain why the clflush is needed.
>
> And you don't need the tdx_clflush_page() wrapper -- it's not a
> TDX-specific op.  You can just use clflush_cache_range().

Will remove it.  The plan was to enhance tdx_clflush_page(addr, page_level)
to support large pages and avoid repeating page_level_to_size; I'll defer
that to the large page support.
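For clarity, the result would look something like the below (a sketch; the
final comment wording may differ, and kvm_seamcall() stands for this
series' SEAMCALL wrapper):

        static inline u64 tdh_mng_addcx(hpa_t tdr, hpa_t addr)
        {
                /*
                 * The TDX module will map @addr with the TD's private
                 * HKID.  Flush the cache lines the kernel dirtied via
                 * the KeyID-0 mapping first, so that no stale writeback
                 * can corrupt the page after the TDX module takes
                 * ownership of it.
                 */
                clflush_cache_range(__va(addr), PAGE_SIZE);
                return kvm_seamcall(TDH_MNG_ADDCX, addr, tdr, 0, 0, 0, NULL);
        }

--
Isaku Yamahata <isaku.yamahata@xxxxxxxxx>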