On Mon, Nov 14, 2022 at 02:46:22PM +0800, Yuan Yao <yuan.yao@xxxxxxxxxxxxxxx> wrote: > On Sat, Oct 29, 2022 at 11:22:25PM -0700, isaku.yamahata@xxxxxxxxx wrote: > > From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> > > > > The next step of TDX guest creation is to create vcpu. Allocate TDX vcpu > > structures, initialize it. Allocate pages of TDX vcpu for the TDX module. > > > > In the case of the conventional case, cpuid is empty at the initialization. > > and cpuid is configured after the vcpu initialization. Because TDX > > supports only X2APIC mode, cpuid is forcibly initialized to support X2APIC > > on the vcpu initialization. > > > > Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> > > --- > > arch/x86/kvm/vmx/main.c | 40 +++++++++-- > > arch/x86/kvm/vmx/tdx.c | 138 +++++++++++++++++++++++++++++++++++++ > > arch/x86/kvm/vmx/x86_ops.h | 8 +++ > > 3 files changed, 182 insertions(+), 4 deletions(-) > > > > diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c > > index b4e4c6c677f6..c125b2e3e8b4 100644 > > --- a/arch/x86/kvm/vmx/main.c > > +++ b/arch/x86/kvm/vmx/main.c > > @@ -63,6 +63,38 @@ static void vt_vm_free(struct kvm *kvm) > > return tdx_vm_free(kvm); > > } > > > > +static int vt_vcpu_precreate(struct kvm *kvm) > > +{ > > + if (is_td(kvm)) > > + return 0; > > + > > + return vmx_vcpu_precreate(kvm); > > +} > > + > > +static int vt_vcpu_create(struct kvm_vcpu *vcpu) > > +{ > > + if (is_td_vcpu(vcpu)) > > + return tdx_vcpu_create(vcpu); > > + > > + return vmx_vcpu_create(vcpu); > > +} > > + > > +static void vt_vcpu_free(struct kvm_vcpu *vcpu) > > +{ > > + if (is_td_vcpu(vcpu)) > > + return tdx_vcpu_free(vcpu); > > + > > + return vmx_vcpu_free(vcpu); > > +} > > + > > +static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) > > +{ > > + if (is_td_vcpu(vcpu)) > > + return tdx_vcpu_reset(vcpu, init_event); > > + > > + return vmx_vcpu_reset(vcpu, init_event); > > +} > > + > > static int vt_mem_enc_ioctl(struct kvm *kvm, void __user 
*argp) > > { > > if (!is_td(kvm)) > > @@ -89,10 +121,10 @@ struct kvm_x86_ops vt_x86_ops __initdata = { > > .vm_destroy = vt_vm_destroy, > > .vm_free = vt_vm_free, > > > > - .vcpu_precreate = vmx_vcpu_precreate, > > - .vcpu_create = vmx_vcpu_create, > > - .vcpu_free = vmx_vcpu_free, > > - .vcpu_reset = vmx_vcpu_reset, > > + .vcpu_precreate = vt_vcpu_precreate, > > + .vcpu_create = vt_vcpu_create, > > + .vcpu_free = vt_vcpu_free, > > + .vcpu_reset = vt_vcpu_reset, > > > > .prepare_switch_to_guest = vmx_prepare_switch_to_guest, > > .vcpu_load = vmx_vcpu_load, > > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c > > index 54045e0576e7..0625c354b341 100644 > > --- a/arch/x86/kvm/vmx/tdx.c > > +++ b/arch/x86/kvm/vmx/tdx.c > > @@ -49,6 +49,11 @@ static __always_inline hpa_t set_hkid_to_hpa(hpa_t pa, u16 hkid) > > return pa | ((hpa_t)hkid << boot_cpu_data.x86_phys_bits); > > } > > > > +static inline bool is_td_vcpu_created(struct vcpu_tdx *tdx) > > +{ > > + return tdx->tdvpr.added; > > +} > > + > > static inline bool is_td_created(struct kvm_tdx *kvm_tdx) > > { > > return kvm_tdx->tdr.added; > > @@ -296,6 +301,139 @@ int tdx_vm_init(struct kvm *kvm) > > return 0; > > } > > > > +int tdx_vcpu_create(struct kvm_vcpu *vcpu) > > +{ > > + struct vcpu_tdx *tdx = to_tdx(vcpu); > > + int ret, i; > > + > > + /* TDX only supports x2APIC, which requires an in-kernel local APIC. 
*/ > > + if (!vcpu->arch.apic) > > + return -EINVAL; > > + > > + fpstate_set_confidential(&vcpu->arch.guest_fpu); > > + > > + ret = tdx_alloc_td_page(&tdx->tdvpr); > > + if (ret) > > + return ret; > > + > > + tdx->tdvpx = kcalloc(tdx_caps.tdvpx_nr_pages, sizeof(*tdx->tdvpx), > > + GFP_KERNEL_ACCOUNT); > > + if (!tdx->tdvpx) { > > + ret = -ENOMEM; > > + goto free_tdvpr; > > + } > > + for (i = 0; i < tdx_caps.tdvpx_nr_pages; i++) { > > + ret = tdx_alloc_td_page(&tdx->tdvpx[i]); > > + if (ret) > > + goto free_tdvpx; > > + } > > + > > + vcpu->arch.efer = EFER_SCE | EFER_LME | EFER_LMA | EFER_NX; > > + > > + vcpu->arch.cr0_guest_owned_bits = -1ul; > > + vcpu->arch.cr4_guest_owned_bits = -1ul; > > + > > + vcpu->arch.tsc_offset = to_kvm_tdx(vcpu->kvm)->tsc_offset; > > + vcpu->arch.l1_tsc_offset = vcpu->arch.tsc_offset; > > + vcpu->arch.guest_state_protected = > > + !(to_kvm_tdx(vcpu->kvm)->attributes & TDX_TD_ATTRIBUTE_DEBUG); > > + > > + return 0; > > + > > +free_tdvpx: > > + /* @i points at the TDVPX page that failed allocation. */ > > + for (--i; i >= 0; i--) > > + free_page(tdx->tdvpx[i].va); > > + kfree(tdx->tdvpx); > > + tdx->tdvpx = NULL; > > +free_tdvpr: > > + free_page(tdx->tdvpr.va); > > + > > + return ret; > > +} > > + > > +void tdx_vcpu_free(struct kvm_vcpu *vcpu) > > +{ > > + struct vcpu_tdx *tdx = to_tdx(vcpu); > > + int i; > > + > > + /* Can't reclaim or free pages if teardown failed. 
*/ > > + if (is_hkid_assigned(to_kvm_tdx(vcpu->kvm))) > > + return; > > + > > + if (tdx->tdvpx) { > > + for (i = 0; i < tdx_caps.tdvpx_nr_pages; i++) > > + tdx_reclaim_td_page(&tdx->tdvpx[i]); > > + kfree(tdx->tdvpx); > > + tdx->tdvpx = NULL; > > + } > > + tdx_reclaim_td_page(&tdx->tdvpr); > > +} > > + > > +void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) > > +{ > > + struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm); > > + struct vcpu_tdx *tdx = to_tdx(vcpu); > > + struct msr_data apic_base_msr; > > + u64 err; > > + int i; > > + > > + /* TDX doesn't support INIT event. */ > > + if (WARN_ON_ONCE(init_event)) > > + goto td_bugged; > > + if (WARN_ON_ONCE(is_td_vcpu_created(tdx))) > > + goto td_bugged; > > + > > + err = tdh_vp_create(kvm_tdx->tdr.pa, tdx->tdvpr.pa); > > + if (WARN_ON_ONCE(err)) { > > + pr_tdx_error(TDH_VP_CREATE, err, NULL); > > + goto td_bugged; > > + } > > + tdx_mark_td_page_added(&tdx->tdvpr); > > + > > + for (i = 0; i < tdx_caps.tdvpx_nr_pages; i++) { > > + err = tdh_vp_addcx(tdx->tdvpr.pa, tdx->tdvpx[i].pa); > > + if (WARN_ON_ONCE(err)) { > > + pr_tdx_error(TDH_VP_ADDCX, err, NULL); > > + goto td_bugged; > > + } > > + tdx_mark_td_page_added(&tdx->tdvpx[i]); > > + } > > + > > + if (!vcpu->arch.cpuid_entries) { > > + /* > > + * On cpu creation, cpuid entry is blank. Forcibly enable > > + * X2APIC feature to allow X2APIC. > > + */ > > + struct kvm_cpuid_entry2 *e; > > + > > + e = kvmalloc_array(1, sizeof(*e), GFP_KERNEL_ACCOUNT); > > NULL checking is necessary for kvmalloc_array. Fixed. Because the vcpu_reset() function doesn't return an error, this logic has been moved to tdx_vcpu_create(). Thanks, -- Isaku Yamahata <isaku.yamahata@xxxxxxxxx>