>+/* VMM can pass one 64bit auxiliary data to vcpu via RCX for guest BIOS. */ >+static int tdx_td_vcpu_init(struct kvm_vcpu *vcpu, u64 vcpu_rcx) >+{ >+ struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm); >+ struct vcpu_tdx *tdx = to_tdx(vcpu); >+ unsigned long *tdvpx_pa = NULL; >+ unsigned long tdvpr_pa; >+ unsigned long va; >+ int ret, i; >+ u64 err; >+ >+ if (is_td_vcpu_created(tdx)) >+ return -EINVAL; >+ >+ /* >+ * vcpu_free method frees allocated pages. Avoid partial setup so >+ * that the method can't handle it. >+ */ >+ va = __get_free_page(GFP_KERNEL_ACCOUNT); >+ if (!va) >+ return -ENOMEM; >+ tdvpr_pa = __pa(va); >+ >+ tdvpx_pa = kcalloc(tdx_info->nr_tdvpx_pages, sizeof(*tdx->tdvpx_pa), >+ GFP_KERNEL_ACCOUNT); >+ if (!tdvpx_pa) { >+ ret = -ENOMEM; >+ goto free_tdvpr; >+ } >+ for (i = 0; i < tdx_info->nr_tdvpx_pages; i++) { >+ va = __get_free_page(GFP_KERNEL_ACCOUNT); >+ if (!va) { >+ ret = -ENOMEM; >+ goto free_tdvpx; >+ } >+ tdvpx_pa[i] = __pa(va); >+ } >+ >+ err = tdh_vp_create(kvm_tdx->tdr_pa, tdvpr_pa); >+ if (KVM_BUG_ON(err, vcpu->kvm)) { >+ ret = -EIO; >+ pr_tdx_error(TDH_VP_CREATE, err, NULL); >+ goto free_tdvpx; >+ } >+ tdx->tdvpr_pa = tdvpr_pa; >+ >+ tdx->tdvpx_pa = tdvpx_pa; >+ for (i = 0; i < tdx_info->nr_tdvpx_pages; i++) { Can you merge the for-loop above into this one? then ... >+ err = tdh_vp_addcx(tdx->tdvpr_pa, tdvpx_pa[i]); >+ if (KVM_BUG_ON(err, vcpu->kvm)) { >+ pr_tdx_error(TDH_VP_ADDCX, err, NULL); >+ for (; i < tdx_info->nr_tdvpx_pages; i++) { >+ free_page((unsigned long)__va(tdvpx_pa[i])); >+ tdvpx_pa[i] = 0; >+ } ... no need to free remaining pages. >+ /* vcpu_free method frees TDVPX and TDR donated to TDX */ >+ return -EIO; >+ } >+ } >+ >+ err = tdh_vp_init(tdx->tdvpr_pa, vcpu_rcx); >+ if (KVM_BUG_ON(err, vcpu->kvm)) { >+ pr_tdx_error(TDH_VP_INIT, err, NULL); >+ return -EIO; >+ } >+ >+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; >+ tdx->td_vcpu_created = true; >+ return 0; >+ >+free_tdvpx: >+ for (i = 0; i < tdx_info->nr_tdvpx_pages; i++) { >+ if (tdvpx_pa[i]) >+ free_page((unsigned long)__va(tdvpx_pa[i])); >+ tdvpx_pa[i] = 0; >+ } >+ kfree(tdvpx_pa); >+ tdx->tdvpx_pa = NULL; >+free_tdvpr: >+ if (tdvpr_pa) >+ free_page((unsigned long)__va(tdvpr_pa)); >+ tdx->tdvpr_pa = 0; >+ >+ return ret; >+} >+ >+int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) >+{ >+ struct msr_data apic_base_msr; >+ struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm); >+ struct vcpu_tdx *tdx = to_tdx(vcpu); >+ struct kvm_tdx_cmd cmd; >+ int ret; >+ >+ if (tdx->initialized) >+ return -EINVAL; >+ >+ if (!is_hkid_assigned(kvm_tdx) || is_td_finalized(kvm_tdx)) These checks look random e.g., I am not sure why is_td_created() isn't check here. A few helper functions and boolean variables are added to track which stage the TD or TD vCPU is in. e.g., is_hkid_assigned() is_td_finalized() is_td_created() tdx->initialized td_vcpu_created Insteading of doing this, I am wondering if adding two state machines for TD and TD vCPU would make the implementation clear and easy to extend. >+ return -EINVAL; >+ >+ if (copy_from_user(&cmd, argp, sizeof(cmd))) >+ return -EFAULT; >+ >+ if (cmd.error) >+ return -EINVAL; >+ >+ /* Currently only KVM_TDX_INTI_VCPU is defined for vcpu operation. */ >+ if (cmd.flags || cmd.id != KVM_TDX_INIT_VCPU) >+ return -EINVAL; Even though KVM_TD_INIT_VCPU is the only supported command, it is worthwhile to use a switch-case statement. New commands can be added easily without the need to refactor this function first. >+ >+ /* >+ * As TDX requires X2APIC, set local apic mode to X2APIC. User space >+ * VMM, e.g. qemu, is required to set CPUID[0x1].ecx.X2APIC=1 by >+ * KVM_SET_CPUID2. Otherwise kvm_set_apic_base() will fail. >+ */ >+ apic_base_msr = (struct msr_data) { >+ .host_initiated = true, >+ .data = APIC_DEFAULT_PHYS_BASE | LAPIC_MODE_X2APIC | >+ (kvm_vcpu_is_reset_bsp(vcpu) ? MSR_IA32_APICBASE_BSP : 0), >+ }; >+ if (kvm_set_apic_base(vcpu, &apic_base_msr)) >+ return -EINVAL; Exporting kvm_vcpu_is_reset_bsp() and kvm_set_apic_base() should be done here (rather than in a previous patch). >+ >+ ret = tdx_td_vcpu_init(vcpu, (u64)cmd.data); >+ if (ret) >+ return ret; >+ >+ tdx->initialized = true; >+ return 0; >+} >+ >diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >index c002761bb662..2bd4b7c8fa51 100644 >--- a/arch/x86/kvm/x86.c >+++ b/arch/x86/kvm/x86.c >@@ -6274,6 +6274,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, > case KVM_SET_DEVICE_ATTR: > r = kvm_vcpu_ioctl_device_attr(vcpu, ioctl, argp); > break; >+ case KVM_MEMORY_ENCRYPT_OP: >+ r = -ENOTTY; Maybe -EINVAL is better. Because previously trying to call this on vCPU fd failed with -EINVAL given ... >+ if (!kvm_x86_ops.vcpu_mem_enc_ioctl) >+ goto out; >+ r = kvm_x86_ops.vcpu_mem_enc_ioctl(vcpu, argp); >+ break; > default: > r = -EINVAL; ... this. > } >-- >2.25.1 > >