Re: [PATCH v19 044/130] KVM: TDX: Do TDX specific vcpu initialization

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Mar 21, 2024 at 01:43:14PM +0800,
Chao Gao <chao.gao@xxxxxxxxx> wrote:

> >+/* VMM can pass one 64bit auxiliary data to vcpu via RCX for guest BIOS. */
> >+static int tdx_td_vcpu_init(struct kvm_vcpu *vcpu, u64 vcpu_rcx)
> >+{
> >+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
> >+	struct vcpu_tdx *tdx = to_tdx(vcpu);
> >+	unsigned long *tdvpx_pa = NULL;
> >+	unsigned long tdvpr_pa;
> >+	unsigned long va;
> >+	int ret, i;
> >+	u64 err;
> >+
> >+	if (is_td_vcpu_created(tdx))
> >+		return -EINVAL;
> >+
> >+	/*
> >+	 * vcpu_free method frees allocated pages.  Avoid partial setup so
> >+	 * that the method can't handle it.
> >+	 */
> >+	va = __get_free_page(GFP_KERNEL_ACCOUNT);
> >+	if (!va)
> >+		return -ENOMEM;
> >+	tdvpr_pa = __pa(va);
> >+
> >+	tdvpx_pa = kcalloc(tdx_info->nr_tdvpx_pages, sizeof(*tdx->tdvpx_pa),
> >+			   GFP_KERNEL_ACCOUNT);
> >+	if (!tdvpx_pa) {
> >+		ret = -ENOMEM;
> >+		goto free_tdvpr;
> >+	}
> >+	for (i = 0; i < tdx_info->nr_tdvpx_pages; i++) {
> >+		va = __get_free_page(GFP_KERNEL_ACCOUNT);
> >+		if (!va) {
> >+			ret = -ENOMEM;
> >+			goto free_tdvpx;
> >+		}
> >+		tdvpx_pa[i] = __pa(va);
> >+	}
> >+
> >+	err = tdh_vp_create(kvm_tdx->tdr_pa, tdvpr_pa);
> >+	if (KVM_BUG_ON(err, vcpu->kvm)) {
> >+		ret = -EIO;
> >+		pr_tdx_error(TDH_VP_CREATE, err, NULL);
> >+		goto free_tdvpx;
> >+	}
> >+	tdx->tdvpr_pa = tdvpr_pa;
> >+
> >+	tdx->tdvpx_pa = tdvpx_pa;
> >+	for (i = 0; i < tdx_info->nr_tdvpx_pages; i++) {
> 
> Can you merge the for-loop above into this one? then ...
> 
> >+		err = tdh_vp_addcx(tdx->tdvpr_pa, tdvpx_pa[i]);
> >+		if (KVM_BUG_ON(err, vcpu->kvm)) {
> >+			pr_tdx_error(TDH_VP_ADDCX, err, NULL);
> 
> >+			for (; i < tdx_info->nr_tdvpx_pages; i++) {
> >+				free_page((unsigned long)__va(tdvpx_pa[i]));
> >+				tdvpx_pa[i] = 0;
> >+			}
> 
> ... no need to free remaining pages.

Makes sense. Let me clean up this.


> >+			/* vcpu_free method frees TDVPX and TDR donated to TDX */
> >+			return -EIO;
> >+		}
> >+	}
> >+
> >+	err = tdh_vp_init(tdx->tdvpr_pa, vcpu_rcx);
> >+	if (KVM_BUG_ON(err, vcpu->kvm)) {
> >+		pr_tdx_error(TDH_VP_INIT, err, NULL);
> >+		return -EIO;
> >+	}
> >+
> >+	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> >+	tdx->td_vcpu_created = true;
> >+	return 0;
> >+
> >+free_tdvpx:
> >+	for (i = 0; i < tdx_info->nr_tdvpx_pages; i++) {
> >+		if (tdvpx_pa[i])
> >+			free_page((unsigned long)__va(tdvpx_pa[i]));
> >+		tdvpx_pa[i] = 0;
> >+	}
> >+	kfree(tdvpx_pa);
> >+	tdx->tdvpx_pa = NULL;
> >+free_tdvpr:
> >+	if (tdvpr_pa)
> >+		free_page((unsigned long)__va(tdvpr_pa));
> >+	tdx->tdvpr_pa = 0;
> >+
> >+	return ret;
> >+}
> >+
> >+int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
> >+{
> >+	struct msr_data apic_base_msr;
> >+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
> >+	struct vcpu_tdx *tdx = to_tdx(vcpu);
> >+	struct kvm_tdx_cmd cmd;
> >+	int ret;
> >+
> >+	if (tdx->initialized)
> >+		return -EINVAL;
> >+
> >+	if (!is_hkid_assigned(kvm_tdx) || is_td_finalized(kvm_tdx))
> 
> These checks look random e.g., I am not sure why is_td_created() isn't check here.
> 
> A few helper functions and boolean variables are added to track which stage the
> TD or TD vCPU is in. e.g.,
> 
> is_hkid_assigned()
> is_td_finalized()
> is_td_created()
> tdx->initialized
> td_vcpu_created
> 
> Insteading of doing this, I am wondering if adding two state machines for
> TD and TD vCPU would make the implementation clear and easy to extend.

Let me look into the state machine. Originally I hoped we don't need it, but
it seems to deserve the state machine..


> >+		return -EINVAL;
> >+
> >+	if (copy_from_user(&cmd, argp, sizeof(cmd)))
> >+		return -EFAULT;
> >+
> >+	if (cmd.error)
> >+		return -EINVAL;
> >+
> >+	/* Currently only KVM_TDX_INTI_VCPU is defined for vcpu operation. */
> >+	if (cmd.flags || cmd.id != KVM_TDX_INIT_VCPU)
> >+		return -EINVAL;
> 
> Even though KVM_TD_INIT_VCPU is the only supported command, it is worthwhile to
> use a switch-case statement. New commands can be added easily without the need
> to refactor this function first.

Yes. For KVM_MAP_MEMORY, I will make KVM_TDX_INIT_MEM_REGION vcpu ioctl instead
of vm ioctl because it is consistent and scalable.  We'll have switch statement
in the next respin.

> >+
> >+	/*
> >+	 * As TDX requires X2APIC, set local apic mode to X2APIC.  User space
> >+	 * VMM, e.g. qemu, is required to set CPUID[0x1].ecx.X2APIC=1 by
> >+	 * KVM_SET_CPUID2.  Otherwise kvm_set_apic_base() will fail.
> >+	 */
> >+	apic_base_msr = (struct msr_data) {
> >+		.host_initiated = true,
> >+		.data = APIC_DEFAULT_PHYS_BASE | LAPIC_MODE_X2APIC |
> >+		(kvm_vcpu_is_reset_bsp(vcpu) ? MSR_IA32_APICBASE_BSP : 0),
> >+	};
> >+	if (kvm_set_apic_base(vcpu, &apic_base_msr))
> >+		return -EINVAL;
> 
> Exporting kvm_vcpu_is_reset_bsp() and kvm_set_apic_base() should be done
> here (rather than in a previous patch).

Sure.


> >+
> >+	ret = tdx_td_vcpu_init(vcpu, (u64)cmd.data);
> >+	if (ret)
> >+		return ret;
> >+
> >+	tdx->initialized = true;
> >+	return 0;
> >+}
> >+
> 
> >diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >index c002761bb662..2bd4b7c8fa51 100644
> >--- a/arch/x86/kvm/x86.c
> >+++ b/arch/x86/kvm/x86.c
> >@@ -6274,6 +6274,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> > 	case KVM_SET_DEVICE_ATTR:
> > 		r = kvm_vcpu_ioctl_device_attr(vcpu, ioctl, argp);
> > 		break;
> >+	case KVM_MEMORY_ENCRYPT_OP:
> >+		r = -ENOTTY;
> 
> Maybe -EINVAL is better. Because previously trying to call this on vCPU fd
> failed with -EINVAL given ...

Oh, ok. Will change it.  I followed VM ioctl case as default value. But vcpu
ioctl seems to have -EINVAL as default value.
-- 
Isaku Yamahata <isaku.yamahata@xxxxxxxxx>




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux