Re: [PATCH v19 044/130] KVM: TDX: Do TDX specific vcpu initialization

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



>+/* VMM can pass one 64bit auxiliary data to vcpu via RCX for guest BIOS. */
>+static int tdx_td_vcpu_init(struct kvm_vcpu *vcpu, u64 vcpu_rcx)
>+{
>+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
>+	struct vcpu_tdx *tdx = to_tdx(vcpu);
>+	unsigned long *tdvpx_pa = NULL;
>+	unsigned long tdvpr_pa;
>+	unsigned long va;
>+	int ret, i;
>+	u64 err;
>+
>+	if (is_td_vcpu_created(tdx))
>+		return -EINVAL;
>+
>+	/*
>+	 * vcpu_free method frees allocated pages.  Avoid partial setup so
>+	 * that the method can't handle it.
>+	 */
>+	va = __get_free_page(GFP_KERNEL_ACCOUNT);
>+	if (!va)
>+		return -ENOMEM;
>+	tdvpr_pa = __pa(va);
>+
>+	tdvpx_pa = kcalloc(tdx_info->nr_tdvpx_pages, sizeof(*tdx->tdvpx_pa),
>+			   GFP_KERNEL_ACCOUNT);
>+	if (!tdvpx_pa) {
>+		ret = -ENOMEM;
>+		goto free_tdvpr;
>+	}
>+	for (i = 0; i < tdx_info->nr_tdvpx_pages; i++) {
>+		va = __get_free_page(GFP_KERNEL_ACCOUNT);
>+		if (!va) {
>+			ret = -ENOMEM;
>+			goto free_tdvpx;
>+		}
>+		tdvpx_pa[i] = __pa(va);
>+	}
>+
>+	err = tdh_vp_create(kvm_tdx->tdr_pa, tdvpr_pa);
>+	if (KVM_BUG_ON(err, vcpu->kvm)) {
>+		ret = -EIO;
>+		pr_tdx_error(TDH_VP_CREATE, err, NULL);
>+		goto free_tdvpx;
>+	}
>+	tdx->tdvpr_pa = tdvpr_pa;
>+
>+	tdx->tdvpx_pa = tdvpx_pa;
>+	for (i = 0; i < tdx_info->nr_tdvpx_pages; i++) {

Can you merge the for-loop above into this one? then ...

>+		err = tdh_vp_addcx(tdx->tdvpr_pa, tdvpx_pa[i]);
>+		if (KVM_BUG_ON(err, vcpu->kvm)) {
>+			pr_tdx_error(TDH_VP_ADDCX, err, NULL);

>+			for (; i < tdx_info->nr_tdvpx_pages; i++) {
>+				free_page((unsigned long)__va(tdvpx_pa[i]));
>+				tdvpx_pa[i] = 0;
>+			}

... no need to free remaining pages.

>+			/* vcpu_free method frees TDVPX and TDR donated to TDX */
>+			return -EIO;
>+		}
>+	}
>+
>+	err = tdh_vp_init(tdx->tdvpr_pa, vcpu_rcx);
>+	if (KVM_BUG_ON(err, vcpu->kvm)) {
>+		pr_tdx_error(TDH_VP_INIT, err, NULL);
>+		return -EIO;
>+	}
>+
>+	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
>+	tdx->td_vcpu_created = true;
>+	return 0;
>+
>+free_tdvpx:
>+	for (i = 0; i < tdx_info->nr_tdvpx_pages; i++) {
>+		if (tdvpx_pa[i])
>+			free_page((unsigned long)__va(tdvpx_pa[i]));
>+		tdvpx_pa[i] = 0;
>+	}
>+	kfree(tdvpx_pa);
>+	tdx->tdvpx_pa = NULL;
>+free_tdvpr:
>+	if (tdvpr_pa)
>+		free_page((unsigned long)__va(tdvpr_pa));
>+	tdx->tdvpr_pa = 0;
>+
>+	return ret;
>+}
>+
>+int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
>+{
>+	struct msr_data apic_base_msr;
>+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
>+	struct vcpu_tdx *tdx = to_tdx(vcpu);
>+	struct kvm_tdx_cmd cmd;
>+	int ret;
>+
>+	if (tdx->initialized)
>+		return -EINVAL;
>+
>+	if (!is_hkid_assigned(kvm_tdx) || is_td_finalized(kvm_tdx))

These checks look random e.g., I am not sure why is_td_created() isn't check here.

A few helper functions and boolean variables are added to track which stage the
TD or TD vCPU is in. e.g.,

is_hkid_assigned()
is_td_finalized()
is_td_created()
tdx->initialized
td_vcpu_created

Insteading of doing this, I am wondering if adding two state machines for
TD and TD vCPU would make the implementation clear and easy to extend.

>+		return -EINVAL;
>+
>+	if (copy_from_user(&cmd, argp, sizeof(cmd)))
>+		return -EFAULT;
>+
>+	if (cmd.error)
>+		return -EINVAL;
>+
>+	/* Currently only KVM_TDX_INTI_VCPU is defined for vcpu operation. */
>+	if (cmd.flags || cmd.id != KVM_TDX_INIT_VCPU)
>+		return -EINVAL;

Even though KVM_TD_INIT_VCPU is the only supported command, it is worthwhile to
use a switch-case statement. New commands can be added easily without the need
to refactor this function first.

>+
>+	/*
>+	 * As TDX requires X2APIC, set local apic mode to X2APIC.  User space
>+	 * VMM, e.g. qemu, is required to set CPUID[0x1].ecx.X2APIC=1 by
>+	 * KVM_SET_CPUID2.  Otherwise kvm_set_apic_base() will fail.
>+	 */
>+	apic_base_msr = (struct msr_data) {
>+		.host_initiated = true,
>+		.data = APIC_DEFAULT_PHYS_BASE | LAPIC_MODE_X2APIC |
>+		(kvm_vcpu_is_reset_bsp(vcpu) ? MSR_IA32_APICBASE_BSP : 0),
>+	};
>+	if (kvm_set_apic_base(vcpu, &apic_base_msr))
>+		return -EINVAL;

Exporting kvm_vcpu_is_reset_bsp() and kvm_set_apic_base() should be done
here (rather than in a previous patch).

>+
>+	ret = tdx_td_vcpu_init(vcpu, (u64)cmd.data);
>+	if (ret)
>+		return ret;
>+
>+	tdx->initialized = true;
>+	return 0;
>+}
>+

>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>index c002761bb662..2bd4b7c8fa51 100644
>--- a/arch/x86/kvm/x86.c
>+++ b/arch/x86/kvm/x86.c
>@@ -6274,6 +6274,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
> 	case KVM_SET_DEVICE_ATTR:
> 		r = kvm_vcpu_ioctl_device_attr(vcpu, ioctl, argp);
> 		break;
>+	case KVM_MEMORY_ENCRYPT_OP:
>+		r = -ENOTTY;

Maybe -EINVAL is better. Because previously trying to call this on vCPU fd
failed with -EINVAL given ...

>+		if (!kvm_x86_ops.vcpu_mem_enc_ioctl)
>+			goto out;
>+		r = kvm_x86_ops.vcpu_mem_enc_ioctl(vcpu, argp);
>+		break;
> 	default:
> 		r = -EINVAL;

... this.

> 	}
>-- 
>2.25.1
>
>




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux