On Mon, Feb 26, 2024 at 12:25:41AM -0800, isaku.yamahata@xxxxxxxxx wrote: >From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> ... > >TDX requires additional parameters for TDX VM for confidential execution to >protect the confidentiality of its memory contents and CPU state from any >other software, including VMM. When creating a guest TD VM before creating >vcpu, the number of vcpu, TSC frequency (the values are the same among >vcpus, and it can't change.) CPUIDs which the TDX module emulates. Guest >TDs can trust those CPUIDs and sha384 values for measurement. > >Add a new subcommand, KVM_TDX_INIT_VM, to pass parameters for the TDX >guest. It assigns an encryption key to the TDX guest for memory >encryption. TDX encrypts memory per guest basis. The device model, say >qemu, passes per-VM parameters for the TDX guest. The maximum number of >vcpus, TSC frequency (TDX guest has fixed VM-wide TSC frequency, not per >vcpu. The TDX guest can not change it.), attributes (production or debug), >available extended features (which configure guest XCR0, IA32_XSS MSR), >CPUIDs, sha384 measurements, etc. > >Call this subcommand before creating vcpu and KVM_SET_CPUID2, i.e. CPUID >configurations aren't available yet. So CPUIDs configuration values need >to be passed in struct kvm_tdx_init_vm. The device model's responsibility >to make this CPUID config for KVM_TDX_INIT_VM and KVM_SET_CPUID2. > >Signed-off-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx> >Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> the SOB chain makes no sense. >+static void setup_tdparams_cpuids(struct kvm_cpuid2 *cpuid, >+ struct td_params *td_params) >+{ >+ int i; >+ >+ /* >+ * td_params.cpuid_values: The number and the order of cpuid_value must >+ * be same to the one of struct tdsysinfo.{num_cpuid_config, cpuid_configs} >+ * It's assumed that td_params was zeroed. 
>+ */ >+ for (i = 0; i < tdx_info->num_cpuid_config; i++) { >+ const struct kvm_tdx_cpuid_config *c = &tdx_info->cpuid_configs[i]; >+ /* KVM_TDX_CPUID_NO_SUBLEAF means index = 0. */ >+ u32 index = c->sub_leaf == KVM_TDX_CPUID_NO_SUBLEAF ? 0 : c->sub_leaf; >+ const struct kvm_cpuid_entry2 *entry = >+ kvm_find_cpuid_entry2(cpuid->entries, cpuid->nent, >+ c->leaf, index); >+ struct tdx_cpuid_value *value = &td_params->cpuid_values[i]; >+ >+ if (!entry) >+ continue; >+ >+ /* >+ * tdsysinfo.cpuid_configs[].{eax, ebx, ecx, edx} >+ * bit 1 means it can be configured to zero or one. >+ * bit 0 means it must be zero. >+ * Mask out non-configurable bits. >+ */ >+ value->eax = entry->eax & c->eax; >+ value->ebx = entry->ebx & c->ebx; >+ value->ecx = entry->ecx & c->ecx; >+ value->edx = entry->edx & c->edx; Any reason to mask off non-configurable bits rather than return an error? This is misleading to userspace because the guest sees the values emulated by the TDX module instead of the values passed from userspace (i.e., the request from userspace isn't honored, but nothing indicates that to userspace). >+ } >+} >+ >+static int setup_tdparams_xfam(struct kvm_cpuid2 *cpuid, struct td_params *td_params) >+{ >+ const struct kvm_cpuid_entry2 *entry; >+ u64 guest_supported_xcr0; >+ u64 guest_supported_xss; >+ >+ /* Setup td_params.xfam */ >+ entry = kvm_find_cpuid_entry2(cpuid->entries, cpuid->nent, 0xd, 0); >+ if (entry) >+ guest_supported_xcr0 = (entry->eax | ((u64)entry->edx << 32)); >+ else >+ guest_supported_xcr0 = 0; >+ guest_supported_xcr0 &= kvm_caps.supported_xcr0; >+ >+ entry = kvm_find_cpuid_entry2(cpuid->entries, cpuid->nent, 0xd, 1); >+ if (entry) >+ guest_supported_xss = (entry->ecx | ((u64)entry->edx << 32)); >+ else >+ guest_supported_xss = 0; >+ >+ /* >+ * PT and CET can be exposed to TD guest regardless of KVM's XSS, PT >+ * and, CET support. 
>+ */ >+ guest_supported_xss &= >+ (kvm_caps.supported_xss | XFEATURE_MASK_PT | TDX_TD_XFAM_CET); >+ >+ td_params->xfam = guest_supported_xcr0 | guest_supported_xss; >+ if (td_params->xfam & XFEATURE_MASK_LBR) { >+ /* >+ * TODO: once KVM supports LBR(save/restore LBR related >+ * registers around TDENTER), remove this guard. >+ */ >+#define MSG_LBR "TD doesn't support LBR yet. KVM needs to save/restore IA32_LBR_DEPTH properly.\n" >+ pr_warn(MSG_LBR); Drop the pr_warn() because userspace can trigger it at will. I don't think KVM needs to relay TDX module capabilities to userspace as-is. KVM should advertise a feature only if both the TDX module's and KVM's support are in place. If KVM masks out LBR and PERFMON and userspace requests them anyway, that is a userspace problem and we don't need to warn here. >+ return -EOPNOTSUPP; >+ } >+ >+ return 0; >+} >+ >+static int setup_tdparams(struct kvm *kvm, struct td_params *td_params, >+ struct kvm_tdx_init_vm *init_vm) >+{ >+ struct kvm_cpuid2 *cpuid = &init_vm->cpuid; >+ int ret; >+ >+ if (kvm->created_vcpus) >+ return -EBUSY; -EINVAL >+ >+ if (init_vm->attributes & TDX_TD_ATTRIBUTE_PERFMON) { >+ /* >+ * TODO: save/restore PMU related registers around TDENTER. >+ * Once it's done, remove this guard. >+ */ >+#define MSG_PERFMON "TD doesn't support perfmon yet. KVM needs to save/restore host perf registers properly.\n" >+ pr_warn(MSG_PERFMON); Drop the pr_warn() here too. 
>+ return -EOPNOTSUPP; >+ } >+ >+ td_params->max_vcpus = kvm->max_vcpus; >+ td_params->attributes = init_vm->attributes; >+ td_params->exec_controls = TDX_CONTROL_FLAG_NO_RBP_MOD; >+ td_params->tsc_frequency = TDX_TSC_KHZ_TO_25MHZ(kvm->arch.default_tsc_khz); >+ >+ ret = setup_tdparams_eptp_controls(cpuid, td_params); >+ if (ret) >+ return ret; >+ setup_tdparams_cpuids(cpuid, td_params); >+ ret = setup_tdparams_xfam(cpuid, td_params); >+ if (ret) >+ return ret; >+ >+#define MEMCPY_SAME_SIZE(dst, src) \ >+ do { \ >+ BUILD_BUG_ON(sizeof(dst) != sizeof(src)); \ >+ memcpy((dst), (src), sizeof(dst)); \ >+ } while (0) >+ >+ MEMCPY_SAME_SIZE(td_params->mrconfigid, init_vm->mrconfigid); >+ MEMCPY_SAME_SIZE(td_params->mrowner, init_vm->mrowner); >+ MEMCPY_SAME_SIZE(td_params->mrownerconfig, init_vm->mrownerconfig); >+ >+ return 0; >+} >+ >+static int __tdx_td_init(struct kvm *kvm, struct td_params *td_params, >+ u64 *seamcall_err) > { > struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); >+ struct tdx_module_args out; > cpumask_var_t packages; > unsigned long *tdcs_pa = NULL; > unsigned long tdr_pa = 0; >@@ -426,6 +581,7 @@ static int __tdx_td_init(struct kvm *kvm) > int ret, i; > u64 err; > >+ *seamcall_err = 0; > ret = tdx_guest_keyid_alloc(); > if (ret < 0) > return ret; >@@ -540,10 +696,23 @@ static int __tdx_td_init(struct kvm *kvm) > } > } > >- /* >- * Note, TDH_MNG_INIT cannot be invoked here. TDH_MNG_INIT requires a dedicated >- * ioctl() to define the configure CPUID values for the TD. >- */ >+ err = tdh_mng_init(kvm_tdx->tdr_pa, __pa(td_params), &out); >+ if ((err & TDX_SEAMCALL_STATUS_MASK) == TDX_OPERAND_INVALID) { >+ /* >+ * Because a user gives operands, don't warn. >+ * Return a hint to the user because it's sometimes hard for the >+ * user to figure out which operand is invalid. SEAMCALL status >+ * code includes which operand caused invalid operand error. 
>+ */ >+ *seamcall_err = err; >+ ret = -EINVAL; >+ goto teardown; >+ } else if (WARN_ON_ONCE(err)) { >+ pr_tdx_error(TDH_MNG_INIT, err, &out); >+ ret = -EIO; >+ goto teardown; >+ } >+ > return 0; > > /* >@@ -586,6 +755,76 @@ static int __tdx_td_init(struct kvm *kvm) > return ret; > } > >+static int tdx_td_init(struct kvm *kvm, struct kvm_tdx_cmd *cmd) >+{ >+ struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); >+ struct kvm_tdx_init_vm *init_vm = NULL; no need to initialize it to NULL. >+ struct td_params *td_params = NULL; >+ int ret; >+ >+ BUILD_BUG_ON(sizeof(*init_vm) != 8 * 1024); >+ BUILD_BUG_ON(sizeof(struct td_params) != 1024); >+ >+ if (is_hkid_assigned(kvm_tdx)) >+ return -EINVAL; >+ >+ if (cmd->flags) >+ return -EINVAL; >+ >+ init_vm = kzalloc(sizeof(*init_vm) + >+ sizeof(init_vm->cpuid.entries[0]) * KVM_MAX_CPUID_ENTRIES, >+ GFP_KERNEL); no need to zero the memory given ... >+ if (!init_vm) >+ return -ENOMEM; >+ if (copy_from_user(init_vm, (void __user *)cmd->data, sizeof(*init_vm))) { ... this. >+ ret = -EFAULT; >+ goto out; >+ } >+ if (init_vm->cpuid.nent > KVM_MAX_CPUID_ENTRIES) { >+ ret = -E2BIG; >+ goto out; >+ } >+ if (copy_from_user(init_vm->cpuid.entries, >+ (void __user *)cmd->data + sizeof(*init_vm), >+ flex_array_size(init_vm, cpuid.entries, init_vm->cpuid.nent))) { >+ ret = -EFAULT; >+ goto out; >+ } >+ >+ if (memchr_inv(init_vm->reserved, 0, sizeof(init_vm->reserved))) { >+ ret = -EINVAL; >+ goto out; >+ }